"""Alt Text Generator Service - OpenAI GPT-4 Vision""" import httpx import base64 import os from datetime import datetime from app.database import SessionLocal from app.models.job import Job from app.models.asset import Asset from app.config import settings async def generate(job_id: str): """Generate alt text for image using GPT-4 Vision""" db = SessionLocal() try: job = db.query(Job).filter(Job.id == job_id).first() if not job: return input_asset_ids = job.input_asset_ids if not input_asset_ids: raise ValueError("No input asset provided") input_asset = db.query(Asset).filter(Asset.id == input_asset_ids[0]).first() if not input_asset: raise ValueError("Input asset not found") job.progress = 10 job.api_provider = "openai" job.api_model = "gpt-4o" db.commit() # Read and encode image with open(input_asset.file_path, "rb") as f: image_data = base64.b64encode(f.read()).decode("utf-8") job.progress = 20 db.commit() # Call GPT-4 Vision async with httpx.AsyncClient(timeout=60) as client: response = await client.post( "https://api.openai.com/v1/chat/completions", headers={ "Authorization": f"Bearer {settings.openai_api_key}", "Content-Type": "application/json" }, json={ "model": "gpt-4o", "messages": [ { "role": "system", "content": """You are an expert at writing accessible alt text for images. Your alt text should: - Be concise and descriptive - Focus on the most important elements - Avoid starting with "image of" or "picture of" - Include any text visible in the image - Be factual and non-subjective Provide two versions: 1. Short version: 150 characters or less 2. Long version: 400 characters or less""" }, { "role": "user", "content": [ { "type": "text", "text": "Please analyze this image and provide alt text descriptions in the following format exactly:\n\nShort version: [brief description]\n\nLong version: [detailed description]" }, { "type": "image_url", "image_url": { "url": f"data:{input_asset.mime_type};base64,{image_data}" } } ] } ], "max_tokens": 500 } ) response.raise_for_status() result = response.json() job.progress = 80 db.commit() # Parse response content = result.get("choices", [{}])[0].get("message", {}).get("content", "") # Extract short and long versions short_alt = "" long_alt = "" lines = content.split("\n") for i, line in enumerate(lines): if line.lower().startswith("short version:"): short_alt = line.replace("Short version:", "").replace("short version:", "").strip() elif line.lower().startswith("long version:"): long_alt = line.replace("Long version:", "").replace("long version:", "").strip() # If parsing failed, use full content if not short_alt and not long_alt: short_alt = content[:150] long_alt = content[:400] job.output_data = { "short_alt_text": short_alt, "long_alt_text": long_alt, "raw_response": content } job.progress = 100 job.status = "completed" job.completed_at = datetime.utcnow() db.commit() except Exception as e: job.status = "failed" job.error_message = str(e) db.commit() finally: db.close()