# forge/backend/app/services/alt_text_generator.py

"""Alt Text Generator Service - OpenAI GPT-4 Vision"""
import httpx
import base64
import os
from datetime import datetime

from app.database import SessionLocal
from app.models.job import Job
from app.models.asset import Asset
from app.config import settings


# Model requested from the API and recorded on the job row.
_ALT_TEXT_MODEL = "gpt-4o"

# Character caps applied when the reply cannot be split into labelled lines.
_SHORT_ALT_MAX_CHARS = 150
_LONG_ALT_MAX_CHARS = 400

# Lower-case line labels the model is asked to emit.
_SHORT_LABEL = "short version:"
_LONG_LABEL = "long version:"

_SYSTEM_PROMPT = """You are an expert at writing accessible alt text for images.
Your alt text should:
- Be concise and descriptive
- Focus on the most important elements
- Avoid starting with "image of" or "picture of"
- Include any text visible in the image
- Be factual and non-subjective

Provide two versions:
1. Short version: 150 characters or less
2. Long version: 400 characters or less"""

_USER_PROMPT = "Please analyze this image and provide alt text descriptions in the following format exactly:\n\nShort version: [brief description]\n\nLong version: [detailed description]"


def _extract_content(result):
    """Return the first choice's message text from a chat completion payload.

    Returns an empty string when ``choices`` is missing or empty, or when the
    message or its content is missing or null.
    """
    choices = result.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content") or ""


def _parse_alt_text(content):
    """Split the model reply into ``(short_alt, long_alt)``.

    Lines are matched case-insensitively against the "Short version:" and
    "Long version:" labels after trimming surrounding whitespace. If neither
    label is found, both values fall back to a truncated copy of ``content``.
    """
    short_alt = ""
    long_alt = ""

    for raw_line in content.split("\n"):
        line = raw_line.strip()
        lowered = line.lower()
        # Slice by label length instead of str.replace so the label is
        # removed regardless of how the model capitalised it.
        if lowered.startswith(_SHORT_LABEL):
            short_alt = line[len(_SHORT_LABEL):].strip()
        elif lowered.startswith(_LONG_LABEL):
            long_alt = line[len(_LONG_LABEL):].strip()

    # If parsing failed, use the full content, capped to the requested sizes.
    if not short_alt and not long_alt:
        short_alt = content[:_SHORT_ALT_MAX_CHARS]
        long_alt = content[:_LONG_ALT_MAX_CHARS]

    return short_alt, long_alt


async def generate(job_id: str):
    """Generate alt text for a job's first input image using GPT-4 Vision.

    Loads the job and its first input asset, sends the base64-encoded file to
    the OpenAI chat completions endpoint, and stores the parsed short and long
    alt text (plus the raw reply) in ``job.output_data``. Progress is committed
    at 10, 20, 80 and 100.

    Args:
        job_id: Primary key of the ``Job`` row to process.

    Returns:
        None. Returns without doing anything if no job has this id.

    Raises:
        Exception: Only when the failure happens before the job row has been
            loaded, so there is nowhere to record it. Any later failure is
            written to the job (``status = "failed"`` and ``error_message``)
            and is not propagated.
    """
    db = SessionLocal()
    # Bound before the try block so the except handler can tell whether the
    # job row was ever loaded.
    job = None
    try:
        job = db.query(Job).filter(Job.id == job_id).first()
        if not job:
            return

        input_asset_ids = job.input_asset_ids

        if not input_asset_ids:
            raise ValueError("No input asset provided")

        input_asset = db.query(Asset).filter(Asset.id == input_asset_ids[0]).first()
        if not input_asset:
            raise ValueError("Input asset not found")

        job.progress = 10
        job.api_provider = "openai"
        job.api_model = _ALT_TEXT_MODEL
        db.commit()

        # Read and encode image
        with open(input_asset.file_path, "rb") as f:
            image_data = base64.b64encode(f.read()).decode("utf-8")

        job.progress = 20
        db.commit()

        # Call GPT-4 Vision
        async with httpx.AsyncClient(timeout=60) as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {settings.openai_api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": _ALT_TEXT_MODEL,
                    "messages": [
                        {
                            "role": "system",
                            "content": _SYSTEM_PROMPT
                        },
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": _USER_PROMPT
                                },
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{input_asset.mime_type};base64,{image_data}"
                                    }
                                }
                            ]
                        }
                    ],
                    "max_tokens": 500
                }
            )
            response.raise_for_status()
            result = response.json()

        job.progress = 80
        db.commit()

        # Parse response
        content = _extract_content(result)
        if not content:
            # Fail the job rather than marking it completed with empty alt text.
            raise ValueError("OpenAI response contained no alt text content")

        short_alt, long_alt = _parse_alt_text(content)

        job.output_data = {
            "short_alt_text": short_alt,
            "long_alt_text": long_alt,
            "raw_response": content
        }
        job.progress = 100
        job.status = "completed"
        job.completed_at = datetime.utcnow()
        db.commit()

    except Exception as e:
        # Discard any half-applied state so the session is usable again.
        db.rollback()
        if job is None:
            # The job lookup itself failed; nothing to mark as failed, so let
            # the real error surface instead of masking it.
            raise
        job.status = "failed"
        job.error_message = str(e)
        db.commit()
    finally:
        db.close()