forge/backend/app/services/alt_text_generator.py
DJP 7a804e896d Initial commit - FORGE AI unified platform
Features:
- Image generation (OpenAI, Gemini, Leonardo, Bria, Stability, Flux)
- Nano Banana iterative editing
- Video generation and upscaling
- Audio TTS, STT, sound effects (ElevenLabs)
- Text prompt studio and alt text
- User authentication with JWT/cookies
- Admin panel with voice management
- Job queue with Celery
- PostgreSQL + Redis backend
- Next.js 15 + FastAPI architecture

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2025-12-09 20:39:00 -05:00

126 lines
4.2 KiB
Python

"""Alt Text Generator Service - OpenAI GPT-4 Vision"""
import httpx
import base64
import os
from datetime import datetime
from app.database import SessionLocal
from app.models.job import Job
from app.models.asset import Asset
from app.config import settings
# Model used for the request payload and recorded on the job row.
_ALT_TEXT_MODEL = "gpt-4o"

# Character budgets promised to the model in the system prompt; reused when
# truncating fallback text so stored values respect the same limits.
_SHORT_ALT_LIMIT = 150
_LONG_ALT_LIMIT = 400

_ALT_TEXT_SYSTEM_PROMPT = (
    "You are an expert at writing accessible alt text for images.\n"
    "Your alt text should:\n"
    "- Be concise and descriptive\n"
    "- Focus on the most important elements\n"
    '- Avoid starting with "image of" or "picture of"\n'
    "- Include any text visible in the image\n"
    "- Be factual and non-subjective\n"
    "Provide two versions:\n"
    "1. Short version: 150 characters or less\n"
    "2. Long version: 400 characters or less"
)

_ALT_TEXT_USER_PROMPT = (
    "Please analyze this image and provide alt text descriptions in the "
    "following format exactly:\n\n"
    "Short version: [brief description]\n\n"
    "Long version: [detailed description]"
)

# (result key, lower-cased label) pairs looked for at the start of a line.
_ALT_TEXT_LABELS = (
    ("short", "short version:"),
    ("long", "long version:"),
)


def _parse_alt_text(content):
    """Split the model's reply into ``(short_alt, long_alt)``.

    Each line is checked for a leading ``Short version:`` / ``Long version:``
    label, case-insensitively. Leading whitespace, list numbering and
    Markdown emphasis around the label (e.g. ``**Short version:** ...``) are
    tolerated. If a label occurs more than once, the last occurrence wins.

    Fallbacks:
      * neither label found -> the raw reply truncated to 150 / 400 chars;
      * only one label found -> the missing variant is derived from the
        other one so that neither field is stored empty.
    """
    found = {"short": "", "long": ""}
    for raw_line in content.splitlines():
        # Drop decoration that may precede the label; the stripped form is
        # only kept when a label is actually recognised.
        line = raw_line.strip().lstrip("0123456789.-*_# \t")
        for key, label in _ALT_TEXT_LABELS:
            # Compare a lower-cased prefix and slice by length, so the label
            # is removed whatever its casing.
            if line[:len(label)].lower() == label:
                found[key] = line[len(label):].strip().lstrip("*_").strip()
                break

    short_alt, long_alt = found["short"], found["long"]
    if not short_alt and not long_alt:
        return content[:_SHORT_ALT_LIMIT], content[:_LONG_ALT_LIMIT]
    if not short_alt:
        short_alt = long_alt[:_SHORT_ALT_LIMIT]
    elif not long_alt:
        long_alt = short_alt
    return short_alt, long_alt


async def generate(job_id: str):
    """Generate alt text for a job's first input image via OpenAI chat completions.

    Loads the ``Job`` with the given id, reads the file of its first input
    asset, sends it base64-encoded (as a data URL) to the ``gpt-4o`` model and
    stores the parsed result in ``job.output_data`` under the keys
    ``short_alt_text``, ``long_alt_text`` and ``raw_response``. Progress
    (10 / 20 / 80 / 100) is committed as the work advances.

    Args:
        job_id: Primary key of the ``Job`` row to process.

    Returns:
        None. If no job with ``job_id`` exists the function returns without
        doing anything.

    Raises:
        Exception: Only when the failure happens before the job row could be
            loaded (there is then nothing to record the error on), or when
            persisting the failure itself fails. Every other error is
            recorded on the job (``status = "failed"``, ``error_message``)
            and swallowed.
    """
    db = SessionLocal()
    # Bound up-front so the except-handler can tell "job never loaded" apart
    # from "job loaded, processing failed".
    job = None
    try:
        job = db.query(Job).filter(Job.id == job_id).first()
        if not job:
            return

        input_asset_ids = job.input_asset_ids
        if not input_asset_ids:
            raise ValueError("No input asset provided")

        input_asset = db.query(Asset).filter(Asset.id == input_asset_ids[0]).first()
        if not input_asset:
            raise ValueError("Input asset not found")

        job.progress = 10
        job.api_provider = "openai"
        job.api_model = _ALT_TEXT_MODEL
        db.commit()

        # Read and encode image.
        # NOTE(review): this is a blocking read inside a coroutine; fine if
        # each job runs on its own event loop — confirm against the caller.
        with open(input_asset.file_path, "rb") as f:
            image_data = base64.b64encode(f.read()).decode("utf-8")

        job.progress = 20
        db.commit()

        # Call the vision-capable chat completions endpoint.
        async with httpx.AsyncClient(timeout=60) as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {settings.openai_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": _ALT_TEXT_MODEL,
                    "messages": [
                        {
                            "role": "system",
                            "content": _ALT_TEXT_SYSTEM_PROMPT,
                        },
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": _ALT_TEXT_USER_PROMPT,
                                },
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{input_asset.mime_type};base64,{image_data}"
                                    },
                                },
                            ],
                        },
                    ],
                    "max_tokens": 500,
                },
            )
            response.raise_for_status()
            result = response.json()

        job.progress = 80
        db.commit()

        # Parse response. Guard against an empty ``choices`` list and a null
        # ``content`` so the job fails with a readable message instead of an
        # IndexError / AttributeError.
        choices = result.get("choices") or [{}]
        message = choices[0].get("message") or {}
        content = message.get("content") or ""
        if not content.strip():
            raise ValueError("Model returned no alt text")

        short_alt, long_alt = _parse_alt_text(content)

        job.output_data = {
            "short_alt_text": short_alt,
            "long_alt_text": long_alt,
            "raw_response": content,
        }
        job.progress = 100
        job.status = "completed"
        # NOTE(review): utcnow() is deprecated since Python 3.12; kept because
        # the column's timezone handling is not visible from here.
        job.completed_at = datetime.utcnow()
        db.commit()
    except Exception as e:
        # A failed flush/commit leaves the session in a state where further
        # commits raise; roll back first so the failure can be persisted.
        db.rollback()
        if job is None:
            # The job row was never loaded, so there is nowhere to record the
            # error — surface the original exception instead of masking it.
            raise
        job.status = "failed"
        job.error_message = str(e)
        db.commit()
    finally:
        db.close()