Features: - Image generation (OpenAI, Gemini, Leonardo, Bria, Stability, Flux) - Nano Banana iterative editing - Video generation and upscaling - Audio TTS, STT, sound effects (ElevenLabs) - Text prompt studio and alt text - User authentication with JWT/cookies - Admin panel with voice management - Job queue with Celery - PostgreSQL + Redis backend - Next.js 15 + FastAPI architecture 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
126 lines
4.2 KiB
Python
126 lines
4.2 KiB
Python
"""Alt Text Generator Service - OpenAI GPT-4 Vision"""
|
|
import httpx
|
|
import base64
|
|
import os
|
|
from datetime import datetime
|
|
|
|
from app.database import SessionLocal
|
|
from app.models.job import Job
|
|
from app.models.asset import Asset
|
|
from app.config import settings
|
|
|
|
|
|
# Model used for the request and recorded on the job row.
_ALT_TEXT_MODEL = "gpt-4o"
_OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions"

# Length limits promised to the model in the system prompt; also applied when
# a field has to be derived from other text instead of being parsed.
_SHORT_ALT_MAX_CHARS = 150
_LONG_ALT_MAX_CHARS = 400

# Characters a model may place in front of a label ("1. ", "- ", "## ", "* ").
_LABEL_PREFIX_CHARS = "#*->_ \t0123456789.)("

_ALT_TEXT_LABELS = (("short", "short version:"), ("long", "long version:"))

_SYSTEM_PROMPT = """You are an expert at writing accessible alt text for images.
Your alt text should:
- Be concise and descriptive
- Focus on the most important elements
- Avoid starting with "image of" or "picture of"
- Include any text visible in the image
- Be factual and non-subjective

Provide two versions:
1. Short version: 150 characters or less
2. Long version: 400 characters or less"""

_USER_PROMPT = (
    "Please analyze this image and provide alt text descriptions in the "
    "following format exactly:\n\n"
    "Short version: [brief description]\n\n"
    "Long version: [detailed description]"
)


def _extract_message_content(result: dict) -> str:
    """Return the first choice's message text from a chat-completions reply.

    Raises:
        ValueError: if the reply has no choices or the message content is
            missing, null, or blank.
    """
    choices = result.get("choices") or [{}]
    message = choices[0].get("message") or {}
    content = message.get("content")
    if not isinstance(content, str) or not content.strip():
        raise ValueError("OpenAI response did not contain any alt text")
    return content


def _parse_alt_text(content: str) -> tuple:
    """Split the model reply into ``(short_alt, long_alt)``.

    A label line is one that starts with "Short version:" or "Long version:"
    (case-insensitive), optionally preceded by list/markdown decoration.
    Text after the label, plus following lines up to the next blank line or
    label, belongs to that field. If a label appears more than once, the
    last occurrence wins.

    When neither label is found the whole reply is used, truncated to the
    short/long limits. When only one is found, the other is derived from it
    so that neither field is returned empty.
    """
    collected = {"short": [], "long": []}
    current = None

    for raw_line in content.splitlines():
        # Bold markers commonly wrap the label ("**Short version:**").
        line = raw_line.replace("**", "").strip()
        if not line:
            # A blank line ends a field only once it has some text, so
            # "Label:\n\ntext" still attaches the text to the label.
            if current is not None and collected[current]:
                current = None
            continue

        lowered = line.lower()
        for key, label in _ALT_TEXT_LABELS:
            pos = lowered.find(label)
            # Only treat it as a label if nothing but decoration precedes it.
            if pos != -1 and not line[:pos].strip(_LABEL_PREFIX_CHARS):
                current = key
                collected[key] = []
                remainder = line[pos + len(label):].strip().lstrip("*_").strip()
                if remainder:
                    collected[key].append(remainder)
                break
        else:
            if current is not None:
                collected[current].append(line)

    short_alt = " ".join(collected["short"])
    long_alt = " ".join(collected["long"])

    if not short_alt and not long_alt:
        stripped = content.strip()
        short_alt = stripped[:_SHORT_ALT_MAX_CHARS]
        long_alt = stripped[:_LONG_ALT_MAX_CHARS]
    elif not short_alt:
        short_alt = long_alt[:_SHORT_ALT_MAX_CHARS]
    elif not long_alt:
        long_alt = short_alt

    return short_alt, long_alt


async def generate(job_id: str):
    """Generate short and long alt text for a job's first input image.

    Loads the job and its first input asset, sends the base64-encoded image
    file to the OpenAI chat-completions endpoint, parses the reply, and
    stores ``short_alt_text``, ``long_alt_text`` and ``raw_response`` in
    ``job.output_data``. Progress is committed at 10, 20, 80 and 100.

    Args:
        job_id: Primary key of the ``Job`` row to process. If no such job
            exists the function returns without doing anything.

    Returns:
        None. Results and failures are recorded on the job row.

    Raises:
        Exception: only when the failure happens before the job row could be
            loaded, so there is nowhere to record it. Any other error is
            stored in ``job.error_message`` with status ``"failed"``.
    """
    db = SessionLocal()
    # Bound before the try so the failure handler can tell "no job loaded"
    # apart from "job loaded, then something failed".
    job = None
    try:
        job = db.query(Job).filter(Job.id == job_id).first()
        if not job:
            return

        input_asset_ids = job.input_asset_ids
        if not input_asset_ids:
            raise ValueError("No input asset provided")

        input_asset = db.query(Asset).filter(Asset.id == input_asset_ids[0]).first()
        if not input_asset:
            raise ValueError("Input asset not found")

        job.progress = 10
        job.api_provider = "openai"
        job.api_model = _ALT_TEXT_MODEL
        db.commit()

        # Read and encode the image for an inline data URL.
        with open(input_asset.file_path, "rb") as f:
            image_data = base64.b64encode(f.read()).decode("utf-8")

        job.progress = 20
        db.commit()

        payload = {
            "model": _ALT_TEXT_MODEL,
            "messages": [
                {"role": "system", "content": _SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": _USER_PROMPT},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{input_asset.mime_type};base64,{image_data}"
                            },
                        },
                    ],
                },
            ],
            "max_tokens": 500,
        }

        async with httpx.AsyncClient(timeout=60) as client:
            response = await client.post(
                _OPENAI_CHAT_URL,
                headers={
                    "Authorization": f"Bearer {settings.openai_api_key}",
                    "Content-Type": "application/json",
                },
                json=payload,
            )
            response.raise_for_status()
            result = response.json()

        job.progress = 80
        db.commit()

        content = _extract_message_content(result)
        short_alt, long_alt = _parse_alt_text(content)

        job.output_data = {
            "short_alt_text": short_alt,
            "long_alt_text": long_alt,
            "raw_response": content,
        }
        job.progress = 100
        job.status = "completed"
        job.completed_at = datetime.utcnow()
        db.commit()

    except Exception as e:
        # Clear any half-flushed state so the session can commit the failure.
        db.rollback()
        if job is None:
            # The job row was never loaded, so the error cannot be recorded
            # on it; surface the real exception to the caller.
            raise
        job.status = "failed"
        job.error_message = str(e)
        db.commit()
    finally:
        db.close()
|