Features: - Image generation (OpenAI, Gemini, Leonardo, Bria, Stability, Flux) - Nano Banana iterative editing - Video generation and upscaling - Audio TTS, STT, sound effects (ElevenLabs) - Text prompt studio and alt text - User authentication with JWT/cookies - Admin panel with voice management - Job queue with Celery - PostgreSQL + Redis backend - Next.js 15 + FastAPI architecture 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
890 lines
33 KiB
Python
890 lines
33 KiB
Python
"""Image Generator Service - Multiple AI Providers
|
|
|
|
Supported Providers:
|
|
- openai: GPT-Image-1 (latest) or DALL-E 3
|
|
- imagen: Google Imagen 4 (Standard, Ultra, Fast)
|
|
- nano-banana: Gemini 2.5 Flash Image / Nano Banana Pro
|
|
- stable-diffusion: Stability AI SDXL, SD3, image-to-image
|
|
- leonardo: Leonardo.ai models
|
|
- ideogram: Ideogram v2 with text rendering
|
|
- flux: Black Forest Labs Flux Pro
|
|
- bria: Bria AI (base, fast)
|
|
|
|
OpenAI GPT-Image-1 (April 2025):
|
|
- model: 'gpt-image-1' (default) or 'dall-e-3'
|
|
- quality: 'low', 'medium', 'high' (default high)
|
|
- size: 1024x1024, 1024x1536, 1536x1024
|
|
- background: 'transparent', 'opaque', 'auto' (for PNG/WebP)
|
|
- output_format: 'png', 'jpeg', 'webp'
|
|
- n: 1-10 images per request (only the first returned image is saved)
|
|
- Pricing: ~$0.02 (low), $0.07 (medium), $0.19 (high) per image
|
|
|
|
Google Imagen 4 (December 2025):
|
|
- model: 'imagen-4.0-generate-001' (default), 'imagen-4.0-ultra-generate-001', 'imagen-4.0-fast-generate-001'
|
|
- image_size: '1K', '2K' (Ultra/Standard only)
|
|
- aspect_ratio: '1:1', '3:4', '4:3', '9:16', '16:9'
|
|
- number_of_images: 1-4 (only the first returned image is saved)
|
|
- enhance_prompt: true/false (LLM prompt enhancement)
|
|
- person_generation: 'dont_allow', 'allow_adult', 'allow_all'
|
|
- Pricing: $0.02 (Fast), $0.04 (Standard), $0.06 (Ultra) per image
|
|
|
|
Nano Banana / Gemini Image (December 2025):
|
|
- model: 'gemini-2.5-flash-image' (Nano Banana), 'gemini-3-pro-image-preview' (Nano Banana Pro)
|
|
- aspect_ratio: '1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9'
|
|
- image_size: '1K', '2K', '4K' (Pro only for 4K)
|
|
- Features: Text rendering, image editing, multi-turn conversation
|
|
- Pricing: ~$0.04 per 1MP image
|
|
|
|
DALL-E 3 Options:
|
|
- quality: 'standard' or 'hd' (default hd)
|
|
- style: 'vivid' (hyper-real) or 'natural' (more realistic)
|
|
- size: 1024x1024, 1024x1792, 1792x1024
|
|
|
|
Stability AI Options:
|
|
- model: sd3.5-large, sd3.5-medium, sd3-large, sd3-medium, sdxl-1.0
|
|
- aspect_ratio: 1:1, 16:9, 9:16, 4:3, 3:4, 21:9, 9:21
|
|
- negative_prompt: What to avoid in generation
|
|
- image_to_image: Use input image as starting point
|
|
- strength: 0.0-1.0 for image-to-image (how much to change)
|
|
- style_preset: enhance, anime, photographic, digital-art, etc.
|
|
"""
|
|
import httpx
|
|
import os
|
|
import base64
|
|
import logging
|
|
from uuid import uuid4
|
|
from datetime import datetime
|
|
from typing import Optional, Dict, Any, Tuple
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
from app.database import SessionLocal
|
|
from app.models.job import Job
|
|
from app.models.asset import Asset
|
|
from app.config import settings
|
|
|
|
# Provider configurations
|
|
# Registry describing each image provider: display name, model identifiers and
# the option values listed for that provider. Keys are the provider ids that
# generate() compares against job input. In the code visible here only
# _generate_leonardo reads this table (to turn a model id into a name).
IMAGE_PROVIDERS = {
    "openai": {
        "name": "OpenAI Image Generation",
        "models": ["gpt-image-1", "dall-e-3", "dall-e-2"],
        "default_model": "gpt-image-1",
        # Per-model option sets, keyed by model id.
        "gpt-image-1": {
            "sizes": ["1024x1024", "1024x1536", "1536x1024"],
            "qualities": ["low", "medium", "high"],
            "output_formats": ["png", "jpeg", "webp"],
            "backgrounds": ["auto", "transparent", "opaque"],
            "max_images": 10
        },
        "dall-e-3": {
            "sizes": ["1024x1024", "1024x1792", "1792x1024"],
            "qualities": ["standard", "hd"],
            "styles": ["vivid", "natural"]
        },
        "supports_styles": True
    },
    "imagen": {
        "name": "Google Imagen 4",
        "models": ["imagen-4.0-generate-001", "imagen-4.0-ultra-generate-001", "imagen-4.0-fast-generate-001"],
        "default_model": "imagen-4.0-generate-001",
        "aspect_ratios": ["1:1", "3:4", "4:3", "9:16", "16:9"],
        "image_sizes": ["1K", "2K"],
        "max_images": 4,
        "supports_enhance_prompt": True,
        "supports_person_generation": True
    },
    "nano-banana": {
        "name": "Nano Banana (Gemini Image)",
        "models": ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"],
        "default_model": "gemini-2.5-flash-image",
        "aspect_ratios": ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"],
        "image_sizes": ["1K", "2K", "4K"],
        "supports_text_rendering": True,
        "supports_image_editing": True
    },
    "stable-diffusion": {
        "name": "Stability AI",
        "models": ["sd3.5-large", "sd3.5-medium", "sd3-large", "sd3-medium", "sdxl-1.0"],
        "default_model": "sd3.5-large",
        "aspect_ratios": ["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"],
        "supports_img2img": True,
        "supports_negative_prompt": True
    },
    "leonardo": {
        "name": "Leonardo.ai",
        # Unlike the other providers, "models" maps model UUID -> display name.
        "models": {
            # Latest Models (2025)
            "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3": "Leonardo Phoenix 1.0",
            "7b592283-e8a7-4c5a-9ba6-d18c31f258b9": "Lucid Origin",
            "05ce0082-2d80-4a2d-8653-4d1c85e2418e": "Lucid Realism",
            "28aeddf8-bd19-4803-80fc-79602d1a9989": "FLUX.1 Kontext",
            "b2614463-296c-462a-9586-aafdb8f00e36": "Flux Dev",
            "1dd50843-d653-4516-a8e3-f0238ee453ff": "Flux Schnell",
            # Phoenix/XL Models
            "6b645e3a-d64f-4341-a6d8-7a3690fbf042": "Leonardo Phoenix 0.9",
            "e71a1c2f-4f80-4800-934f-2c68979d8cc8": "Leonardo Anime XL",
            "b24e16ff-06e3-43eb-8d33-4416c2d75876": "Leonardo Lightning XL",
            "aa77f04e-3eec-4034-9c07-d0f619684628": "Leonardo Kino XL",
            "5c232a9e-9061-4777-980a-ddc8e65647c6": "Leonardo Vision XL",
            "1e60896f-3c26-4296-8ecc-53e2afecc132": "Leonardo Diffusion XL",
            # SDXL Models
            "16e7060a-803e-4df3-97ee-edcfa5dc9cc8": "SDXL 1.0",
            "2067ae52-33fd-4a82-bb92-c2c55e7d2786": "AlbedoBase XL",
            "b63f7119-31dc-4540-969b-2a9df997e173": "SDXL 0.9",
            # Style Models
            "f1929ea3-b169-4c18-a16c-5d58b4292c69": "RPG v5",
            "d69c8273-6b17-4a30-a13e-d6637ae1c644": "3D Animation Style",
            "ac614f96-1082-45bf-be9d-757f2d31c174": "DreamShaper v7",
            "e316348f-7773-490e-adcd-46757c738eb7": "Absolute Reality v1.6"
        },
        # UUID of "Leonardo Phoenix 1.0" in the mapping above.
        "default_model": "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3",
        "widths": [512, 768, 1024, 1472],
        "heights": [512, 768, 832, 1024],
        "style_presets": [
            "ANIME", "BOKEH", "CINEMATIC", "CINEMATIC_CLOSEUP", "CREATIVE",
            "DYNAMIC", "ENVIRONMENT", "FASHION", "FILM", "FOOD", "GENERAL",
            "HDR", "ILLUSTRATION", "LEONARDO", "LONG_EXPOSURE", "MACRO",
            "MINIMALISTIC", "MONOCHROME", "MOODY", "NONE", "NEUTRAL",
            "PHOTOGRAPHY", "PORTRAIT", "RAYTRACED", "RENDER_3D", "RETRO",
            "SKETCH_BW", "SKETCH_COLOR", "STOCK_PHOTO", "VIBRANT", "UNPROCESSED"
        ],
        "supports_img2img": True,
        "supports_character_reference": True,
        "supports_style_reference": True
    },
    "bria": {
        "name": "Bria AI",
        "models": ["base", "fast"],
        "default_model": "base",
        "aspect_ratios": ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9"],
        "mediums": ["photography", "art"],
        "supports_prompt_enhancement": True,
        # Two-element lists; presumably [min, max] bounds - TODO confirm.
        "base_config": {"steps_num": [20, 50], "guidance_scale": [1, 10]},
        "fast_config": {"steps_num": [4, 10]}
    },
    # The two entries below define no "default_model" key.
    "ideogram": {
        "name": "Ideogram",
        "models": ["V_2", "V_2_TURBO"],
        "supports_text_rendering": True
    },
    "flux": {
        "name": "Flux Pro",
        "models": ["flux-pro-1.1", "flux-dev", "flux-schnell"],
        "supports_img2img": True
    }
}
|
|
|
|
# Style preset names for Stability AI (the module docstring lists
# `style_preset` among the Stability options). None of the generator
# functions visible in this part of the file reads this list.
STABILITY_STYLE_PRESETS = [
    "enhance", "anime", "photographic", "digital-art", "comic-book",
    "fantasy-art", "line-art", "analog-film", "neon-punk", "isometric",
    "low-poly", "origami", "modeling-compound", "cinematic", "3d-model", "pixel-art"
]
|
|
|
|
|
|
async def generate(job_id: str):
    """Run one image-generation job end to end.

    Loads the ``Job`` row for *job_id*, dispatches to the generator selected
    by ``input_data["provider"]`` (default ``"openai"``), writes the returned
    bytes to ``<settings.storage_path>/images``, records an ``Asset`` row and
    marks the job ``completed``.

    Failures never propagate to the caller: any exception is logged and the
    job is marked ``failed`` with the error text in ``error_message``. A
    provider that returns no image is treated as a failure. Nothing happens
    when no job matches *job_id*.
    """
    # provider id -> (generator, input key that carries the model or None,
    # default model / fixed label stored in job.api_model).
    providers = {
        "openai": (_generate_openai, "model", "gpt-image-1"),
        "dalle3": (_generate_openai, "model", "gpt-image-1"),
        "imagen": (_generate_imagen, "model", "imagen-4.0-generate-001"),
        "nano-banana": (_generate_nano_banana, "model", "gemini-2.5-flash-image"),
        "gemini": (_generate_nano_banana, "model", "gemini-2.5-flash-image"),
        "stable-diffusion": (_generate_stability, "model", "sd3.5-large"),
        "leonardo": (_generate_leonardo, None, "leonardo"),
        "ideogram": (_generate_ideogram, None, "ideogram-v2"),
        "flux": (_generate_flux, None, "flux-pro"),
        "bria": (_generate_bria, "model", "base"),
    }
    # The generators name their files with the real output extension, so the
    # asset MIME type is derived from it instead of assuming PNG.
    mime_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".webp": "image/webp",
    }

    db = SessionLocal()
    job = None  # bound before the try so the except handler can test it
    try:
        job = db.query(Job).filter(Job.id == job_id).first()
        if not job:
            return

        input_data = job.input_data
        provider = input_data.get("provider", "openai")
        prompt = input_data.get("prompt", "")

        # Update progress
        job.progress = 10
        job.api_provider = provider
        db.commit()

        if provider not in providers:
            raise ValueError(f"Unknown provider: {provider}")
        generator, model_key, model_default = providers[provider]

        image_data, filename = await generator(input_data)
        job.api_model = (
            input_data.get(model_key, model_default) if model_key else model_default
        )

        job.progress = 80
        db.commit()

        # Generators return (None, None) when the provider produced nothing
        # (empty response, polling timeout). That is a failed job, not a
        # completed one with no asset.
        if not image_data or not filename:
            raise RuntimeError(f"Provider {provider} returned no image")

        # Save image
        storage_path = os.path.join(settings.storage_path, "images")
        os.makedirs(storage_path, exist_ok=True)
        file_path = os.path.join(storage_path, filename)

        with open(file_path, "wb") as f:
            f.write(image_data)

        # Create asset
        extension = os.path.splitext(filename)[1].lower()
        asset = Asset(
            user_id=job.user_id,
            project_id=job.project_id,
            original_filename=filename,
            stored_filename=filename,
            file_path=file_path,
            file_type="image",
            mime_type=mime_types.get(extension, "image/png"),
            file_size_bytes=len(image_data),
            source_module="image_generator",
            source_job_id=job.id,
            metadata={
                "prompt": prompt,
                "provider": provider,
                "model": job.api_model
            }
        )
        db.add(asset)
        db.commit()
        db.refresh(asset)

        job.output_asset_ids = [asset.id]
        job.output_data = {"asset_id": str(asset.id), "file_path": file_path}

        job.progress = 100
        job.status = "completed"
        job.completed_at = datetime.utcnow()
        db.commit()

    except Exception as e:
        logger.exception(f"Image generation job {job_id} failed")
        # Clear any half-finished transaction so the failure state can be
        # committed even when the error came from the session itself.
        db.rollback()
        if job is not None:
            job.status = "failed"
            job.error_message = str(e)
            db.commit()
    finally:
        db.close()
|
|
|
|
|
|
async def _generate_openai(input_data: dict) -> Tuple[Optional[bytes], Optional[str]]:
    """Generate an image with OpenAI GPT-Image-1, DALL-E 3 or DALL-E 2.

    Common input keys:
    - prompt: Text description
    - model: 'gpt-image-1' (default); any other value takes the DALL-E path
    - width / height: only their ordering matters; it selects a landscape,
      portrait or square size supported by the model

    GPT-Image-1 keys:
    - quality: 'low', 'medium', 'high' (default: high)
    - background: 'transparent', 'opaque', 'auto' (default: auto)
    - output_format: 'png', 'jpeg', 'webp' (default: png)
    - output_compression: 0-100, sent only for jpeg/webp when not 100
    - moderation: 'auto' or 'low'
    - n: clamped to 1-10; only the first returned image is used

    DALL-E 3 keys:
    - quality: 'standard' or 'hd' (default: hd)
    - style: 'vivid' or 'natural' (default: vivid)

    Returns:
        ``(image_bytes, filename)``, or ``(None, None)`` when the response
        carries no base64 image.

    Raises:
        ValueError: the OpenAI API key is not configured.
        httpx.HTTPStatusError: the API answered with an error status.
    """
    if not settings.openai_api_key:
        raise ValueError("OPENAI_API_KEY not configured")

    prompt = input_data.get("prompt", "")
    model = input_data.get("model", "gpt-image-1")
    # Explicit nulls fall back to the square default instead of breaking the
    # comparison below.
    width = input_data.get("width") or 1024
    height = input_data.get("height") or 1024
    is_gpt_image = model == "gpt-image-1"

    # Determine size based on width/height
    if model == "dall-e-2" or width == height:
        # DALL-E 2 only offers square sizes; the 1792px variants are DALL-E 3.
        size = "1024x1024"
    elif width > height:
        size = "1536x1024" if is_gpt_image else "1792x1024"
    else:
        size = "1024x1536" if is_gpt_image else "1024x1792"

    if is_gpt_image:
        quality = input_data.get("quality", "high")
        background = input_data.get("background", "auto")
        output_format = input_data.get("output_format", "png")
        output_compression = input_data.get("output_compression", 100)
        moderation = input_data.get("moderation", "auto")
        n = max(1, min(int(input_data.get("n") or 1), 10))

        payload = {
            "model": "gpt-image-1",
            "prompt": prompt,
            "size": size,
            "quality": quality,
            "n": n
        }

        # Optional parameters are only sent when they differ from the defaults
        if background != "auto":
            payload["background"] = background
        if output_format != "png":
            payload["output_format"] = output_format
        if output_format in ["jpeg", "webp"] and output_compression != 100:
            payload["output_compression"] = output_compression
        if moderation != "auto":
            payload["moderation"] = moderation

        ext = output_format if output_format in ["png", "jpeg", "webp"] else "png"
        name_prefix = f"gptimage1_{quality}"
    else:
        # DALL-E 3 (or DALL-E 2): base64 must be requested explicitly
        quality = input_data.get("quality", "hd")
        style = input_data.get("style", "vivid")

        payload = {
            "model": model,
            "prompt": prompt,
            "size": size,
            "n": 1,
            "response_format": "b64_json"
        }

        # DALL-E 3 specific options
        if model == "dall-e-3":
            payload["quality"] = quality
            payload["style"] = style

        ext = "png"
        name_prefix = f"{model.replace('-', '')}_{style if model == 'dall-e-3' else 'gen'}"

    async with httpx.AsyncClient(timeout=180) as client:
        response = await client.post(
            "https://api.openai.com/v1/images/generations",
            headers={
                "Authorization": f"Bearer {settings.openai_api_key}",
                "Content-Type": "application/json"
            },
            json=payload
        )
        response.raise_for_status()
        data = response.json()

    images = data.get("data")
    if images:
        b64_image = images[0].get("b64_json")
        if b64_image:
            filename = f"{name_prefix}_{uuid4()}.{ext}"
            return base64.b64decode(b64_image), filename

    return None, None
|
|
|
|
|
|
async def _generate_stability(input_data: dict, input_image_data: Optional[bytes] = None) -> Tuple[Optional[bytes], Optional[str]]:
    """Generate an image with the Stability AI SD3 endpoint.

    Input keys:
    - prompt: Text description (required)
    - negative_prompt: What to avoid in generation
    - model: 'sd3.5-large' (default), 'sd3.5-medium', 'sd3-large', 'sd3-medium'
    - aspect_ratio: '1:1' (default), '16:9', '9:16', '4:3', '3:4', '21:9', '9:21'
    - seed: Optional seed for reproducibility
    - output_format: file format, default 'png'
    - strength: image-to-image only, default 0.7

    Passing *input_image_data* switches the request to image-to-image mode.

    Returns:
        ``(image_bytes, filename)``.

    Raises:
        ValueError: API key or prompt missing.
        Exception: the API answered with a non-200 status.
    """
    if not settings.stability_api_key:
        raise ValueError("Stability API key not configured")

    prompt = input_data.get("prompt", "")
    if not prompt:
        raise ValueError("Prompt is required")

    negative_prompt = input_data.get("negative_prompt", "")
    model = input_data.get("model", "sd3.5-large")
    seed = input_data.get("seed")
    output_format = input_data.get("output_format", "png")

    form_data = {
        "prompt": prompt,
        "mode": "text-to-image",
        "model": model,
        "aspect_ratio": input_data.get("aspect_ratio", "1:1"),
        "output_format": output_format,
    }
    if negative_prompt:
        form_data["negative_prompt"] = negative_prompt
    if seed is not None:
        form_data["seed"] = seed

    # Image-to-image mode
    if input_image_data:
        form_data["mode"] = "image-to-image"
        form_data["strength"] = input_data.get("strength", 0.7)
        # The SD3 endpoint documents aspect_ratio as text-to-image only; the
        # output follows the input image.
        del form_data["aspect_ratio"]

    # Stability requires multipart/form-data. httpx only produces multipart
    # when `files` is non-empty (a bare `data=` is sent urlencoded), so the
    # text fields are passed as filename-less parts.
    multipart = {name: (None, str(value)) for name, value in form_data.items()}
    if input_image_data:
        multipart["image"] = ("input.png", input_image_data, "image/png")

    async with httpx.AsyncClient(timeout=180) as client:
        try:
            response = await client.post(
                "https://api.stability.ai/v2beta/stable-image/generate/sd3",
                headers={
                    "Authorization": f"Bearer {settings.stability_api_key}",
                    "Accept": "image/*"
                },
                files=multipart
            )
        except Exception as e:
            logger.error(f"Stability AI generation error: {e}")
            raise

    if response.status_code != 200:
        error_text = response.text
        logger.error(f"Stability AI error {response.status_code}: {error_text}")
        raise Exception(f"Stability AI error: {error_text}")

    model_short = model.replace("-", "").replace(".", "")
    filename = f"stability_{model_short}_{uuid4()}.{output_format}"
    return response.content, filename
|
|
|
|
|
|
async def _generate_leonardo(input_data: dict) -> tuple:
    """Generate an image with Leonardo AI.

    Creates a generation, then polls it every 2 seconds (90 attempts, about
    3 minutes) and downloads the first generated image.

    Input keys:
    - prompt: Text description
    - model: Leonardo model ID (default: the registry's ``default_model``)
    - width / height: Image dimensions (default 1024)
    - preset_style: Style preset (ANIME, CINEMATIC, PHOTOGRAPHY, etc.)
    - num_images: Number of images to generate (only the first is returned)
    - guidance_scale: How closely to follow the prompt
    - num_inference_steps: Quality/speed tradeoff
    - negative_prompt: What to avoid
    - init_image_id: For image-to-image
    - init_strength: How much to change the input image (default 0.5)

    Returns:
        ``(image_bytes, filename)``, or ``(None, None)`` when no generation
        id is returned, the generation completes without an image URL, or
        polling runs out.

    Raises:
        ValueError: the Leonardo API key is not configured.
        httpx.HTTPStatusError: creation or image download failed.
        Exception: Leonardo reported the generation as FAILED.
    """
    import asyncio

    if not settings.leonardo_api_key:
        raise ValueError("LEONARDO_API_KEY not configured")

    leonardo_config = IMAGE_PROVIDERS["leonardo"]
    # Take the default from the registry so this function and the provider
    # table cannot drift apart.
    model_id = input_data.get("model", leonardo_config["default_model"])

    # Build request payload
    payload = {
        "prompt": input_data.get("prompt"),
        "modelId": model_id,
        "width": input_data.get("width", 1024),
        "height": input_data.get("height", 1024),
        "num_images": input_data.get("num_images", 1),
    }

    # Optional parameters: (input key, API field); sent only when truthy
    optional_fields = (
        ("preset_style", "presetStyle"),
        ("guidance_scale", "guidance_scale"),
        ("num_inference_steps", "num_inference_steps"),
        ("negative_prompt", "negative_prompt"),
    )
    for input_key, api_field in optional_fields:
        value = input_data.get(input_key)
        if value:
            payload[api_field] = value

    # Image-to-image support
    if input_data.get("init_image_id"):
        payload["init_image_id"] = input_data.get("init_image_id")
        payload["init_strength"] = input_data.get("init_strength", 0.5)

    auth_header = {"Authorization": f"Bearer {settings.leonardo_api_key}"}

    async with httpx.AsyncClient(timeout=180) as client:
        # Create generation
        response = await client.post(
            "https://cloud.leonardo.ai/api/rest/v1/generations",
            headers={**auth_header, "Content-Type": "application/json"},
            json=payload
        )
        response.raise_for_status()
        data = response.json()

        generation_id = (data.get("sdGenerationJob") or {}).get("generationId")
        if not generation_id:
            return None, None

        # Poll for result
        for _ in range(90):  # Wait up to 3 minutes
            await asyncio.sleep(2)
            status_response = await client.get(
                f"https://cloud.leonardo.ai/api/rest/v1/generations/{generation_id}",
                headers=auth_header
            )
            # The key can be present but null; treat that as "no status yet".
            generation = status_response.json().get("generations_by_pk") or {}
            status = generation.get("status")

            if status == "FAILED":
                raise Exception("Leonardo generation failed")
            if status != "COMPLETE":
                continue

            images = generation.get("generated_images") or []
            image_url = images[0].get("url") if images else None
            if not image_url:
                # COMPLETE is terminal; polling further cannot add an image.
                break

            img_response = await client.get(image_url)
            # Do not store an error page as if it were the image.
            img_response.raise_for_status()
            model_name = leonardo_config["models"].get(model_id, "leonardo")
            filename = f"leonardo_{model_name.replace(' ', '_').lower()}_{uuid4()}.png"
            return img_response.content, filename

    return None, None
|
|
|
|
|
|
async def _generate_bria(input_data: dict) -> tuple:
    """Generate an image with Bria AI.

    Input keys:
    - prompt: Text description
    - model: 'base' (default); any other value uses the 'fast' endpoint
    - aspect_ratio: Image aspect ratio
    - medium: 'photography' or 'art'
    - prompt_enhancement: Enable AI prompt enhancement
    - steps_num: Number of inference steps (capped at 10 for 'fast')
    - guidance_scale: 'base' only, sent as ``text_guidance_scale``
    - negative_prompt: What to avoid

    Returns:
        ``(image_bytes, filename)``, or ``(None, None)`` when the response
        holds no image URL.

    Raises:
        ValueError: the Bria API key is not configured.
        httpx.HTTPStatusError: generation or image download failed.
    """
    if not settings.bria_api_key:
        raise ValueError("BRIA_API_KEY not configured")

    model = input_data.get("model", "base")
    # NOTE(review): Bria's published endpoints appear to include a model
    # version segment (e.g. .../base/2.3) - confirm this path is accepted.
    base_url = "https://engine.prod.bria-api.com/v1/text-to-image"

    # Build request payload
    payload = {
        "prompt": input_data.get("prompt"),
        "num_results": 1
    }

    # Pass-through options, sent only when truthy
    for key in ("aspect_ratio", "medium", "negative_prompt"):
        value = input_data.get(key)
        if value:
            payload[key] = value

    if input_data.get("prompt_enhancement"):
        payload["prompt_enhancement"] = True

    # Model-specific parameters
    steps_num = input_data.get("steps_num")
    if model == "base":
        url = f"{base_url}/base"
        if steps_num:
            payload["steps_num"] = steps_num
        if input_data.get("guidance_scale"):
            payload["text_guidance_scale"] = input_data.get("guidance_scale")
    else:
        url = f"{base_url}/fast"
        if steps_num:
            payload["steps_num"] = min(steps_num, 10)

    async with httpx.AsyncClient(timeout=120) as client:
        response = await client.post(
            url,
            headers={
                "api_token": settings.bria_api_key,
                "Content-Type": "application/json"
            },
            json=payload
        )
        response.raise_for_status()
        data = response.json()

        # Get the result
        result = data.get("result") or []
        if result:
            # Accept both shapes: {"urls": {"url": ...}} and {"urls": [...]}.
            urls = result[0].get("urls")
            if isinstance(urls, dict):
                image_url = urls.get("url")
            elif isinstance(urls, (list, tuple)) and urls:
                image_url = urls[0]
            else:
                image_url = None

            if image_url:
                img_response = await client.get(image_url)
                # Do not store an error page as if it were the image.
                img_response.raise_for_status()
                filename = f"bria_{model}_{uuid4()}.png"
                return img_response.content, filename

    return None, None
|
|
|
|
|
|
async def _generate_ideogram(input_data: dict) -> tuple:
    """Generate an image with Ideogram.

    Input keys:
    - prompt: Text description
    - model: one of the registry's Ideogram models; anything else (including
      a missing key) uses 'V_2'
    - aspect_ratio: 'W:H' string such as '16:9'; unsupported or missing
      values use '1:1'

    Returns:
        ``(image_bytes, filename)``, or ``(None, None)`` when the response
        holds no image URL.

    Raises:
        ValueError: the Ideogram API key is not configured.
        httpx.HTTPStatusError: generation or image download failed.
    """
    if not settings.ideogram_api_key:
        raise ValueError("IDEOGRAM_API_KEY not configured")

    # Only honour a model the registry lists: job input may carry a "model"
    # value meant for another provider.
    model = input_data.get("model")
    if model not in IMAGE_PROVIDERS["ideogram"]["models"]:
        model = "V_2"

    # Ratios for which Ideogram defines an ASPECT_<W>_<H> enum value.
    supported_ratios = {
        "1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3",
        "16:10", "10:16", "3:1", "1:3",
    }
    ratio = input_data.get("aspect_ratio")
    if ratio not in supported_ratios:
        ratio = "1:1"
    aspect_ratio = f"ASPECT_{ratio.replace(':', '_')}"

    async with httpx.AsyncClient(timeout=120) as client:
        response = await client.post(
            "https://api.ideogram.ai/generate",
            headers={
                "Api-Key": settings.ideogram_api_key,
                "Content-Type": "application/json"
            },
            json={
                "image_request": {
                    "prompt": input_data.get("prompt"),
                    "model": model,
                    "aspect_ratio": aspect_ratio
                }
            }
        )
        response.raise_for_status()
        data = response.json()

        images = data.get("data")
        if images:
            image_url = images[0].get("url")
            if image_url:
                img_response = await client.get(image_url)
                # Do not store an error page as if it were the image.
                img_response.raise_for_status()
                filename = f"ideogram_{uuid4()}.png"
                return img_response.content, filename

    return None, None
|
|
|
|
|
|
async def _generate_flux(input_data: dict) -> tuple:
    """Generate an image with Flux (Black Forest Labs).

    Submits a flux-pro-1.1 request, then polls the result endpoint every
    2 seconds (60 attempts, about 2 minutes) and downloads the sample.

    Note: Requires FLUX_API_KEY from https://api.bfl.ml/
    May require paid account for flux-pro-1.1 model

    Input keys:
    - prompt: Text description
    - width / height: Image dimensions (default 1024)

    Returns:
        ``(image_bytes, filename)``, or ``(None, None)`` when no request id
        is returned, the result has no sample URL, or polling runs out.

    Raises:
        ValueError: API key missing, or the API answered 403.
        httpx.HTTPStatusError: submission or image download failed.
        Exception: the API reported a terminal failure status.
    """
    import asyncio

    if not settings.flux_api_key:
        raise ValueError("FLUX_API_KEY not configured")

    # Statuses after which the request can no longer become "Ready".
    terminal_failures = ("Error", "Request Moderated", "Content Moderated")

    async with httpx.AsyncClient(timeout=120) as client:
        try:
            response = await client.post(
                "https://api.bfl.ml/v1/flux-pro-1.1",
                headers={
                    "x-key": settings.flux_api_key,
                    "Content-Type": "application/json"
                },
                json={
                    "prompt": input_data.get("prompt"),
                    "width": input_data.get("width", 1024),
                    "height": input_data.get("height", 1024)
                }
            )

            if response.status_code == 403:
                logger.error("Flux API 403: Invalid API key or insufficient permissions")
                raise ValueError("Flux API key is invalid or your account doesn't have access to flux-pro-1.1")

            response.raise_for_status()
            request_id = response.json().get("id")
            if not request_id:
                return None, None

            # Poll for result
            for _ in range(60):
                await asyncio.sleep(2)
                status_response = await client.get(
                    "https://api.bfl.ml/v1/get_result",
                    params={"id": request_id},
                    headers={"x-key": settings.flux_api_key}
                )
                status_data = status_response.json()
                status = status_data.get("status")

                if status in terminal_failures:
                    # Fail now instead of polling a dead request to timeout.
                    raise Exception(f"Flux generation failed: {status}")
                if status != "Ready":
                    continue

                image_url = (status_data.get("result") or {}).get("sample")
                if not image_url:
                    # "Ready" is terminal; further polling cannot add a sample.
                    break

                img_response = await client.get(image_url)
                # Do not store an error page as if it were the image.
                img_response.raise_for_status()
                filename = f"flux_{uuid4()}.png"
                return img_response.content, filename

        except Exception as e:
            logger.error(f"Flux generation error: {e}")
            raise

    return None, None
|
|
|
|
|
|
async def _generate_gemini(input_data: dict) -> tuple:
    """Generate an image with Google Gemini ('gemini-2.0-flash-exp').

    generate() in this file routes the "gemini" provider to
    _generate_nano_banana, so nothing visible here calls this function.

    Input keys:
    - prompt: Text description

    Returns:
        ``(image_bytes, filename)`` from the first response part carrying
        inline data, or ``(None, None)`` when there is none.
    """
    import asyncio

    import google.generativeai as genai

    genai.configure(api_key=settings.google_api_key)
    model = genai.GenerativeModel("gemini-2.0-flash-exp")

    # NOTE(review): confirm that the SDK accepts "image/png" as a
    # response_mime_type; kept exactly as the original code sent it.
    generation_config = genai.types.GenerationConfig(
        response_mime_type="image/png"
    )

    # generate_content is a blocking call; run it in a worker thread so this
    # coroutine does not stall the event loop while waiting on the API.
    response = await asyncio.to_thread(
        model.generate_content,
        input_data.get("prompt"),
        generation_config=generation_config,
    )

    if response.candidates and response.candidates[0].content.parts:
        for part in response.candidates[0].content.parts:
            if hasattr(part, 'inline_data') and part.inline_data:
                filename = f"gemini_{uuid4()}.png"
                return part.inline_data.data, filename

    return None, None
|
|
|
|
|
|
async def _generate_imagen(input_data: dict) -> tuple:
    """Generate an image with Google Imagen via the Generative Language API.

    Falls back to _generate_nano_banana when the Imagen request raises or
    answers with a non-200 status.

    Input keys:
    - prompt: Text description of the image
    - model: one of the registry's Imagen models; anything else (including a
      missing key) uses the registry's ``default_model``
    - aspect_ratio: "1:1" (default), "3:4", "4:3", "9:16", "16:9"
    - number_of_images: clamped to 1-4; only the first image is returned
    - negative_prompt: What to avoid in the image

    Returns:
        ``(image_bytes, filename)``; ``(None, None)`` when a 200 response
        carries no prediction; otherwise whatever the fallback returns.

    Raises:
        ValueError: the Google API key is not configured.
    """
    if not settings.google_api_key:
        raise ValueError("GOOGLE_API_KEY not configured")

    prompt = input_data.get("prompt", "")
    negative_prompt = input_data.get("negative_prompt", "")
    aspect_ratio = input_data.get("aspect_ratio", "1:1")
    number_of_images = max(1, min(input_data.get("number_of_images") or 1, 4))

    # Honour the requested model so the request matches what generate()
    # records on the job.
    imagen_config = IMAGE_PROVIDERS["imagen"]
    model = input_data.get("model")
    if model not in imagen_config["models"]:
        model = imagen_config["default_model"]

    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:predict"
    # The key travels in a header rather than the query string, so it does
    # not end up in request logs that record URLs.
    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": settings.google_api_key,
    }

    payload = {
        "instances": [{"prompt": prompt}],
        "parameters": {
            "sampleCount": number_of_images,
            "aspectRatio": aspect_ratio,
        }
    }
    if negative_prompt:
        payload["instances"][0]["negativePrompt"] = negative_prompt

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(url, headers=headers, json=payload)

        if response.status_code == 200:
            predictions = response.json().get("predictions", [])
            if predictions and predictions[0].get("bytesBase64Encoded"):
                image_data = base64.b64decode(predictions[0]["bytesBase64Encoded"])
                model_short = model.replace("-", "").replace(".", "")
                filename = f"{model_short}_{uuid4()}.png"
                return image_data, filename
            return None, None

        logger.warning(f"Imagen API error: {response.status_code} - {response.text}")
    except Exception as e:
        logger.error(f"Imagen generation error: {e}")

    # The fallback runs outside the try block so that a failure inside it
    # propagates once instead of triggering a second fallback attempt.
    logger.info("Falling back to Nano Banana (Gemini native image generation)")
    return await _generate_nano_banana(input_data)
|
|
|
|
|
|
async def _generate_nano_banana(input_data: dict) -> tuple:
    """Generate an image with Nano Banana (Gemini native image generation).

    Both accepted model names, and any unknown name, currently map to the
    same Gemini model id 'gemini-2.0-flash-exp-image-generation'.

    Input keys:
    - prompt: Text description of the image
    - model: 'gemini-2.5-flash-image' (default) or 'gemini-3-pro-image-preview'
    - aspect_ratio: anything other than "1:1" is appended to the prompt as a
      textual hint
    - reference_image: Optional image for editing, either a base64 string or
      a ``data:`` URL. For a data URL the declared MIME type is forwarded;
      otherwise 'image/png' is assumed.

    Returns:
        ``(image_bytes, filename)`` from the first response part carrying
        inline data, or ``(None, None)`` when there is none.

    Raises:
        ValueError: *reference_image* is a data URL without a payload.
        Exception: errors from the Gemini call are logged and re-raised.
    """
    import asyncio

    import google.generativeai as genai

    genai.configure(api_key=settings.google_api_key)

    model_name = input_data.get("model", "gemini-2.5-flash-image")

    # Map model names to actual Gemini model IDs
    model_mapping = {
        "gemini-2.5-flash-image": "gemini-2.0-flash-exp-image-generation",
        "gemini-3-pro-image-preview": "gemini-2.0-flash-exp-image-generation",  # Use available model
    }
    actual_model = model_mapping.get(model_name, "gemini-2.0-flash-exp-image-generation")
    model = genai.GenerativeModel(actual_model)

    # Build the prompt - the aspect ratio is passed as a textual hint
    aspect_ratio = input_data.get("aspect_ratio", "1:1")
    prompt = input_data.get("prompt", "")
    if aspect_ratio != "1:1":
        prompt = f"{prompt} [aspect ratio: {aspect_ratio}]"

    contents = [prompt]

    # If a reference image is provided, send it alongside the prompt
    ref_data = input_data.get("reference_image")
    if ref_data:
        mime_type = "image/png"
        if isinstance(ref_data, str) and ref_data.startswith("data:"):
            # "data:image/jpeg;base64,<payload>" -> header and payload
            header, separator, encoded = ref_data.partition(",")
            if not separator:
                raise ValueError("reference_image data URL has no payload")
            declared_type = header[len("data:"):].split(";", 1)[0]
            if declared_type:
                mime_type = declared_type
            ref_data = encoded
        contents = [
            {
                "parts": [
                    {"text": prompt},
                    {
                        "inline_data": {
                            "mime_type": mime_type,
                            "data": ref_data
                        }
                    }
                ]
            }
        ]

    try:
        # generate_content is a blocking call; run it in a worker thread so
        # this coroutine does not stall the event loop.
        response = await asyncio.to_thread(model.generate_content, contents)

        if response.candidates and response.candidates[0].content.parts:
            for part in response.candidates[0].content.parts:
                if hasattr(part, 'inline_data') and part.inline_data:
                    filename = f"nano_banana_{uuid4()}.png"
                    return part.inline_data.data, filename
    except Exception as e:
        logger.error(f"Nano Banana generation error: {e}")
        raise

    return None, None
|