"""Image Generator Service - Multiple AI Providers Supported Providers: - openai: GPT-Image-1 (latest) or DALL-E 3 - imagen: Google Imagen 4 (Standard, Ultra, Fast) - nano-banana: Gemini 2.5 Flash Image / Nano Banana Pro - stable-diffusion: Stability AI SDXL, SD3, image-to-image - leonardo: Leonardo.ai models - ideogram: Ideogram v2 with text rendering - flux: Black Forest Labs Flux Pro OpenAI GPT-Image-1 (April 2025): - model: 'gpt-image-1' (default) or 'dall-e-3' - quality: 'low', 'medium', 'high' (default high) - size: 1024x1024, 1024x1536, 1536x1024 - background: 'transparent', 'opaque', 'auto' (for PNG/WebP) - output_format: 'png', 'jpeg', 'webp' - n: 1-10 images per request - Pricing: ~$0.02 (low), $0.07 (medium), $0.19 (high) per image Google Imagen 4 (December 2025): - model: 'imagen-4.0-generate-001' (default), 'imagen-4.0-ultra-generate-001', 'imagen-4.0-fast-generate-001' - image_size: '1K', '2K' (Ultra/Standard only) - aspect_ratio: '1:1', '3:4', '4:3', '9:16', '16:9' - number_of_images: 1-4 - enhance_prompt: true/false (LLM prompt enhancement) - person_generation: 'dont_allow', 'allow_adult', 'allow_all' - Pricing: $0.02 (Fast), $0.04 (Standard), $0.06 (Ultra) per image Nano Banana / Gemini Image (December 2025): - model: 'gemini-2.5-flash-image' (Nano Banana), 'gemini-3-pro-image-preview' (Nano Banana Pro) - aspect_ratio: '1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9' - image_size: '1K', '2K', '4K' (Pro only for 4K) - Features: Text rendering, image editing, multi-turn conversation - Pricing: ~$0.04 per 1MP image DALL-E 3 Options: - quality: 'standard' or 'hd' (default hd) - style: 'vivid' (hyper-real) or 'natural' (more realistic) - size: 1024x1024, 1024x1792, 1792x1024 Stability AI Options: - model: sd3.5-large, sd3.5-medium, sd3-large, sd3-medium, sdxl-1.0 - aspect_ratio: 1:1, 16:9, 9:16, 4:3, 3:4, 21:9, 9:21 - negative_prompt: What to avoid in generation - image_to_image: Use input image as starting point - strength: 0.0-1.0 for image-to-image (how much to change) - style_preset: enhance, anime, photographic, digital-art, etc. """ import httpx import os import base64 import logging from uuid import uuid4 from datetime import datetime from typing import Optional, Dict, Any, Tuple logger = logging.getLogger(__name__) from app.database import SessionLocal from app.models.job import Job from app.models.asset import Asset from app.config import settings # Provider configurations IMAGE_PROVIDERS = { "openai": { "name": "OpenAI Image Generation", "models": ["gpt-image-1", "dall-e-3", "dall-e-2"], "default_model": "gpt-image-1", "gpt-image-1": { "sizes": ["1024x1024", "1024x1536", "1536x1024"], "qualities": ["low", "medium", "high"], "output_formats": ["png", "jpeg", "webp"], "backgrounds": ["auto", "transparent", "opaque"], "max_images": 10 }, "dall-e-3": { "sizes": ["1024x1024", "1024x1792", "1792x1024"], "qualities": ["standard", "hd"], "styles": ["vivid", "natural"] }, "supports_styles": True }, "imagen": { "name": "Google Imagen 4", "models": ["imagen-4.0-generate-001", "imagen-4.0-ultra-generate-001", "imagen-4.0-fast-generate-001"], "default_model": "imagen-4.0-generate-001", "aspect_ratios": ["1:1", "3:4", "4:3", "9:16", "16:9"], "image_sizes": ["1K", "2K"], "max_images": 4, "supports_enhance_prompt": True, "supports_person_generation": True }, "nano-banana": { "name": "Nano Banana (Gemini Image)", "models": ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"], "default_model": "gemini-2.5-flash-image", "aspect_ratios": ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"], "image_sizes": ["1K", "2K", "4K"], "supports_text_rendering": True, "supports_image_editing": True }, "stable-diffusion": { "name": "Stability AI", "models": ["sd3.5-large", "sd3.5-medium", "sd3-large", "sd3-medium", "sdxl-1.0"], "default_model": "sd3.5-large", "aspect_ratios": ["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"], "supports_img2img": True, "supports_negative_prompt": True }, "leonardo": { "name": "Leonardo.ai", "models": { # Latest Models (2025) # Phoenix: de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3 (Found in docs) "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3": "Leonardo Phoenix 1.0", "7b592283-e8a7-4c5a-9ba6-d18c31f258b9": "Lucid Origin", "05ce0082-2d80-4a2d-8653-4d1c85e2418e": "Lucid Realism", "28aeddf8-bd19-4803-80fc-79602d1a9989": "FLUX.1 Kontext", "b2614463-296c-462a-9586-aafdb8f00e36": "Flux Dev", "1dd50843-d653-4516-a8e3-f0238ee453ff": "Flux Schnell", # XL Models "aa77f04e-3eec-4034-9c07-d0f619684628": "Leonardo Kino XL", "5c232a9e-9061-4777-980a-ddc8e65647c6": "Leonardo Vision XL", "b24e16ff-06e3-43eb-8d33-4416c2d75876": "Leonardo Lightning XL", "1e60896f-3c26-4296-8ecc-53e2afecc132": "Leonardo Diffusion XL", # Older/Other Support "16e7060a-803e-4df3-97ee-edcfa5dc9cc8": "SDXL 1.0", "ac614f96-1082-45bf-be9d-757f2d31c174": "DreamShaper v7", "e316348f-7773-490e-adcd-46757c738eb7": "Absolute Reality v1.6" }, "default_model": "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3", # Explicit mapping for Aspect Ratio -> Dimensions (Width x Height) # These are generally safe for SDXL/Phoenix models "dimensions": { "1:1": {"width": 1024, "height": 1024}, "16:9": {"width": 1472, "height": 832}, "9:16": {"width": 832, "height": 1472}, "4:3": {"width": 1248, "height": 928}, # Approx for SDXL "3:4": {"width": 928, "height": 1248}, "21:9": {"width": 1536, "height": 640}, # Ultra wide "9:21": {"width": 640, "height": 1536} }, "style_presets": [ "ANIME", "BOKEH", "CINEMATIC", "CINEMATIC_CLOSEUP", "CREATIVE", "DYNAMIC", "ENVIRONMENT", "FASHION", "FILM", "FOOD", "GENERAL", "HDR", "ILLUSTRATION", "LEONARDO", "LONG_EXPOSURE", "MACRO", "MINIMALISTIC", "MONOCHROME", "MOODY", "NONE", "NEUTRAL", "PHOTOGRAPHY", "PORTRAIT", "RAYTRACED", "RENDER_3D", "RETRO", "SKETCH_BW", "SKETCH_COLOR", "STOCK_PHOTO", "VIBRANT", "UNPROCESSED" ], "supports_img2img": True, "supports_character_reference": True, "supports_style_reference": True }, "bria": { "name": "Bria AI", "models": ["base", "fast"], "default_model": "base", "aspect_ratios": ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9"], "mediums": ["photography", "art"], "supports_prompt_enhancement": True, "base_config": {"steps_num": [20, 50], "guidance_scale": [1, 10]}, "fast_config": {"steps_num": [4, 10]} }, "ideogram": { "name": "Ideogram", "models": ["V_2", "V_2_TURBO"], "supports_text_rendering": True }, "flux": { "name": "Flux Pro", "models": ["flux-pro-1.1", "flux-dev", "flux-schnell"], "supports_img2img": True } } STABILITY_STYLE_PRESETS = [ "enhance", "anime", "photographic", "digital-art", "comic-book", "fantasy-art", "line-art", "analog-film", "neon-punk", "isometric", "low-poly", "origami", "modeling-compound", "cinematic", "3d-model", "pixel-art" ] async def generate(job_id: str): """Generate image based on provider""" db = SessionLocal() try: job = db.query(Job).filter(Job.id == job_id).first() if not job: return input_data = job.input_data provider = input_data.get("provider", "openai") prompt = input_data.get("prompt", "") # Update progress job.progress = 10 job.api_provider = provider db.commit() image_data = None filename = None if provider == "openai" or provider == "dalle3": image_data, filename = await _generate_openai(input_data) job.api_model = input_data.get("model", "gpt-image-1") elif provider == "imagen": image_data, filename = await _generate_imagen(input_data) job.api_model = input_data.get("model", "imagen-4.0-generate-001") elif provider == "nano-banana" or provider == "gemini": # Fetch reference image if provided ref_id = input_data.get("reference_asset_id") ref_image_data = None if ref_id: ref_asset = db.query(Asset).filter(Asset.id == ref_id).first() if ref_asset and os.path.exists(ref_asset.file_path): with open(ref_asset.file_path, "rb") as f: ref_image_data = f.read() image_data, filename = await _generate_nano_banana(input_data, ref_image_data) job.api_model = input_data.get("model", "gemini-2.5-flash-image") elif provider == "stable-diffusion": image_data, filename = await _generate_stability(input_data) job.api_model = input_data.get("model", "sd3.5-large") elif provider == "leonardo": image_data, filename = await _generate_leonardo(input_data) job.api_model = "leonardo" elif provider == "ideogram": image_data, filename = await _generate_ideogram(input_data) job.api_model = "ideogram-v2" elif provider == "flux": image_data, filename = await _generate_flux(input_data) job.api_model = "flux-pro" elif provider == "bria": image_data, filename = await _generate_bria(input_data) job.api_model = input_data.get("model", "base") elif provider == "runway-image": image_data, filename = await _generate_runway_image(input_data) job.api_model = "gen4_image" else: raise ValueError(f"Unknown provider: {provider}") job.progress = 80 db.commit() # Save image if image_data: storage_path = os.path.join(settings.storage_path, "images") os.makedirs(storage_path, exist_ok=True) file_path = os.path.join(storage_path, filename) with open(file_path, "wb") as f: f.write(image_data) # Create asset asset = Asset( user_id=job.user_id, project_id=job.project_id, original_filename=filename, stored_filename=filename, file_path=file_path, file_type="image", mime_type="image/png", file_size_bytes=len(image_data), source_module="image_generator", source_job_id=job.id, asset_metadata={ "prompt": prompt, "provider": provider, "model": job.api_model } ) db.add(asset) db.commit() db.refresh(asset) job.output_asset_ids = [asset.id] job.output_data = {"asset_id": str(asset.id), "file_path": file_path} job.progress = 100 job.status = "completed" job.completed_at = datetime.utcnow() db.commit() except Exception as e: job.status = "failed" job.error_message = str(e) db.commit() finally: db.close() async def _generate_openai(input_data: dict) -> Tuple[Optional[bytes], Optional[str]]: """Generate image using OpenAI GPT-Image-1 or DALL-E 3 GPT-Image-1 Parameters (default): - prompt: Text description (max 32000 chars) - quality: 'low', 'medium', 'high' (default: high) - size: '1024x1024', '1024x1536', '1536x1024' - background: 'transparent', 'opaque', 'auto' - output_format: 'png', 'jpeg', 'webp' (default: png) - output_compression: 0-100 for jpeg/webp - moderation: 'auto' or 'low' (less restrictive) - n: 1-10 images DALL-E 3 Parameters: - prompt: Text description (max 4000 chars) - quality: 'standard' or 'hd' (default: hd) - style: 'vivid' or 'natural' (default: vivid) - size: '1024x1024', '1024x1792', '1792x1024' """ prompt = input_data.get("prompt", "") model = input_data.get("model", "gpt-image-1") width = input_data.get("width", 1024) height = input_data.get("height", 1024) # Determine size based on width/height if width > height: size = "1536x1024" if model == "gpt-image-1" else "1792x1024" elif height > width: size = "1024x1536" if model == "gpt-image-1" else "1024x1792" else: size = "1024x1024" async with httpx.AsyncClient(timeout=180) as client: if model == "gpt-image-1": # GPT-Image-1 (latest model) quality = input_data.get("quality", "high") background = input_data.get("background", "auto") output_format = input_data.get("output_format", "png") output_compression = input_data.get("output_compression", 100) moderation = input_data.get("moderation", "auto") n = min(input_data.get("n", 1), 10) payload = { "model": "gpt-image-1", "prompt": prompt, "size": size, "quality": quality, "n": n } # Add optional parameters if background != "auto": payload["background"] = background if output_format != "png": payload["output_format"] = output_format if output_format in ["jpeg", "webp"] and output_compression != 100: payload["output_compression"] = output_compression if moderation != "auto": payload["moderation"] = moderation response = await client.post( "https://api.openai.com/v1/images/generations", headers={ "Authorization": f"Bearer {settings.openai_api_key}", "Content-Type": "application/json" }, json=payload ) response.raise_for_status() data = response.json() if data.get("data") and len(data["data"]) > 0: # GPT-Image-1 always returns base64 b64_image = data["data"][0].get("b64_json") if b64_image: ext = output_format if output_format in ["png", "jpeg", "webp"] else "png" filename = f"gptimage1_{quality}_{uuid4()}.{ext}" return base64.b64decode(b64_image), filename else: # DALL-E 3 (or DALL-E 2) quality = input_data.get("quality", "hd") style = input_data.get("style", "vivid") payload = { "model": model, "prompt": prompt, "size": size, "n": 1, "response_format": "b64_json" } # DALL-E 3 specific options if model == "dall-e-3": payload["quality"] = quality payload["style"] = style response = await client.post( "https://api.openai.com/v1/images/generations", headers={ "Authorization": f"Bearer {settings.openai_api_key}", "Content-Type": "application/json" }, json=payload ) response.raise_for_status() data = response.json() if data.get("data") and len(data["data"]) > 0: b64_image = data["data"][0].get("b64_json") if b64_image: filename = f"{model.replace('-', '')}_{style if model == 'dall-e-3' else 'gen'}_{uuid4()}.png" return base64.b64decode(b64_image), filename return None, None async def _generate_stability(input_data: dict, input_image_data: Optional[bytes] = None) -> Tuple[Optional[bytes], Optional[str]]: """Generate image using Stability AI Parameters: - prompt: Text description (required) - negative_prompt: What to avoid in generation - model: 'sd3.5-large', 'sd3.5-medium', 'sd3-large', 'sd3-medium' - aspect_ratio: '1:1', '16:9', '9:16', '4:3', '3:4', '21:9', '9:21' - seed: Optional seed for reproducibility (0-4294967294) - mode: 'text-to-image' or 'image-to-image' """ if not settings.stability_api_key: raise ValueError("Stability API key not configured") prompt = input_data.get("prompt", "") if not prompt: raise ValueError("Prompt is required") negative_prompt = input_data.get("negative_prompt", "") model = input_data.get("model", "sd3.5-large") aspect_ratio = input_data.get("aspect_ratio", "1:1") seed = input_data.get("seed") output_format = input_data.get("output_format", "png") async with httpx.AsyncClient(timeout=180) as client: # Build multipart form data - Stability requires multipart/form-data files = { "prompt": (None, prompt), "mode": (None, "text-to-image"), "model": (None, model), "aspect_ratio": (None, aspect_ratio), "output_format": (None, output_format), } if negative_prompt: files["negative_prompt"] = (None, negative_prompt) if seed is not None: files["seed"] = (None, str(seed)) # Image-to-image mode if input_image_data: files["mode"] = (None, "image-to-image") files["strength"] = (None, str(input_data.get("strength", 0.7))) files["image"] = ("input.png", input_image_data, "image/png") try: response = await client.post( "https://api.stability.ai/v2beta/stable-image/generate/sd3", headers={ "Authorization": f"Bearer {settings.stability_api_key}", "Accept": "image/*" }, files=files ) if response.status_code != 200: error_text = response.text logger.error(f"Stability AI error {response.status_code}: {error_text}") raise Exception(f"Stability AI error: {error_text}") model_short = model.replace("-", "").replace(".", "") filename = f"stability_{model_short}_{uuid4()}.{output_format}" return response.content, filename except httpx.HTTPStatusError as e: logger.error(f"Stability AI HTTP error: {e.response.status_code} - {e.response.text}") raise except Exception as e: logger.error(f"Stability AI generation error: {e}") raise async def _generate_leonardo(input_data: dict) -> tuple: """ Generate image using Leonardo AI Parameters: - prompt: Text description - model: Leonardo model ID (default: Phoenix) - width: Image width (512, 768, 1024, 1472) - height: Image height (512, 768, 832, 1024) - preset_style: Style preset (ANIME, CINEMATIC, PHOTOGRAPHY, etc.) - num_images: Number of images to generate - guidance_scale: How closely to follow prompt (7-15) - num_inference_steps: Quality/speed tradeoff (30-60) - negative_prompt: What to avoid - init_image_id: For image-to-image - init_strength: How much to change input image (0.1-0.9) """ # Default model is Leonardo Phoenix model_id = input_data.get("model", "6b645e3a-d64f-4341-a6d8-7a3690fbf042") # Determine dimensions from aspect ratio aspect_ratio = input_data.get("aspect_ratio", "1:1") dims = IMAGE_PROVIDERS["leonardo"]["dimensions"].get(aspect_ratio, {"width": 1024, "height": 1024}) # Allow explicit override if provided (and valid int) width = int(input_data.get("width", dims["width"])) height = int(input_data.get("height", dims["height"])) # Build request payload payload = { "prompt": input_data.get("prompt"), "modelId": model_id, "width": width, "height": height, "num_images": min(input_data.get("num_images", 1), 4), # Cap at 4 for safety "public": input_data.get("public", False) } # Alchemy / PhotoReal Logic # Phoenix (de7d3faf...) does NOT support Alchemy or PhotoReal (it has its own pipeline). # Sending 'alchemy': True with Phoenix causes "Invalid response from authorization hook" (500). is_phoenix = model_id == "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3" alchemy = input_data.get("alchemy", False) photo_real = input_data.get("photo_real", False) if is_phoenix: # Force disable legacy features for Phoenix alchemy = False photo_real = False # Phoenix might support 'elements' or other new params, but definitely not legacy alchemy. if alchemy: payload["alchemy"] = True payload["contrastRatio"] = input_data.get("contrast_ratio", 0.5) if photo_real: payload["photoReal"] = True payload["photoRealStrength"] = input_data.get("photo_real_strength", 0.5) # If PhotoReal is on, we remove modelId to rely on system default for PhotoReal. if "modelId" in payload: del payload["modelId"] # Log payload for debugging logger.info(f"Leonardo Payload (Model: {model_id}): {payload}") if input_data.get("preset_style") and input_data.get("preset_style") != "NONE": payload["presetStyle"] = input_data.get("preset_style") if input_data.get("guidance_scale"): payload["guidance_scale"] = int(input_data.get("guidance_scale")) # Image-to-image / Reference # Modern Leonardo uses 'imagePrompts' array for reference. # 'init_image_id' is legacy but might still work for some models. init_image_id = input_data.get("init_image_id") if init_image_id: # Legacy support payload["init_image_id"] = init_image_id payload["init_strength"] = float(input_data.get("init_strength", 0.5)) async with httpx.AsyncClient(timeout=180) as client: # Create generation response = await client.post( "https://cloud.leonardo.ai/api/rest/v1/generations", headers={ "Authorization": f"Bearer {settings.leonardo_api_key}", "Content-Type": "application/json" }, json=payload ) if response.status_code != 200: error_text = response.text logger.error(f"Leonardo API error {response.status_code}: {error_text}") raise ValueError(f"Leonardo API returned {response.status_code}: {error_text}") data = response.json() logger.info(f"Leonardo response: {data}") # Poll for result generation_id = data.get("sdGenerationJob", {}).get("generationId") if generation_id: import asyncio for _ in range(90): # Wait up to 3 minutes await asyncio.sleep(2) status_response = await client.get( f"https://cloud.leonardo.ai/api/rest/v1/generations/{generation_id}", headers={"Authorization": f"Bearer {settings.leonardo_api_key}"} ) status_data = status_response.json() generation = status_data.get("generations_by_pk", {}) status = generation.get("status") if status == "COMPLETE": images = generation.get("generated_images", []) if images: image_url = images[0].get("url") if image_url: img_response = await client.get(image_url) model_name = IMAGE_PROVIDERS["leonardo"]["models"].get(model_id, "leonardo") filename = f"leonardo_{model_name.replace(' ', '_').lower()}_{uuid4()}.png" return img_response.content, filename elif status == "FAILED": raise Exception("Leonardo generation failed") return None, None async def _generate_bria(input_data: dict) -> tuple: """ Generate image using Bria AI Parameters: - prompt: Text description - model: 'base' (Bria 2.3 Base) or 'fast' (Bria 2.3 Fast) - aspect_ratio: Image aspect ratio - medium: 'photography' or 'art' - prompt_enhancement: Enable AI prompt enhancement - steps_num: Number of inference steps - guidance_scale: How closely to follow prompt - negative_prompt: What to avoid """ model = input_data.get("model", "base") base_url = "https://engine.prod.bria-api.com/v1/text-to-image" # Build request payload payload = { "prompt": input_data.get("prompt"), "num_results": 1 } # Add aspect ratio if input_data.get("aspect_ratio"): payload["aspect_ratio"] = input_data.get("aspect_ratio") # Add medium if input_data.get("medium"): payload["medium"] = input_data.get("medium") # Add prompt enhancement if input_data.get("prompt_enhancement"): payload["prompt_enhancement"] = True # Add negative prompt if input_data.get("negative_prompt"): payload["negative_prompt"] = input_data.get("negative_prompt") # Model-specific parameters if model == "base": url = f"{base_url}/base" if input_data.get("steps_num"): payload["steps_num"] = input_data.get("steps_num") if input_data.get("guidance_scale"): payload["text_guidance_scale"] = input_data.get("guidance_scale") else: url = f"{base_url}/fast" if input_data.get("steps_num"): payload["steps_num"] = min(input_data.get("steps_num"), 10) async with httpx.AsyncClient(timeout=120) as client: response = await client.post( url, headers={ "api_token": settings.bria_api_key, "Content-Type": "application/json" }, json=payload ) response.raise_for_status() data = response.json() # Get the result result = data.get("result", []) if result and len(result) > 0: image_url = result[0].get("urls", {}).get("url") if image_url: img_response = await client.get(image_url) filename = f"bria_{model}_{uuid4()}.png" return img_response.content, filename return None, None async def _generate_ideogram(input_data: dict) -> tuple: """Generate image using Ideogram""" async with httpx.AsyncClient(timeout=120) as client: response = await client.post( "https://api.ideogram.ai/generate", headers={ "Api-Key": settings.ideogram_api_key, "Content-Type": "application/json" }, json={ "image_request": { "prompt": input_data.get("prompt"), "model": "V_2", "aspect_ratio": "ASPECT_1_1" } } ) response.raise_for_status() data = response.json() if data.get("data") and len(data["data"]) > 0: image_url = data["data"][0].get("url") if image_url: img_response = await client.get(image_url) filename = f"ideogram_{uuid4()}.png" return img_response.content, filename return None, None async def _generate_flux(input_data: dict) -> tuple: """Generate image using Flux (Black Forest Labs) Note: Requires FLUX_API_KEY from https://api.bfl.ml/ May require paid account for flux-pro-1.1 model """ if not settings.flux_api_key: raise ValueError("FLUX_API_KEY not configured") async with httpx.AsyncClient(timeout=120) as client: try: response = await client.post( "https://api.bfl.ml/v1/flux-pro-1.1", headers={ "x-key": settings.flux_api_key, "Content-Type": "application/json" }, json={ "prompt": input_data.get("prompt"), "width": input_data.get("width", 1024), "height": input_data.get("height", 1024) } ) if response.status_code == 403: logger.error("Flux API 403: Invalid API key or insufficient permissions") raise ValueError("Flux API key is invalid or your account doesn't have access to flux-pro-1.1") response.raise_for_status() data = response.json() # Poll for result request_id = data.get("id") if request_id: import asyncio for _ in range(60): await asyncio.sleep(2) status_response = await client.get( f"https://api.bfl.ml/v1/get_result?id={request_id}", headers={"x-key": settings.flux_api_key} ) status_data = status_response.json() if status_data.get("status") == "Ready": image_url = status_data.get("result", {}).get("sample") if image_url: img_response = await client.get(image_url) filename = f"flux_{uuid4()}.png" return img_response.content, filename except Exception as e: logger.error(f"Flux generation error: {e}") raise return None, None async def _generate_gemini(input_data: dict) -> tuple: """Generate image using Google Gemini""" import google.generativeai as genai genai.configure(api_key=settings.google_api_key) model = genai.GenerativeModel("gemini-2.0-flash-exp") response = model.generate_content( input_data.get("prompt"), generation_config=genai.types.GenerationConfig( response_mime_type="image/png" ) ) if response.candidates and response.candidates[0].content.parts: for part in response.candidates[0].content.parts: if hasattr(part, 'inline_data') and part.inline_data: filename = f"gemini_{uuid4()}.png" return part.inline_data.data, filename return None, None async def _generate_imagen(input_data: dict) -> tuple: """ Generate image using Google Imagen 3 via REST API Note: Imagen 3 is accessed through the generativelanguage API with API key. Parameters: - prompt: Text description of the image - aspect_ratio: "1:1", "3:4", "4:3", "9:16", "16:9" - number_of_images: 1-4 - negative_prompt: What to avoid in the image """ if not settings.google_api_key: raise ValueError("GOOGLE_API_KEY not configured") prompt = input_data.get("prompt", "") negative_prompt = input_data.get("negative_prompt", "") aspect_ratio = input_data.get("aspect_ratio", "1:1") number_of_images = min(input_data.get("number_of_images", 1), 4) # Use the Generative Language API for Imagen 4 model_name = input_data.get("model", "imagen-4.0-generate-001") url = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:predict" payload = { "instances": [{"prompt": prompt}], "parameters": { "sampleCount": number_of_images, "aspectRatio": aspect_ratio, } } if negative_prompt: payload["instances"][0]["negativePrompt"] = negative_prompt try: async with httpx.AsyncClient(timeout=120.0) as client: response = await client.post( url, headers={ "Content-Type": "application/json", "x-goog-api-key": settings.google_api_key }, json=payload ) if response.status_code == 200: data = response.json() predictions = data.get("predictions", []) if predictions and predictions[0].get("bytesBase64Encoded"): image_data = base64.b64decode(predictions[0]["bytesBase64Encoded"]) filename = f"imagen3_{uuid4()}.png" return image_data, filename else: logger.warning(f"Imagen API error: {response.status_code} - {response.text}") # Fall back to Nano Banana (Gemini native) logger.info("Falling back to Nano Banana (Gemini native image generation)") return await _generate_nano_banana(input_data) except Exception as e: logger.error(f"Imagen generation error: {e}") # Fallback to Gemini native image generation return await _generate_nano_banana(input_data) return None, None async def _generate_nano_banana(input_data: dict, image_data: Optional[bytes] = None) -> tuple: """ Generate image using Nano Banana (Gemini 2.5 Flash Image model) Model: gemini-2.5-flash-image (native image generation) """ if not settings.google_api_key: raise ValueError("GOOGLE_API_KEY not configured") prompt = input_data.get("prompt", "") if not prompt: raise ValueError("Prompt is required") # Use gemini-2.5-flash-image model for native image generation model_name = input_data.get("model", "gemini-2.5-flash-image") url = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent" # Build payload with text and optional image # Build payload with image first (context) then text (instruction) parts = [] if image_data: import base64 b64_image = base64.b64encode(image_data).decode("utf-8") parts.append({ "inlineData": { "mimeType": "image/png", "data": b64_image } }) logger.info(f"Nano Banana: Added reference image ({len(image_data)} bytes) to payload") parts.append({"text": prompt}) payload = { "contents": [{ "parts": parts }] } try: async with httpx.AsyncClient(timeout=120.0) as client: response = await client.post( url, headers={ "Content-Type": "application/json", "x-goog-api-key": settings.google_api_key }, json=payload ) logger.info(f"Nano Banana response status: {response.status_code}") if response.status_code == 200: data = response.json() logger.info(f"Nano Banana response keys: {data.keys() if isinstance(data, dict) else 'not a dict'}") # Extract image from response candidates = data.get("candidates", []) if candidates and len(candidates) > 0: content = candidates[0].get("content", {}) parts = content.get("parts", []) for part in parts: if "inlineData" in part: inline_data = part["inlineData"] if "data" in inline_data: import base64 image_data = base64.b64decode(inline_data["data"]) filename = f"nano_banana_{uuid4()}.png" logger.info(f"✓ Nano Banana generated image: {len(image_data)} bytes") return image_data, filename logger.warning(f"Nano Banana: No image data in response. Response: {str(data)[:200]}") else: logger.error(f"Nano Banana API error: {response.status_code} - {response.text}") except Exception as e: logger.error(f"Nano Banana generation error: {e}") import traceback traceback.print_exc() return None, None async def _generate_runway_image(input_data: dict) -> tuple: """Generate image using Runway Gen-4 Image""" if not settings.runway_api_key: raise ValueError("RUNWAY_API_KEY not configured") prompt = input_data.get("prompt", "") ratio = input_data.get("ratio", "1360:768") seed = input_data.get("seed") payload = {"model": "gen4_image", "promptText": prompt, "ratio": ratio if ratio in ["1024:1024", "1360:768"] else "1360:768"} if seed and seed > 0: payload["seed"] = seed async with httpx.AsyncClient(timeout=180) as client: response = await client.post( "https://api.dev.runwayml.com/v1/text_to_image", headers={ "Authorization": f"Bearer {settings.runway_api_key}", "Content-Type": "application/json", "X-Runway-Version": "2024-11-06" }, json=payload ) response.raise_for_status() result = response.json() task_id = result.get("id") # Poll for completion import asyncio for _ in range(90): await asyncio.sleep(2) status_resp = await client.get( f"https://api.dev.runwayml.com/v1/tasks/{task_id}", headers={"Authorization": f"Bearer {settings.runway_api_key}", "X-Runway-Version": "2024-11-06"} ) status_data = status_resp.json() if status_data.get("status") == "SUCCEEDED": url = status_data.get("output", [None])[0] if url: img_resp = await client.get(url) return img_resp.content, f"runway_gen4_{uuid4()}.png" elif status_data.get("status") == "FAILED": raise ValueError(f"Runway failed: {status_data.get('error')}") return None, None