"""Image Generator Service - Multiple AI Providers Supported Providers: - openai: GPT-Image-1 (latest) or DALL-E 3 - imagen: Google Imagen 4 (Standard, Ultra, Fast) - nano-banana: Gemini 2.5 Flash Image / Nano Banana Pro - stable-diffusion: Stability AI SDXL, SD3, image-to-image - leonardo: Leonardo.ai models - ideogram: Ideogram v2 with text rendering - flux: Black Forest Labs Flux Pro OpenAI GPT-Image-1 (April 2025): - model: 'gpt-image-1' (default) or 'dall-e-3' - quality: 'low', 'medium', 'high' (default high) - size: 1024x1024, 1024x1536, 1536x1024 - background: 'transparent', 'opaque', 'auto' (for PNG/WebP) - output_format: 'png', 'jpeg', 'webp' - n: 1-10 images per request - Pricing: ~$0.02 (low), $0.07 (medium), $0.19 (high) per image Google Imagen 4 (December 2025): - model: 'imagen-4.0-generate-001' (default), 'imagen-4.0-ultra-generate-001', 'imagen-4.0-fast-generate-001' - image_size: '1K', '2K' (Ultra/Standard only) - aspect_ratio: '1:1', '3:4', '4:3', '9:16', '16:9' - number_of_images: 1-4 - enhance_prompt: true/false (LLM prompt enhancement) - person_generation: 'dont_allow', 'allow_adult', 'allow_all' - Pricing: $0.02 (Fast), $0.04 (Standard), $0.06 (Ultra) per image Nano Banana / Gemini Image (December 2025): - model: 'gemini-2.5-flash-image' (Nano Banana), 'gemini-3-pro-image-preview' (Nano Banana Pro) - aspect_ratio: '1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9' - image_size: '1K', '2K', '4K' (Pro only for 4K) - Features: Text rendering, image editing, multi-turn conversation - Pricing: ~$0.04 per 1MP image DALL-E 3 Options: - quality: 'standard' or 'hd' (default hd) - style: 'vivid' (hyper-real) or 'natural' (more realistic) - size: 1024x1024, 1024x1792, 1792x1024 Stability AI Options: - model: sd3.5-large, sd3.5-medium, sd3-large, sd3-medium, sdxl-1.0 - aspect_ratio: 1:1, 16:9, 9:16, 4:3, 3:4, 21:9, 9:21 - negative_prompt: What to avoid in generation - image_to_image: Use input image as starting point - strength: 0.0-1.0 for image-to-image (how much to change) - style_preset: enhance, anime, photographic, digital-art, etc. """ import httpx import os import base64 import logging from uuid import uuid4 from datetime import datetime from typing import Optional, Dict, Any, Tuple logger = logging.getLogger(__name__) from app.database import SessionLocal from app.models.job import Job from app.models.asset import Asset from app.config import settings # Provider configurations IMAGE_PROVIDERS = { "openai": { "name": "OpenAI Image Generation", "models": ["gpt-image-1", "dall-e-3", "dall-e-2"], "default_model": "gpt-image-1", "gpt-image-1": { "sizes": ["1024x1024", "1024x1536", "1536x1024"], "qualities": ["low", "medium", "high"], "output_formats": ["png", "jpeg", "webp"], "backgrounds": ["auto", "transparent", "opaque"], "max_images": 10 }, "dall-e-3": { "sizes": ["1024x1024", "1024x1792", "1792x1024"], "qualities": ["standard", "hd"], "styles": ["vivid", "natural"] }, "supports_styles": True }, "imagen": { "name": "Google Imagen 4", "models": ["imagen-4.0-generate-001", "imagen-4.0-ultra-generate-001", "imagen-4.0-fast-generate-001"], "default_model": "imagen-4.0-generate-001", "aspect_ratios": ["1:1", "3:4", "4:3", "9:16", "16:9"], "image_sizes": ["1K", "2K"], "max_images": 4, "supports_enhance_prompt": True, "supports_person_generation": True }, "nano-banana": { "name": "Nano Banana (Gemini Image)", "models": ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"], "default_model": "gemini-2.5-flash-image", "aspect_ratios": ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"], "image_sizes": ["1K", "2K", "4K"], "supports_text_rendering": True, "supports_image_editing": True }, "stable-diffusion": { "name": "Stability AI", "models": ["sd3.5-large", "sd3.5-medium", "sd3-large", "sd3-medium", "sdxl-1.0"], "default_model": "sd3.5-large", "aspect_ratios": ["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"], "supports_img2img": True, "supports_negative_prompt": True }, "leonardo": { "name": "Leonardo.ai", "models": { # Latest Models (2025) "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3": "Leonardo Phoenix 1.0", "7b592283-e8a7-4c5a-9ba6-d18c31f258b9": "Lucid Origin", "05ce0082-2d80-4a2d-8653-4d1c85e2418e": "Lucid Realism", "28aeddf8-bd19-4803-80fc-79602d1a9989": "FLUX.1 Kontext", "b2614463-296c-462a-9586-aafdb8f00e36": "Flux Dev", "1dd50843-d653-4516-a8e3-f0238ee453ff": "Flux Schnell", # Phoenix/XL Models "6b645e3a-d64f-4341-a6d8-7a3690fbf042": "Leonardo Phoenix 0.9", "e71a1c2f-4f80-4800-934f-2c68979d8cc8": "Leonardo Anime XL", "b24e16ff-06e3-43eb-8d33-4416c2d75876": "Leonardo Lightning XL", "aa77f04e-3eec-4034-9c07-d0f619684628": "Leonardo Kino XL", "5c232a9e-9061-4777-980a-ddc8e65647c6": "Leonardo Vision XL", "1e60896f-3c26-4296-8ecc-53e2afecc132": "Leonardo Diffusion XL", # SDXL Models "16e7060a-803e-4df3-97ee-edcfa5dc9cc8": "SDXL 1.0", "2067ae52-33fd-4a82-bb92-c2c55e7d2786": "AlbedoBase XL", "b63f7119-31dc-4540-969b-2a9df997e173": "SDXL 0.9", # Style Models "f1929ea3-b169-4c18-a16c-5d58b4292c69": "RPG v5", "d69c8273-6b17-4a30-a13e-d6637ae1c644": "3D Animation Style", "ac614f96-1082-45bf-be9d-757f2d31c174": "DreamShaper v7", "e316348f-7773-490e-adcd-46757c738eb7": "Absolute Reality v1.6" }, "default_model": "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3", "widths": [512, 768, 1024, 1472], "heights": [512, 768, 832, 1024], "style_presets": [ "ANIME", "BOKEH", "CINEMATIC", "CINEMATIC_CLOSEUP", "CREATIVE", "DYNAMIC", "ENVIRONMENT", "FASHION", "FILM", "FOOD", "GENERAL", "HDR", "ILLUSTRATION", "LEONARDO", "LONG_EXPOSURE", "MACRO", "MINIMALISTIC", "MONOCHROME", "MOODY", "NONE", "NEUTRAL", "PHOTOGRAPHY", "PORTRAIT", "RAYTRACED", "RENDER_3D", "RETRO", "SKETCH_BW", "SKETCH_COLOR", "STOCK_PHOTO", "VIBRANT", "UNPROCESSED" ], "supports_img2img": True, "supports_character_reference": True, "supports_style_reference": True }, "bria": { "name": "Bria AI", "models": ["base", "fast"], "default_model": "base", "aspect_ratios": ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9"], "mediums": ["photography", "art"], "supports_prompt_enhancement": True, "base_config": {"steps_num": [20, 50], "guidance_scale": [1, 10]}, "fast_config": {"steps_num": [4, 10]} }, "ideogram": { "name": "Ideogram", "models": ["V_2", "V_2_TURBO"], "supports_text_rendering": True }, "flux": { "name": "Flux Pro", "models": ["flux-pro-1.1", "flux-dev", "flux-schnell"], "supports_img2img": True } } STABILITY_STYLE_PRESETS = [ "enhance", "anime", "photographic", "digital-art", "comic-book", "fantasy-art", "line-art", "analog-film", "neon-punk", "isometric", "low-poly", "origami", "modeling-compound", "cinematic", "3d-model", "pixel-art" ] async def generate(job_id: str): """Generate image based on provider""" db = SessionLocal() try: job = db.query(Job).filter(Job.id == job_id).first() if not job: return input_data = job.input_data provider = input_data.get("provider", "openai") prompt = input_data.get("prompt", "") # Update progress job.progress = 10 job.api_provider = provider db.commit() image_data = None filename = None if provider == "openai" or provider == "dalle3": image_data, filename = await _generate_openai(input_data) job.api_model = input_data.get("model", "gpt-image-1") elif provider == "imagen": image_data, filename = await _generate_imagen(input_data) job.api_model = input_data.get("model", "imagen-4.0-generate-001") elif provider == "nano-banana" or provider == "gemini": image_data, filename = await _generate_nano_banana(input_data) job.api_model = input_data.get("model", "gemini-2.5-flash-image") elif provider == "stable-diffusion": image_data, filename = await _generate_stability(input_data) job.api_model = input_data.get("model", "sd3.5-large") elif provider == "leonardo": image_data, filename = await _generate_leonardo(input_data) job.api_model = "leonardo" elif provider == "ideogram": image_data, filename = await _generate_ideogram(input_data) job.api_model = "ideogram-v2" elif provider == "flux": image_data, filename = await _generate_flux(input_data) job.api_model = "flux-pro" elif provider == "bria": image_data, filename = await _generate_bria(input_data) job.api_model = input_data.get("model", "base") else: raise ValueError(f"Unknown provider: {provider}") job.progress = 80 db.commit() # Save image if image_data: storage_path = os.path.join(settings.storage_path, "images") os.makedirs(storage_path, exist_ok=True) file_path = os.path.join(storage_path, filename) with open(file_path, "wb") as f: f.write(image_data) # Create asset asset = Asset( user_id=job.user_id, project_id=job.project_id, original_filename=filename, stored_filename=filename, file_path=file_path, file_type="image", mime_type="image/png", file_size_bytes=len(image_data), source_module="image_generator", source_job_id=job.id, metadata={ "prompt": prompt, "provider": provider, "model": job.api_model } ) db.add(asset) db.commit() db.refresh(asset) job.output_asset_ids = [asset.id] job.output_data = {"asset_id": str(asset.id), "file_path": file_path} job.progress = 100 job.status = "completed" job.completed_at = datetime.utcnow() db.commit() except Exception as e: job.status = "failed" job.error_message = str(e) db.commit() finally: db.close() async def _generate_openai(input_data: dict) -> Tuple[Optional[bytes], Optional[str]]: """Generate image using OpenAI GPT-Image-1 or DALL-E 3 GPT-Image-1 Parameters (default): - prompt: Text description (max 32000 chars) - quality: 'low', 'medium', 'high' (default: high) - size: '1024x1024', '1024x1536', '1536x1024' - background: 'transparent', 'opaque', 'auto' - output_format: 'png', 'jpeg', 'webp' (default: png) - output_compression: 0-100 for jpeg/webp - moderation: 'auto' or 'low' (less restrictive) - n: 1-10 images DALL-E 3 Parameters: - prompt: Text description (max 4000 chars) - quality: 'standard' or 'hd' (default: hd) - style: 'vivid' or 'natural' (default: vivid) - size: '1024x1024', '1024x1792', '1792x1024' """ prompt = input_data.get("prompt", "") model = input_data.get("model", "gpt-image-1") width = input_data.get("width", 1024) height = input_data.get("height", 1024) # Determine size based on width/height if width > height: size = "1536x1024" if model == "gpt-image-1" else "1792x1024" elif height > width: size = "1024x1536" if model == "gpt-image-1" else "1024x1792" else: size = "1024x1024" async with httpx.AsyncClient(timeout=180) as client: if model == "gpt-image-1": # GPT-Image-1 (latest model) quality = input_data.get("quality", "high") background = input_data.get("background", "auto") output_format = input_data.get("output_format", "png") output_compression = input_data.get("output_compression", 100) moderation = input_data.get("moderation", "auto") n = min(input_data.get("n", 1), 10) payload = { "model": "gpt-image-1", "prompt": prompt, "size": size, "quality": quality, "n": n } # Add optional parameters if background != "auto": payload["background"] = background if output_format != "png": payload["output_format"] = output_format if output_format in ["jpeg", "webp"] and output_compression != 100: payload["output_compression"] = output_compression if moderation != "auto": payload["moderation"] = moderation response = await client.post( "https://api.openai.com/v1/images/generations", headers={ "Authorization": f"Bearer {settings.openai_api_key}", "Content-Type": "application/json" }, json=payload ) response.raise_for_status() data = response.json() if data.get("data") and len(data["data"]) > 0: # GPT-Image-1 always returns base64 b64_image = data["data"][0].get("b64_json") if b64_image: ext = output_format if output_format in ["png", "jpeg", "webp"] else "png" filename = f"gptimage1_{quality}_{uuid4()}.{ext}" return base64.b64decode(b64_image), filename else: # DALL-E 3 (or DALL-E 2) quality = input_data.get("quality", "hd") style = input_data.get("style", "vivid") payload = { "model": model, "prompt": prompt, "size": size, "n": 1, "response_format": "b64_json" } # DALL-E 3 specific options if model == "dall-e-3": payload["quality"] = quality payload["style"] = style response = await client.post( "https://api.openai.com/v1/images/generations", headers={ "Authorization": f"Bearer {settings.openai_api_key}", "Content-Type": "application/json" }, json=payload ) response.raise_for_status() data = response.json() if data.get("data") and len(data["data"]) > 0: b64_image = data["data"][0].get("b64_json") if b64_image: filename = f"{model.replace('-', '')}_{style if model == 'dall-e-3' else 'gen'}_{uuid4()}.png" return base64.b64decode(b64_image), filename return None, None async def _generate_stability(input_data: dict, input_image_data: Optional[bytes] = None) -> Tuple[Optional[bytes], Optional[str]]: """Generate image using Stability AI Parameters: - prompt: Text description (required) - negative_prompt: What to avoid in generation - model: 'sd3.5-large', 'sd3.5-medium', 'sd3-large', 'sd3-medium' - aspect_ratio: '1:1', '16:9', '9:16', '4:3', '3:4', '21:9', '9:21' - seed: Optional seed for reproducibility (0-4294967294) - mode: 'text-to-image' or 'image-to-image' """ if not settings.stability_api_key: raise ValueError("Stability API key not configured") prompt = input_data.get("prompt", "") if not prompt: raise ValueError("Prompt is required") negative_prompt = input_data.get("negative_prompt", "") model = input_data.get("model", "sd3.5-large") aspect_ratio = input_data.get("aspect_ratio", "1:1") seed = input_data.get("seed") output_format = input_data.get("output_format", "png") async with httpx.AsyncClient(timeout=180) as client: # Build form data - Stability uses multipart/form-data form_data = { "prompt": prompt, "mode": "text-to-image", "model": model, "aspect_ratio": aspect_ratio, "output_format": output_format, } if negative_prompt: form_data["negative_prompt"] = negative_prompt if seed is not None: form_data["seed"] = seed # Image-to-image mode files = None if input_image_data: form_data["mode"] = "image-to-image" form_data["strength"] = input_data.get("strength", 0.7) files = {"image": ("input.png", input_image_data, "image/png")} try: response = await client.post( "https://api.stability.ai/v2beta/stable-image/generate/sd3", headers={ "Authorization": f"Bearer {settings.stability_api_key}", "Accept": "image/*" }, data=form_data, files=files ) if response.status_code != 200: error_text = response.text logger.error(f"Stability AI error {response.status_code}: {error_text}") raise Exception(f"Stability AI error: {error_text}") model_short = model.replace("-", "").replace(".", "") filename = f"stability_{model_short}_{uuid4()}.{output_format}" return response.content, filename except httpx.HTTPStatusError as e: logger.error(f"Stability AI HTTP error: {e.response.status_code} - {e.response.text}") raise except Exception as e: logger.error(f"Stability AI generation error: {e}") raise async def _generate_leonardo(input_data: dict) -> tuple: """ Generate image using Leonardo AI Parameters: - prompt: Text description - model: Leonardo model ID (default: Phoenix) - width: Image width (512, 768, 1024, 1472) - height: Image height (512, 768, 832, 1024) - preset_style: Style preset (ANIME, CINEMATIC, PHOTOGRAPHY, etc.) - num_images: Number of images to generate - guidance_scale: How closely to follow prompt (7-15) - num_inference_steps: Quality/speed tradeoff (30-60) - negative_prompt: What to avoid - init_image_id: For image-to-image - init_strength: How much to change input image (0.1-0.9) """ # Default model is Leonardo Phoenix model_id = input_data.get("model", "6b645e3a-d64f-4341-a6d8-7a3690fbf042") # Build request payload payload = { "prompt": input_data.get("prompt"), "modelId": model_id, "width": input_data.get("width", 1024), "height": input_data.get("height", 1024), "num_images": input_data.get("num_images", 1), } # Add optional parameters if input_data.get("preset_style"): payload["presetStyle"] = input_data.get("preset_style") if input_data.get("guidance_scale"): payload["guidance_scale"] = input_data.get("guidance_scale") if input_data.get("num_inference_steps"): payload["num_inference_steps"] = input_data.get("num_inference_steps") if input_data.get("negative_prompt"): payload["negative_prompt"] = input_data.get("negative_prompt") # Image-to-image support if input_data.get("init_image_id"): payload["init_image_id"] = input_data.get("init_image_id") payload["init_strength"] = input_data.get("init_strength", 0.5) async with httpx.AsyncClient(timeout=180) as client: # Create generation response = await client.post( "https://cloud.leonardo.ai/api/rest/v1/generations", headers={ "Authorization": f"Bearer {settings.leonardo_api_key}", "Content-Type": "application/json" }, json=payload ) response.raise_for_status() data = response.json() # Poll for result generation_id = data.get("sdGenerationJob", {}).get("generationId") if generation_id: import asyncio for _ in range(90): # Wait up to 3 minutes await asyncio.sleep(2) status_response = await client.get( f"https://cloud.leonardo.ai/api/rest/v1/generations/{generation_id}", headers={"Authorization": f"Bearer {settings.leonardo_api_key}"} ) status_data = status_response.json() generation = status_data.get("generations_by_pk", {}) status = generation.get("status") if status == "COMPLETE": images = generation.get("generated_images", []) if images: image_url = images[0].get("url") if image_url: img_response = await client.get(image_url) model_name = IMAGE_PROVIDERS["leonardo"]["models"].get(model_id, "leonardo") filename = f"leonardo_{model_name.replace(' ', '_').lower()}_{uuid4()}.png" return img_response.content, filename elif status == "FAILED": raise Exception("Leonardo generation failed") return None, None async def _generate_bria(input_data: dict) -> tuple: """ Generate image using Bria AI Parameters: - prompt: Text description - model: 'base' (Bria 2.3 Base) or 'fast' (Bria 2.3 Fast) - aspect_ratio: Image aspect ratio - medium: 'photography' or 'art' - prompt_enhancement: Enable AI prompt enhancement - steps_num: Number of inference steps - guidance_scale: How closely to follow prompt - negative_prompt: What to avoid """ model = input_data.get("model", "base") base_url = "https://engine.prod.bria-api.com/v1/text-to-image" # Build request payload payload = { "prompt": input_data.get("prompt"), "num_results": 1 } # Add aspect ratio if input_data.get("aspect_ratio"): payload["aspect_ratio"] = input_data.get("aspect_ratio") # Add medium if input_data.get("medium"): payload["medium"] = input_data.get("medium") # Add prompt enhancement if input_data.get("prompt_enhancement"): payload["prompt_enhancement"] = True # Add negative prompt if input_data.get("negative_prompt"): payload["negative_prompt"] = input_data.get("negative_prompt") # Model-specific parameters if model == "base": url = f"{base_url}/base" if input_data.get("steps_num"): payload["steps_num"] = input_data.get("steps_num") if input_data.get("guidance_scale"): payload["text_guidance_scale"] = input_data.get("guidance_scale") else: url = f"{base_url}/fast" if input_data.get("steps_num"): payload["steps_num"] = min(input_data.get("steps_num"), 10) async with httpx.AsyncClient(timeout=120) as client: response = await client.post( url, headers={ "api_token": settings.bria_api_key, "Content-Type": "application/json" }, json=payload ) response.raise_for_status() data = response.json() # Get the result result = data.get("result", []) if result and len(result) > 0: image_url = result[0].get("urls", {}).get("url") if image_url: img_response = await client.get(image_url) filename = f"bria_{model}_{uuid4()}.png" return img_response.content, filename return None, None async def _generate_ideogram(input_data: dict) -> tuple: """Generate image using Ideogram""" async with httpx.AsyncClient(timeout=120) as client: response = await client.post( "https://api.ideogram.ai/generate", headers={ "Api-Key": settings.ideogram_api_key, "Content-Type": "application/json" }, json={ "image_request": { "prompt": input_data.get("prompt"), "model": "V_2", "aspect_ratio": "ASPECT_1_1" } } ) response.raise_for_status() data = response.json() if data.get("data") and len(data["data"]) > 0: image_url = data["data"][0].get("url") if image_url: img_response = await client.get(image_url) filename = f"ideogram_{uuid4()}.png" return img_response.content, filename return None, None async def _generate_flux(input_data: dict) -> tuple: """Generate image using Flux (Black Forest Labs) Note: Requires FLUX_API_KEY from https://api.bfl.ml/ May require paid account for flux-pro-1.1 model """ if not settings.flux_api_key: raise ValueError("FLUX_API_KEY not configured") async with httpx.AsyncClient(timeout=120) as client: try: response = await client.post( "https://api.bfl.ml/v1/flux-pro-1.1", headers={ "x-key": settings.flux_api_key, "Content-Type": "application/json" }, json={ "prompt": input_data.get("prompt"), "width": input_data.get("width", 1024), "height": input_data.get("height", 1024) } ) if response.status_code == 403: logger.error("Flux API 403: Invalid API key or insufficient permissions") raise ValueError("Flux API key is invalid or your account doesn't have access to flux-pro-1.1") response.raise_for_status() data = response.json() # Poll for result request_id = data.get("id") if request_id: import asyncio for _ in range(60): await asyncio.sleep(2) status_response = await client.get( f"https://api.bfl.ml/v1/get_result?id={request_id}", headers={"x-key": settings.flux_api_key} ) status_data = status_response.json() if status_data.get("status") == "Ready": image_url = status_data.get("result", {}).get("sample") if image_url: img_response = await client.get(image_url) filename = f"flux_{uuid4()}.png" return img_response.content, filename except Exception as e: logger.error(f"Flux generation error: {e}") raise return None, None async def _generate_gemini(input_data: dict) -> tuple: """Generate image using Google Gemini""" import google.generativeai as genai genai.configure(api_key=settings.google_api_key) model = genai.GenerativeModel("gemini-2.0-flash-exp") response = model.generate_content( input_data.get("prompt"), generation_config=genai.types.GenerationConfig( response_mime_type="image/png" ) ) if response.candidates and response.candidates[0].content.parts: for part in response.candidates[0].content.parts: if hasattr(part, 'inline_data') and part.inline_data: filename = f"gemini_{uuid4()}.png" return part.inline_data.data, filename return None, None async def _generate_imagen(input_data: dict) -> tuple: """ Generate image using Google Imagen 3 via REST API Note: Imagen 3 is accessed through the generativelanguage API with API key. Parameters: - prompt: Text description of the image - aspect_ratio: "1:1", "3:4", "4:3", "9:16", "16:9" - number_of_images: 1-4 - negative_prompt: What to avoid in the image """ if not settings.google_api_key: raise ValueError("GOOGLE_API_KEY not configured") prompt = input_data.get("prompt", "") negative_prompt = input_data.get("negative_prompt", "") aspect_ratio = input_data.get("aspect_ratio", "1:1") number_of_images = min(input_data.get("number_of_images", 1), 4) # Use the Generative Language API for Imagen url = f"https://generativelanguage.googleapis.com/v1beta/models/imagen-3.0-generate-001:predict?key={settings.google_api_key}" payload = { "instances": [{"prompt": prompt}], "parameters": { "sampleCount": number_of_images, "aspectRatio": aspect_ratio, } } if negative_prompt: payload["instances"][0]["negativePrompt"] = negative_prompt try: async with httpx.AsyncClient(timeout=120.0) as client: response = await client.post( url, headers={"Content-Type": "application/json"}, json=payload ) if response.status_code == 200: data = response.json() predictions = data.get("predictions", []) if predictions and predictions[0].get("bytesBase64Encoded"): image_data = base64.b64decode(predictions[0]["bytesBase64Encoded"]) filename = f"imagen3_{uuid4()}.png" return image_data, filename else: logger.warning(f"Imagen API error: {response.status_code} - {response.text}") # Fall back to Nano Banana (Gemini native) logger.info("Falling back to Nano Banana (Gemini native image generation)") return await _generate_nano_banana(input_data) except Exception as e: logger.error(f"Imagen generation error: {e}") # Fallback to Gemini native image generation return await _generate_nano_banana(input_data) return None, None async def _generate_nano_banana(input_data: dict) -> tuple: """ Generate image using Nano Banana (Gemini native image generation) Models: - gemini-2.5-flash-image: Fast image generation with Gemini - gemini-3-pro-image-preview: Higher quality image generation Features: - Native text rendering (can include text in images) - Up to 4K resolution - Wide range of aspect ratios - Conversational image editing Parameters: - prompt: Text description of the image - model: Gemini model to use - aspect_ratio: Various ratios from 1:1 to 21:9 - image_size: "1K", "2K", "4K" - number_of_images: Number of images to generate - reference_image: Optional base64 image for editing """ import google.generativeai as genai genai.configure(api_key=settings.google_api_key) model_name = input_data.get("model", "gemini-2.5-flash-image") # Map model names to actual Gemini model IDs model_mapping = { "gemini-2.5-flash-image": "gemini-2.0-flash-exp-image-generation", "gemini-3-pro-image-preview": "gemini-2.0-flash-exp-image-generation", # Use available model } actual_model = model_mapping.get(model_name, "gemini-2.0-flash-exp-image-generation") model = genai.GenerativeModel(actual_model) # Handle aspect ratio if provided aspect_ratio = input_data.get("aspect_ratio", "1:1") # Build the prompt - can include aspect ratio hints prompt = input_data.get("prompt", "") if aspect_ratio != "1:1": prompt = f"{prompt} [aspect ratio: {aspect_ratio}]" # If reference image provided, include it in the request contents = [prompt] if input_data.get("reference_image"): import base64 # Add reference image for editing ref_data = input_data.get("reference_image") if isinstance(ref_data, str) and ref_data.startswith("data:"): # Extract base64 data from data URL ref_data = ref_data.split(",")[1] contents = [ { "parts": [ {"text": prompt}, { "inline_data": { "mime_type": "image/png", "data": ref_data } } ] } ] try: # Generate content - Gemini automatically returns image data response = model.generate_content(contents) if response.candidates and response.candidates[0].content.parts: for part in response.candidates[0].content.parts: if hasattr(part, 'inline_data') and part.inline_data: filename = f"nano_banana_{uuid4()}.png" return part.inline_data.data, filename except Exception as e: logger.error(f"Nano Banana generation error: {e}") raise return None, None