forge/backend/app/services/prompt_studio.py
DJP 7a804e896d Initial commit - FORGE AI unified platform
Features:
- Image generation (OpenAI, Gemini, Leonardo, Bria, Stability, Flux)
- Nano Banana iterative editing
- Video generation and upscaling
- Audio TTS, STT, sound effects (ElevenLabs)
- Text prompt studio and alt text
- User authentication with JWT/cookies
- Admin panel with voice management
- Job queue with Celery
- PostgreSQL + Redis backend
- Next.js 15 + FastAPI architecture

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2025-12-09 20:39:00 -05:00

514 lines
21 KiB
Python

"""Prompt Studio Service - AI-Powered Prompt Enhancement
Uses Google Gemini or OpenAI GPT-4 to transform basic prompts into
professional, detailed prompts optimized for AI image/video generation.
Features:
- Multiple style presets (cinematic, photographic, artistic, etc.)
- Provider support for various image generators (DALL-E, Stable Diffusion, Midjourney, Flux)
- Negative prompt generation
- Technical parameter suggestions
- Multi-language support
Styles Available:
- cinematic: Movie-like scenes with dramatic lighting
- photographic: Professional photography with camera settings
- artistic: Painterly descriptions with artistic techniques
- product: Commercial product photography
- fantasy: Magical, otherworldly scenes
- minimal: Clean, simple compositions
- vintage: Retro, nostalgic aesthetics
- futuristic: Sci-fi, high-tech visuals
- anime: Japanese animation style
- portrait: Professional portrait photography
- landscape: Nature and scenic photography
- abstract: Non-representational art
- fashion: High-end fashion photography
- architecture: Building and interior design
- food: Culinary and food photography
"""
import httpx
from typing import Optional, Dict, Any, List
from app.config import settings
# Style presets, keyed by style id.
#
# Each preset provides:
#   name          - human-readable label returned to API clients
#   instruction   - guidance text embedded in the LLM prompt for this style
#   negative_base - default negative prompt used when the LLM supplies none
#   technical     - suggested generation parameters merged into the response
STYLE_CONFIGS = {
    "cinematic": {
        "name": "Cinematic",
        "instruction": (
            "Transform this into a cinematic, movie-like scene description with:\n"
            "- Dramatic lighting (golden hour, chiaroscuro, rim lighting, volumetric rays)\n"
            "- Film-quality composition (rule of thirds, leading lines, depth of field)\n"
            "- Atmospheric elements (fog, dust particles, lens flares)\n"
            "- Color grading suggestions (teal and orange, desaturated, high contrast)\n"
            "- Camera movement or angle (dolly shot, crane shot, dutch angle)\n"
            "- Aspect ratio: 21:9 or 2.39:1 for widescreen cinematic feel"
        ),
        "negative_base": "amateur, low budget, poorly lit, flat lighting, snapshot quality",
        "technical": {"aspect_ratio": "21:9", "style": "cinematic"},
    },
    "photographic": {
        "name": "Professional Photography",
        "instruction": (
            "Transform this into a professional photography prompt with:\n"
            "- Specific camera and lens (e.g., Canon EOS R5, Sony A7IV, 85mm f/1.4)\n"
            "- Exact lighting setup (softbox, ring light, natural window light, golden hour)\n"
            "- Technical settings (ISO, aperture, shutter speed)\n"
            "- Composition technique (rule of thirds, symmetry, leading lines)\n"
            "- Post-processing style (high contrast, film emulation, clean edit)"
        ),
        "negative_base": "blurry, out of focus, overexposed, underexposed, amateur",
        "technical": {"quality": "high", "style": "photorealistic"},
    },
    "artistic": {
        "name": "Fine Art",
        "instruction": (
            "Transform this into an artistic, painterly description with:\n"
            "- Art movement reference (Impressionism, Surrealism, Art Nouveau, Baroque)\n"
            "- Specific artist style influence (Monet, Van Gogh, Klimt, Dali)\n"
            "- Medium specification (oil on canvas, watercolor, digital painting)\n"
            "- Brushwork and texture details (impasto, glazing, wet-on-wet)\n"
            "- Color palette (complementary, analogous, monochromatic)\n"
            "- Emotional mood and atmosphere"
        ),
        "negative_base": "photorealistic, photograph, digital render, 3D, CGI",
        "technical": {"style": "artistic"},
    },
    "product": {
        "name": "Product Photography",
        "instruction": (
            "Transform this into professional product photography with:\n"
            "- Clean, commercial backdrop (white seamless, gradient, lifestyle setting)\n"
            "- Studio lighting setup (three-point lighting, beauty dish, softbox)\n"
            "- Hero shot composition (angle, distance, focal point)\n"
            "- Reflection and shadow control\n"
            "- Brand-appropriate styling\n"
            "- E-commerce or advertising context"
        ),
        "negative_base": "cluttered background, amateur lighting, dirty, damaged",
        "technical": {"background": "transparent", "quality": "high"},
    },
    "fantasy": {
        "name": "Fantasy Art",
        "instruction": (
            "Transform this into a fantastical, imaginative scene with:\n"
            "- Magical elements (glowing particles, ethereal light, mystical symbols)\n"
            "- Otherworldly setting details (floating islands, crystal formations, ancient ruins)\n"
            "- Fantasy creature or character design elements\n"
            "- Epic scale and grandeur\n"
            "- Rich color palette (jewel tones, iridescent, bioluminescent)\n"
            "- Atmospheric effects (mist, aurora, magical energy)"
        ),
        "negative_base": "mundane, realistic, boring, plain, everyday",
        "technical": {"style": "fantasy-art"},
    },
    "minimal": {
        "name": "Minimalist",
        "instruction": (
            "Transform this into a minimalist, clean description with:\n"
            "- Negative space utilization (vast empty areas, breathing room)\n"
            "- Limited color palette (monochrome, two-tone, muted)\n"
            "- Simple geometric forms\n"
            "- Clean lines and shapes\n"
            "- Subtle textures\n"
            "- Zen-like calm and balance"
        ),
        "negative_base": "cluttered, busy, complex, detailed, ornate, decorated",
        "technical": {"style": "minimal"},
    },
    "vintage": {
        "name": "Vintage/Retro",
        "instruction": (
            "Transform this into a vintage, retro-styled description with:\n"
            "- Era-specific details (1920s Art Deco, 1950s Americana, 1970s psychedelic, 1980s neon)\n"
            "- Film stock characteristics (Kodachrome, Polaroid, black and white)\n"
            "- Grain and texture (film grain, light leaks, vignette)\n"
            "- Period-appropriate color palette (sepia, faded, cross-processed)\n"
            "- Nostalgic elements and props\n"
            "- Authentic vintage aesthetic"
        ),
        "negative_base": "modern, digital, contemporary, clean, sharp",
        "technical": {"style": "analog-film"},
    },
    "futuristic": {
        "name": "Sci-Fi/Futuristic",
        "instruction": (
            "Transform this into a futuristic, sci-fi description with:\n"
            "- Advanced technology elements (holograms, neon lights, cybernetic)\n"
            "- Futuristic architecture (sleek, geometric, towering)\n"
            "- Sci-fi lighting (neon, bioluminescent, holographic)\n"
            "- Cyberpunk or utopian aesthetic\n"
            "- High-tech materials (chrome, glass, LED)\n"
            "- Atmospheric sci-fi elements (rain, smog, data streams)"
        ),
        "negative_base": "primitive, ancient, rustic, natural, organic",
        "technical": {"style": "neon-punk"},
    },
    "anime": {
        "name": "Anime/Manga",
        "instruction": (
            "Transform this into anime/manga style with:\n"
            "- Character design elements (large expressive eyes, dynamic poses)\n"
            "- Japanese animation aesthetic (cel shading, speed lines)\n"
            "- Studio style reference (Studio Ghibli, Makoto Shinkai, MAPPA)\n"
            "- Dramatic lighting and composition\n"
            "- Vibrant color palette\n"
            "- Emotional expression and atmosphere"
        ),
        "negative_base": "realistic, photograph, western cartoon, 3D render",
        "technical": {"style": "anime"},
    },
    "portrait": {
        "name": "Portrait Photography",
        "instruction": (
            "Transform this into professional portrait photography with:\n"
            "- Flattering lighting setup (Rembrandt, butterfly, split lighting)\n"
            "- Lens choice for portraits (85mm, 105mm, shallow depth of field)\n"
            "- Background treatment (bokeh, studio backdrop, environmental)\n"
            "- Skin tone and texture (natural, retouched, editorial)\n"
            "- Expression and emotion capture\n"
            "- Composition (headshot, half-body, full-body)"
        ),
        "negative_base": "unflattering angle, harsh shadows, distorted features",
        "technical": {"style": "photographic"},
    },
    "landscape": {
        "name": "Landscape Photography",
        "instruction": (
            "Transform this into epic landscape photography with:\n"
            "- Golden hour or blue hour lighting\n"
            "- Weather and atmospheric conditions (dramatic clouds, fog, storm)\n"
            "- Geographic specificity (mountains, ocean, forest, desert)\n"
            "- Foreground interest and depth\n"
            "- Wide-angle perspective\n"
            "- Long exposure effects (smooth water, star trails)"
        ),
        "negative_base": "flat, boring, midday harsh light, no depth",
        "technical": {"aspect_ratio": "16:9", "style": "photorealistic"},
    },
    "abstract": {
        "name": "Abstract Art",
        "instruction": (
            "Transform this into abstract art with:\n"
            "- Non-representational forms and shapes\n"
            "- Color theory application (complementary, triadic, split-complementary)\n"
            "- Texture and pattern exploration\n"
            "- Movement and flow\n"
            "- Emotional expression through color and form\n"
            "- Artistic technique (drip, splatter, geometric)"
        ),
        "negative_base": "representational, realistic, figurative, recognizable objects",
        "technical": {"style": "digital-art"},
    },
    "fashion": {
        "name": "Fashion Photography",
        "instruction": (
            "Transform this into high-end fashion photography with:\n"
            "- Editorial or commercial context\n"
            "- Designer styling and wardrobe\n"
            "- High-fashion lighting (dramatic, clean, artistic)\n"
            "- Model pose and expression\n"
            "- Location or studio setting\n"
            "- Magazine-worthy composition"
        ),
        "negative_base": "casual, everyday, amateur, unflattering",
        "technical": {"style": "photographic", "quality": "high"},
    },
    "architecture": {
        "name": "Architectural Photography",
        "instruction": (
            "Transform this into architectural photography with:\n"
            "- Building style and era (modern, classical, brutalist, Art Deco)\n"
            "- Perspective and angles (worm's eye, bird's eye, straight-on)\n"
            "- Interior or exterior focus\n"
            "- Lighting conditions (golden hour, twilight, dramatic shadows)\n"
            "- Detail and texture emphasis\n"
            "- Scale and grandeur"
        ),
        "negative_base": "distorted, amateur angle, poor lighting, obstructed view",
        "technical": {"style": "photographic"},
    },
    "food": {
        "name": "Food Photography",
        "instruction": (
            "Transform this into appetizing food photography with:\n"
            "- Styling and plating details\n"
            "- Lighting setup (backlit, side-lit, soft diffused)\n"
            "- Props and context (table setting, ingredients, utensils)\n"
            "- Texture and freshness emphasis\n"
            "- Color harmony and contrast\n"
            "- Angle (overhead, 45-degree, eye-level)"
        ),
        "negative_base": "unappetizing, messy, cold, stale, poor presentation",
        "technical": {"style": "photographic", "quality": "high"},
    },
}
# Per-provider prompt tuning, keyed by target image-generation provider id.
#
#   max_length   - maximum prompt length, in characters, accepted for the target
#   style_suffix - quality keywords/flags appended to every enhanced prompt
#   avoid        - provider-flavoured negative terms (not read elsewhere in this
#                  part of the file)
PROVIDER_OPTIMIZATIONS = {
    "openai": {
        "max_length": 4000,
        "style_suffix": "highly detailed, professional quality",
        "avoid": "text, watermarks, logos",
    },
    "gpt-image-1": {
        "max_length": 32000,
        "style_suffix": "highly detailed, professional quality, masterpiece",
        "avoid": "text, watermarks, logos, blurry",
    },
    "stable-diffusion": {
        "max_length": 500,
        "style_suffix": "(masterpiece, best quality, highly detailed)",
        "avoid": "(worst quality, low quality, blurry, distorted)",
    },
    "midjourney": {
        "max_length": 600,
        "style_suffix": "--v 6 --q 2 --s 750",
        "avoid": "--no text, watermarks, blurry",
    },
    "flux": {
        "max_length": 2000,
        "style_suffix": "ultra high quality, professional, detailed",
        "avoid": "low quality, amateur, blurry",
    },
    "leonardo": {
        "max_length": 1000,
        "style_suffix": "highly detailed, professional, stunning",
        "avoid": "low quality, blurry, distorted",
    },
}
async def enhance(
    prompt: str,
    style: str = "cinematic",
    provider: str = "openai",
    include_negative: bool = True,
    include_technical: bool = True,
    language: str = "en"
) -> dict:
    """Enhance a prompt with an LLM, falling back to rule-based enhancement.

    Backends are tried in order (Google Gemini, then OpenAI), each only when
    its API key is configured in ``settings``. The first backend that returns
    an ``enhanced_prompt`` wins. If none does, whether because no key is set
    or because every call failed, the rule-based fallback is used so the
    caller always receives an enhanced prompt.

    Args:
        prompt: The original prompt to enhance.
        style: Style preset id (see STYLE_CONFIGS). Unknown ids use the
            "cinematic" preset.
        provider: Target image generation provider (see
            PROVIDER_OPTIMIZATIONS). Unknown ids use the "openai" entry.
        include_negative: Whether to include ``negative_prompt`` in the result.
        include_technical: Whether to include ``technical_params`` in the result.
        language: Output language code forwarded to the LLM backends.

    Returns:
        Dictionary with ``original_prompt``, ``enhanced_prompt``, ``style``,
        ``style_name`` and ``provider``; plus ``negative_prompt`` and
        ``technical_params`` when requested, and ``suggestions`` / ``note``
        when the chosen backend produced them.
    """
    style_config = STYLE_CONFIGS.get(style, STYLE_CONFIGS["cinematic"])
    provider_config = PROVIDER_OPTIMIZATIONS.get(provider, PROVIDER_OPTIMIZATIONS["openai"])

    # Preference order: Gemini first, then OpenAI. Only configured backends run.
    backends = []
    if settings.google_api_key:
        backends.append(_enhance_with_gemini)
    if settings.openai_api_key:
        backends.append(_enhance_with_openai)

    enhanced_result = None
    failure_notes = []
    for backend in backends:
        candidate = await backend(prompt, style_config, provider_config, language)
        if candidate and candidate.get("enhanced_prompt"):
            enhanced_result = candidate
            break
        # A failed backend returns a dict that carries only a "note"; such a
        # dict is truthy, so it must not be mistaken for a usable result.
        if candidate and candidate.get("note"):
            failure_notes.append(candidate["note"])

    if enhanced_result is None:
        enhanced_result = _enhance_fallback(prompt, style_config, provider_config)
        if failure_notes:
            # Report why the LLM path was skipped instead of the fallback's
            # generic note.
            enhanced_result = {
                **enhanced_result,
                "note": "; ".join(failure_notes) + "; used rule-based fallback",
            }

    response = {
        "original_prompt": prompt,
        "enhanced_prompt": enhanced_result.get("enhanced_prompt", prompt),
        "style": style,
        "style_name": style_config["name"],
        "provider": provider
    }
    if include_negative:
        # `or` also covers a backend that returned an empty negative prompt.
        response["negative_prompt"] = (
            enhanced_result.get("negative_prompt")
            or style_config.get("negative_base", "blurry, low quality, distorted")
        )
    if include_technical:
        response["technical_params"] = {
            **style_config.get("technical", {}),
            "max_prompt_length": provider_config["max_length"]
        }
    if enhanced_result.get("suggestions"):
        response["suggestions"] = enhanced_result["suggestions"]
    if enhanced_result.get("note"):
        response["note"] = enhanced_result["note"]
    return response
async def _enhance_with_gemini(
    prompt: str,
    style_config: dict,
    provider_config: dict,
    language: str
) -> Optional[Dict[str, Any]]:
    """Enhance a prompt using Google Gemini.

    Args:
        prompt: The original user prompt.
        style_config: A style preset; ``name`` and ``instruction`` are
            embedded in the request, ``negative_base`` is the default
            negative prompt.
        provider_config: Target-provider settings; ``max_length`` (default
            1000) bounds the result and ``style_suffix`` is appended to it.
        language: Output language code; anything other than "en" adds an
            "Output in <language>" guideline.

    Returns:
        On success a dict with ``enhanced_prompt``, ``negative_prompt`` and
        ``suggestions``. On any exception a dict with only a ``note``
        describing the failure.
    """
    try:
        import google.generativeai as genai

        genai.configure(api_key=settings.google_api_key)
        model = genai.GenerativeModel("gemini-2.0-flash-exp")

        max_len = provider_config.get("max_length", 1000)
        # Only emit guideline 7 when there is something to say.
        language_rule = f"\n7. Output in {language}" if language != "en" else ""
        system_prompt = (
            "You are an expert AI image prompt engineer. Your task is to transform basic prompts "
            "into detailed, professional prompts optimized for AI image generation.\n"
            f"STYLE: {style_config['name']}\n"
            f"{style_config['instruction']}\n"
            f"OPTIMIZATION TARGET: {max_len} characters maximum\n"
            "Guidelines:\n"
            "1. Add specific visual details (lighting, colors, textures, materials)\n"
            "2. Include composition and framing suggestions\n"
            "3. Add atmosphere, mood, and emotional tone\n"
            "4. Be specific about quality indicators\n"
            f"5. Keep under {max_len} characters\n"
            f"6. Make it suitable for AI image generators{language_rule}\n"
            f"ORIGINAL PROMPT: {prompt}\n"
            "Respond in this exact format:\n"
            "ENHANCED: [your enhanced prompt here]\n"
            "NEGATIVE: [negative prompt - things to avoid]\n"
            "SUGGESTIONS: [1-2 additional tips for better results]"
        )

        # Use the async API so the request does not block the event loop.
        response = await model.generate_content_async(system_prompt)
        text = response.text.strip()

        # Parse the ENHANCED / NEGATIVE / SUGGESTIONS sections.
        negative = ""
        tips = ""
        _, has_enhanced, body = text.partition("ENHANCED:")
        if has_enhanced:
            enhanced, has_negative, rest = body.partition("NEGATIVE:")
            if has_negative:
                negative, _, tips = rest.partition("SUGGESTIONS:")
            else:
                # Keep a SUGGESTIONS section out of the prompt even when the
                # NEGATIVE section is missing.
                enhanced, _, tips = enhanced.partition("SUGGESTIONS:")
        else:
            # Format not followed: use the full response as the enhanced prompt.
            enhanced = text

        enhanced_prompt = enhanced.strip() or prompt
        negative_prompt = negative.strip() or style_config.get("negative_base", "")
        suggestions = [line.strip() for line in tips.splitlines() if line.strip()]

        suffix = provider_config.get("style_suffix")
        if suffix:
            # Shorten the body first so the provider suffix (which may hold
            # flags such as "--v 6") is never the part that gets cut off.
            budget = max_len - len(suffix) - 2  # 2 == len(", ")
            if 3 < budget < len(enhanced_prompt):
                enhanced_prompt = enhanced_prompt[:budget - 3].rstrip() + "..."
            enhanced_prompt = f"{enhanced_prompt}, {suffix}"
        if len(enhanced_prompt) > max_len:
            # Hard cap: no suffix, or the suffix alone nearly fills the limit.
            enhanced_prompt = enhanced_prompt[:max(max_len - 3, 0)] + "..."

        return {
            "enhanced_prompt": enhanced_prompt,
            "negative_prompt": negative_prompt,
            "suggestions": suggestions
        }
    except Exception as e:
        # Best-effort backend: report the failure to the caller via a note
        # instead of raising.
        return {"note": f"Gemini enhancement failed: {str(e)}"}
async def _enhance_with_openai(
    prompt: str,
    style_config: dict,
    provider_config: dict,
    language: str
) -> Optional[Dict[str, Any]]:
    """Enhance a prompt using the OpenAI chat completions API (gpt-4o-mini).

    Args:
        prompt: The original user prompt.
        style_config: A style preset; ``name`` and ``instruction`` are
            embedded in the request, ``negative_base`` becomes the negative
            prompt.
        provider_config: Target-provider settings; ``max_length`` (default
            1000) bounds the result and ``style_suffix`` is appended to it.
        language: Accepted for signature parity with the Gemini backend; it
            is not used by this request.

    Returns:
        On success a dict with ``enhanced_prompt`` and ``negative_prompt``.
        On any exception, or an empty completion, a dict with only a
        ``note`` describing the failure.
    """
    try:
        max_len = provider_config.get("max_length", 1000)
        system_message = (
            "You are an expert AI image prompt engineer. Transform basic prompts into "
            "detailed, professional prompts.\n"
            f"STYLE: {style_config['name']}\n"
            f"{style_config['instruction']}\n"
            f"Keep under {max_len} characters. Be specific about visual details, "
            "lighting, composition, and mood."
        )
        user_message = (
            f"Enhance this prompt for {style_config['name']} style:\n\n{prompt}\n\n"
            "Respond with only the enhanced prompt, nothing else."
        )

        async with httpx.AsyncClient(timeout=60) as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {settings.openai_api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": "gpt-4o-mini",
                    "messages": [
                        {"role": "system", "content": system_message},
                        {"role": "user", "content": user_message}
                    ],
                    "temperature": 0.7,
                    "max_tokens": 500
                }
            )
            response.raise_for_status()
            data = response.json()

        # Message content can be null, so guard before stripping.
        content = data["choices"][0]["message"]["content"]
        enhanced_prompt = (content or "").strip()
        if not enhanced_prompt:
            return {"note": "OpenAI enhancement failed: empty response"}

        suffix = provider_config.get("style_suffix")
        if suffix:
            # Shorten the body first so the provider suffix (which may hold
            # flags such as "--v 6") is never the part that gets cut off.
            budget = max_len - len(suffix) - 2  # 2 == len(", ")
            if 3 < budget < len(enhanced_prompt):
                enhanced_prompt = enhanced_prompt[:budget - 3].rstrip() + "..."
            enhanced_prompt = f"{enhanced_prompt}, {suffix}"
        if len(enhanced_prompt) > max_len:
            # Hard cap: no suffix, or the suffix alone nearly fills the limit.
            enhanced_prompt = enhanced_prompt[:max(max_len - 3, 0)] + "..."

        return {
            "enhanced_prompt": enhanced_prompt,
            "negative_prompt": style_config.get("negative_base", "blurry, low quality")
        }
    except Exception as e:
        # Best-effort backend: report the failure to the caller via a note
        # instead of raising.
        return {"note": f"OpenAI enhancement failed: {str(e)}"}
def _enhance_fallback(
prompt: str,
style_config: dict,
provider_config: dict
) -> Dict[str, Any]:
"""Rule-based fallback enhancement when no API is available"""
# Basic enhancement patterns
enhancements = {
"cinematic": "cinematic lighting, dramatic composition, film grain, shallow depth of field, atmospheric, 8K resolution",
"photographic": "professional photography, sharp focus, natural lighting, high resolution, detailed",
"artistic": "artistic style, painterly, rich colors, textured brushstrokes, masterpiece",
"product": "studio lighting, clean white background, professional product photography, sharp details",
"fantasy": "magical atmosphere, ethereal lighting, fantasy art style, highly detailed, epic scale",
"minimal": "minimalist composition, clean lines, negative space, simple elegant",
"vintage": "vintage aesthetic, film grain, warm tones, retro style, nostalgic",
"futuristic": "futuristic, sci-fi, neon lights, cyberpunk aesthetic, high tech",
"anime": "anime style, vibrant colors, expressive, Japanese animation aesthetic",
"portrait": "portrait photography, professional lighting, shallow depth of field, sharp focus",
"landscape": "epic landscape, golden hour lighting, dramatic sky, high resolution",
"abstract": "abstract art, bold colors, dynamic composition, non-representational",
"fashion": "high fashion photography, editorial style, professional lighting, elegant",
"architecture": "architectural photography, dramatic angles, professional composition",
"food": "food photography, appetizing presentation, professional lighting, fresh"
}
style_key = style_config.get("name", "cinematic").lower().replace(" ", "_").replace("/", "_")
base_enhancement = enhancements.get(style_key, enhancements["cinematic"])
enhanced_prompt = f"{prompt}, {base_enhancement}"
if provider_config.get("style_suffix"):
enhanced_prompt = f"{enhanced_prompt}, {provider_config['style_suffix']}"
return {
"enhanced_prompt": enhanced_prompt,
"negative_prompt": style_config.get("negative_base", "blurry, low quality, distorted, poorly drawn"),
"note": "Enhanced using rule-based system (API keys not configured)"
}
def get_available_styles() -> List[Dict[str, str]]:
    """Return the id and display name of every style preset.

    Returns:
        One ``{"id": ..., "name": ...}`` dict per entry of STYLE_CONFIGS,
        in the mapping's iteration order.
    """
    return [
        {"id": style_id, "name": preset["name"]}
        for style_id, preset in STYLE_CONFIGS.items()
    ]
def get_style_info(style: str) -> Optional[Dict[str, Any]]:
    """Look up the details of a single style preset.

    Args:
        style: Style id, i.e. a key of STYLE_CONFIGS.

    Returns:
        None when the id has no (non-empty) preset; otherwise a dict with
        the style ``id``, its ``name``, a ``description`` (the first line of
        the preset's instruction text), and its ``technical`` and
        ``negative_base`` values (``{}`` / ``""`` when absent).
    """
    preset = STYLE_CONFIGS.get(style)
    if not preset:
        return None
    return {
        "id": style,
        "name": preset["name"],
        # Everything before the first newline is the one-line summary.
        "description": preset["instruction"].partition("\n")[0],
        "technical": preset.get("technical", {}),
        "negative_base": preset.get("negative_base", ""),
    }