diff --git a/backend/app/api/v1/admin.py b/backend/app/api/v1/admin.py index d2cbf96..d5551e2 100644 --- a/backend/app/api/v1/admin.py +++ b/backend/app/api/v1/admin.py @@ -247,6 +247,86 @@ async def get_usage_reports( } +@router.get("/logs/search") +async def search_usage_logs( + query: Optional[str] = None, + provider: Optional[str] = None, + user_id: Optional[str] = None, + start_date: Optional[str] = None, # ISO format + page: int = Query(1, ge=1), + limit: int = Query(20, le=100), + db: Session = Depends(get_db), + admin: User = Depends(get_current_admin_user) +): + """ + Search usage logs by filename, prompt, user, or provider. + Surface detailed cost and metadata. + """ + sql_query = db.query(UsageLog, User).join(User, UsageLog.user_id == User.id) + + # 1. Text Search (Metadata) + if query: + # Search inside JSONB metadata fields (filename, prompt, etc) + # Cast JSONB to text for searching + search_term = f"%{query}%" + sql_query = sql_query.filter( + func.cast(UsageLog.request_metadata, String).ilike(search_term) | + func.cast(UsageLog.response_metadata, String).ilike(search_term) | + UsageLog.action.ilike(search_term) + ) + + # 2. 
Filters + if provider: + sql_query = sql_query.filter(UsageLog.api_provider == provider) + + if user_id: + sql_query = sql_query.filter(UsageLog.user_id == user_id) + + if start_date: + try: + dt = datetime.fromisoformat(start_date.replace('Z', '+00:00')) + sql_query = sql_query.filter(UsageLog.created_at >= dt) + except ValueError: + pass + + # Pagination + total = sql_query.count() + logs = sql_query.order_by(desc(UsageLog.created_at)).offset((page - 1) * limit).limit(limit).all() + + items = [] + for log, user in logs: + items.append({ + "id": str(log.id), + "timestamp": log.created_at.isoformat(), + "user": { + "id": str(user.id), + "email": user.email, + "name": user.display_name + }, + "service": { + "module": log.module, + "provider": log.api_provider, + "model": log.api_model + }, + "metrics": { + "tokens_in": log.tokens_input, + "tokens_out": log.tokens_output, + "cost_usd": float(log.estimated_cost_usd or 0), + "latency_ms": log.processing_time_ms + }, + # Return specific metadata fields relevant for UI + "request_details": log.request_metadata, + "response_details": log.response_metadata + }) + + return { + "items": items, + "total": total, + "page": page, + "limit": limit + } + + @router.get("/audit-logs") async def get_audit_logs( page: int = Query(1, ge=1), diff --git a/backend/app/api/v1/assets.py b/backend/app/api/v1/assets.py index 8435cce..c6f47d7 100644 --- a/backend/app/api/v1/assets.py +++ b/backend/app/api/v1/assets.py @@ -192,6 +192,146 @@ def download_asset(asset_id: UUID, db: Session = Depends(get_db)): ) +async def process_upload( + file: UploadFile, + db: Session, + user: Optional[User] = None, + project_id: Optional[str] = None, + source_module: Optional[str] = None, + is_temporary: bool = False, + overwrite: bool = False +) -> Asset: + """Core logic for uploading/saving an asset""" + + # Check for duplicates if not temporary + existing_asset = None + if not is_temporary and user: + existing_asset = db.query(Asset).filter( + 
Asset.user_id == user.id, + Asset.original_filename == file.filename, + Asset.is_temporary == False + ).first() + + if existing_asset: + if not overwrite: + # Return conflict with existing ID + # We interpret 409 specially in frontend + raise HTTPException( + status_code=409, + detail={"message": "File exists", "asset_id": str(existing_asset.id)} + ) + else: + # Overwrite: Delete existing file on disk but KEEP the record + if os.path.exists(existing_asset.file_path): + try: + os.remove(existing_asset.file_path) + except OSError: + pass + if existing_asset.thumbnail_path and os.path.exists(existing_asset.thumbnail_path): + try: + os.remove(existing_asset.thumbnail_path) + except OSError: + pass + + # Reuse the existing ID + asset_id = existing_asset.id + + # Determine file type + file_type = get_file_type(file.content_type) + + # Generate unique ID if new, otherwise reuse + if not 'asset_id' in locals(): + asset_id = uuid4() + + ext = os.path.splitext(file.filename)[1] if file.filename else "" + stored_filename = f"{asset_id}{ext}" + + # Determine storage path + storage_dir = os.path.join(settings.storage_path, f"{file_type}s") + os.makedirs(storage_dir, exist_ok=True) + file_path = os.path.join(storage_dir, stored_filename) + + # Save file + try: + with open(file_path, "wb") as buffer: + # Read in chunks to handle large files + while content := await file.read(1024 * 1024): + buffer.write(content) + await file.seek(0) # Reset cursor + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to save file: {str(e)}") + + # Extract metadata + width, height, duration_seconds = None, None, None + + try: + if file_type == "image": + with Image.open(file_path) as img: + width, height = img.size + elif file_type == "video": + # Placeholder for video metadata + # In production, use ffmpeg probe + from app.utils.video import extract_video_metadata + metadata = extract_video_metadata(file_path) + width = metadata.get('width') + height = 
metadata.get('height') + duration_seconds = metadata.get('duration_seconds') + except Exception: + pass # Ignore metadata extraction errors + + # Generate thumbnail + thumbnail_path = generate_thumbnail(file_path, file_type, str(asset_id)) + + # Create or Update Asset record + file_size = os.path.getsize(file_path) + + if existing_asset: + # Update existing record + existing_asset.stored_filename = stored_filename + existing_asset.file_path = file_path + existing_asset.thumbnail_path = thumbnail_path + existing_asset.file_type = file_type + existing_asset.mime_type = file.content_type + existing_asset.file_size_bytes = file_size + existing_asset.width = width + existing_asset.height = height + existing_asset.duration_seconds = duration_seconds + existing_asset.source_module = source_module + # Don't update project_id unless specified? For now keep it simple. + if project_id: + existing_asset.project_id = UUID(project_id) + + db.commit() + db.refresh(existing_asset) + return existing_asset + else: + # Create new record + asset = Asset( + id=asset_id, + user_id=user.id if user else None, + project_id=UUID(project_id) if project_id else None, + original_filename=file.filename, + stored_filename=stored_filename, + file_path=file_path, + thumbnail_path=thumbnail_path, + file_type=file_type, + mime_type=file.content_type, + file_size_bytes=file_size, + width=width, + height=height, + duration_seconds=duration_seconds, + + source_module=source_module, + is_temporary=is_temporary + ) + + db.add(asset) + db.commit() + db.refresh(asset) + + return asset + + @router.post("/upload", response_model=AssetResponse) async def upload_asset( file: UploadFile = File(...), @@ -205,108 +345,16 @@ async def upload_asset( # Get test user user = db.query(User).filter(User.email == "test@forge.ai").first() - # Check for duplicates if not temporary - if not is_temporary and user: - existing = db.query(Asset).filter( - Asset.user_id == user.id, - Asset.original_filename == file.filename, - 
Asset.is_temporary == False - ).first() - - if existing: - if not overwrite: - # Return conflict with existing ID - # We interpret 409 specially in frontend - raise HTTPException( - status_code=409, - detail={"message": "File exists", "asset_id": str(existing.id)} - ) - else: - # Overwrite: Delete existing file and record - if os.path.exists(existing.file_path): - try: - os.remove(existing.file_path) - except OSError: - pass - if existing.thumbnail_path and os.path.exists(existing.thumbnail_path): - try: - os.remove(existing.thumbnail_path) - except OSError: - pass - - db.delete(existing) - db.commit() - - # Determine file type - file_type = get_file_type(file.content_type) - - # Generate unique ID and filename - asset_id = uuid4() - ext = os.path.splitext(file.filename)[1] if file.filename else "" - stored_filename = f"{asset_id}{ext}" - - # Determine storage path - storage_dir = os.path.join(settings.storage_path, f"{file_type}s") - os.makedirs(storage_dir, exist_ok=True) - file_path = os.path.join(storage_dir, stored_filename) - - # Save file - with open(file_path, "wb") as buffer: - shutil.copyfileobj(file.file, buffer) - - # Get file size - file_size = os.path.getsize(file_path) - - # Get media dimensions and duration if applicable - width = None - height = None - duration_seconds = None - - if file_type == "image": - try: - with Image.open(file_path) as img: - width, height = img.size - except Exception: - pass - elif file_type == "video": - try: - from app.utils.video import extract_video_metadata - metadata = extract_video_metadata(file_path) - width = metadata.get('width') - height = metadata.get('height') - duration_seconds = metadata.get('duration_seconds') - except Exception as e: - print(f"Failed to extract video metadata: {e}") - - # Generate thumbnail - thumbnail_path = generate_thumbnail(file_path, file_type, str(asset_id)) - - # Create asset record - asset = Asset( - id=asset_id, - user_id=user.id if user else None, - project_id=UUID(project_id) if 
(project_id and isinstance(project_id, str)) else None, - original_filename=file.filename, - stored_filename=stored_filename, - file_path=file_path, - thumbnail_path=thumbnail_path, - file_type=file_type, - mime_type=file.content_type, - file_size_bytes=file_size, - width=width, - height=height, - duration_seconds=duration_seconds, - + return await process_upload( + file=file, + db=db, + user=user, + project_id=project_id, source_module=source_module, - is_temporary=is_temporary + is_temporary=is_temporary, + overwrite=overwrite ) - db.add(asset) - db.commit() - db.refresh(asset) - - return asset - @router.patch("/{asset_id}", response_model=AssetResponse) def update_asset(asset_id: UUID, asset_update: AssetUpdate, db: Session = Depends(get_db)): diff --git a/backend/app/api/v1/modules.py b/backend/app/api/v1/modules.py index 7c4fa15..f829120 100644 --- a/backend/app/api/v1/modules.py +++ b/backend/app/api/v1/modules.py @@ -204,6 +204,13 @@ class PromptEnhanceRequest(BaseModel): include_negative: bool = True include_technical: bool = True language: str = "en" + + # CinePrompt Studio Advanced Fields + application: Optional[str] = None + camera: Optional[str] = None + lens: Optional[str] = None + aspect_ratio: Optional[str] = "16:9" + creative_freedom: float = 0.3 class MermaidRenderRequest(BaseModel): @@ -520,21 +527,23 @@ async def generate_subtitles( italic: bool = Form(False), font_preset: Optional[str] = Form(None), word_timestamps: bool = Form(False), + subtitle_file: UploadFile = File(None), + subtitle_asset_id: Optional[str] = Form(None), background_tasks: BackgroundTasks = None, db: Session = Depends(get_db) ): """ - Generate subtitles for video using Whisper + DeepL - - Parameters: - - source_language: Source language code or "auto" for detection - - target_language: Target language code for translation (optional) - - burn_subtitles: Whether to burn subtitles into video - - whisper_model: Whisper model (tiny/base/small/medium/large/large-v2/large-v3) - - 
output_format: Output format (srt/vtt/ass) - - Styling (for burning): - - font: Font family name + Generate subtitles for a video using OpenAI Whisper. + + - **file**: Video file to process + - **source_language**: Language of the video (auto for detection) + - **target_language**: Language to translate to (optional) + - **burn_subtitles**: Burn subtitles into the video + - **whisper_model**: Whisper model size (tiny, base, small, medium, large) + - **output_format**: Output subtitle format (srt, vtt, ass) + + Styling Options (for burning): + - font: Font family (Arial, Helvetica, etc.) - checks system availability - font_size: Font size in points - text_color: Primary text color - outline_color: Text outline color @@ -550,50 +559,80 @@ async def generate_subtitles( - italic: Use italic text - font_preset: Predefined style preset (default/cinematic/documentary/news/social_media/minimal/bold) - word_timestamps: Include word-level timestamps + - subtitle_file: Optional subtitle file (SRT) to burn instead of generating + - subtitle_asset_id: Optional asset ID of existing subtitle to burn """ user = db.query(User).filter(User.email == "test@forge.ai").first() + + # Fallback to a default user or handle None if test user doesn't exist + if not user: + # Try to find any admin user or proceed with None (if Asset/Job models allow null user_id) + user = db.query(User).order_by(User.id).first() + + import structlog + logger = structlog.get_logger() + + logger.info("Subtitle generation request received", + filename=file.filename, + source_language=source_language, + target_language=target_language, + burn_subtitles=burn_subtitles, + font=font, + has_subtitle_file=bool(subtitle_file), + subtitle_asset_id=subtitle_asset_id) - from app.api.v1.assets import upload_asset - asset = await upload_asset(file=file, source_module="subtitle_processor", db=db) + from app.api.v1.assets import process_upload + try: + asset = await process_upload(file=file, source_module="subtitle_processor", 
db=db, user=user, overwrite=True) - job = Job( - user_id=user.id if user else None, - module="subtitle_processor", - action="generate", - input_data={ - "source_language": source_language, - "target_language": target_language, - "burn_subtitles": burn_subtitles, - "whisper_model": whisper_model, - "output_format": output_format, - "font": font, - "font_size": font_size, - "text_color": text_color, - "outline_color": outline_color, - "outline_width": outline_width, - "background_color": background_color, - "background_opacity": background_opacity, - "position": position, - "alignment": alignment, - "margin_v": margin_v, - "margin_h": margin_h, - "shadow": shadow, - "bold": bold, - "italic": italic, - "font_preset": font_preset, - "word_timestamps": word_timestamps - }, - input_asset_ids=[asset.id], - status="queued" - ) - db.add(job) - db.commit() - db.refresh(job) + # Process optional subtitle file upload + input_sub_id = subtitle_asset_id + if subtitle_file: + sub_asset = await process_upload(file=subtitle_file, source_module="subtitle_processor", db=db, user=user, overwrite=True, allow_extensions=['srt', 'vtt', 'ass']) + input_sub_id = str(sub_asset.id) - if background_tasks: - background_tasks.add_task(subtitle_processor.process, str(job.id)) + job = Job( + user_id=user.id if user else None, + module="subtitle_processor", + action="generate", + input_data={ + "source_language": source_language, + "target_language": target_language, + "burn_subtitles": burn_subtitles, + "whisper_model": whisper_model, + "output_format": output_format, + "font": font, + "font_size": font_size, + "text_color": text_color, + "outline_color": outline_color, + "outline_width": outline_width, + "background_color": background_color, + "background_opacity": background_opacity, + "position": position, + "alignment": alignment, + "margin_v": margin_v, + "margin_h": margin_h, + "shadow": shadow, + "bold": bold, + "italic": italic, + "font_preset": font_preset, + "word_timestamps": 
word_timestamps, + "subtitle_asset_id": input_sub_id + }, + input_asset_ids=[asset.id], + status="pending" + ) + db.add(job) + db.commit() + db.refresh(job) - return job_response(job) + if background_tasks: + background_tasks.add_task(subtitle_processor.process, str(job.id)) + + return job_response(job) + except Exception as e: + logger.error("Failed to initiate subtitle job", error=str(e), exc_info=True) + raise HTTPException(status_code=500, detail=f"Failed to start subtitle processing: {str(e)}") # ============== AUDIO MODULES ============== @@ -673,8 +712,8 @@ async def convert_voice( """Convert voice to another voice using ElevenLabs""" user = db.query(User).filter(User.email == "test@forge.ai").first() - from app.api.v1.assets import upload_asset - asset = await upload_asset(file=file, source_module="speech_to_speech", db=db) + from app.api.v1.assets import process_upload + asset = await process_upload(file=file, source_module="speech_to_speech", db=db, user=user, overwrite=True) job = Job( user_id=user.id if user else None, @@ -828,7 +867,13 @@ async def enhance_prompt( provider=request.provider, include_negative=request.include_negative, include_technical=request.include_technical, - language=request.language + language=request.language, + # Advanced CinePrompt params + application=request.application, + camera=request.camera, + lens=request.lens, + aspect_ratio=request.aspect_ratio, + creative_freedom=request.creative_freedom ) return result @@ -839,6 +884,12 @@ async def get_prompt_styles(): return prompt_studio.get_available_styles() +@router.get("/text/cine-options") +async def get_cine_options(): + """Get CinePrompt Studio data (cameras, lenses, etc.)""" + return prompt_studio.get_cine_options() + + # ============== MARKDOWN & MERMAID MODULES ============== @router.post("/text/mermaid/render") diff --git a/backend/app/models/pricing.py b/backend/app/models/pricing.py new file mode 100644 index 0000000..7f148ed --- /dev/null +++ 
b/backend/app/models/pricing.py @@ -0,0 +1,26 @@ +"""Model Pricing Model""" +from sqlalchemy import Column, String, Numeric, Enum, Boolean +from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.sql import func +import uuid +from app.database import Base + +class ModelPricing(Base): + __tablename__ = "model_pricing" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + provider = Column(String(100), nullable=False) # openai, google, runway... + model_name = Column(String(100), nullable=False) # gpt-4o, gen-3-alpha... + + # Pricing Metrics + cost_per_input_token = Column(Numeric(10, 8), default=0) + cost_per_output_token = Column(Numeric(10, 8), default=0) + cost_per_image = Column(Numeric(10, 4), default=0) + cost_per_second = Column(Numeric(10, 4), default=0) + cost_per_1k_chars = Column(Numeric(10, 4), default=0) + cost_per_request = Column(Numeric(10, 4), default=0) + + # Metadata + currency = Column(String(3), default="USD") + effective_date = Column(String(20)) # e.g. "2024-12-01" + is_active = Column(Boolean, default=True) diff --git a/backend/app/services/cine_prompt_studio.py b/backend/app/services/cine_prompt_studio.py new file mode 100644 index 0000000..6134870 --- /dev/null +++ b/backend/app/services/cine_prompt_studio.py @@ -0,0 +1,639 @@ + +from typing import Dict, Any, List, Optional +from app.config import settings + +# ========================================== +# CONSTANTS & DATA +# ========================================== + +CAMERA_DATA = [ + { + "value": "Arri Alexa 35", + "display": "Arri Alexa 35", + "sensorFormat": "Super 35", + "tooltip": "The Hollywood Standard. 
Best for natural skin tones and a classic cinematic 'blockbuster' feel.", + "tags": "Narrative / Drama", + "compatibleLenses": ["Panavision C-Series", "Cooke S7/i", "Canon K-35", "Laowa Probe"], + "physics": "ArriRaw sensor readout, high dynamic range, natural noise floor, thick color science" + }, + { + "value": "Sony Venice 2", + "display": "Sony Venice 2", + "sensorFormat": "Full Frame", + "tooltip": "The Low-Light King. Excellent for night scenes, clean shadows, and a modern aesthetic.", + "tags": "Commercial / Night", + "compatibleLenses": ["Panavision C-Series", "Cooke S7/i", "Canon K-35", "Arri Signature", "Laowa Probe"], + "physics": "Dual ISO digital sensor, clean shadows, modern color science, high frequency detail" + }, + { + "value": "Red V-Raptor", + "display": "Red V-Raptor", + "sensorFormat": "Full Frame", + "tooltip": "Hyper-Real Action. Perfect for high-speed motion, sports, and razor-sharp detail.", + "tags": "Action / Sports", + "compatibleLenses": ["Panavision C-Series", "Cooke S7/i", "Canon K-35", "Arri Signature", "Laowa Probe"], + "physics": "RedCode RAW 8K, clinical sharpness, high shutter angle clarity, hyper-realistic texture" + }, + { + "value": "Arriflex 416", + "display": "Arriflex 416", + "sensorFormat": "Super 16 (Film)", + "tooltip": "Gritty & Nostalgic. High grain, soft focus, and vibrant, messy colors. The 'Indie' look.", + "tags": "Vintage / Music Video", + "compatibleLenses": ["Zeiss Super Speed", "Laowa Probe"], + "physics": "Super 16mm film gate, heavy grain structure, soft optical resolution, vibrant chemical color" + }, + { + "value": "Arricam LT", + "display": "Arricam LT", + "sensorFormat": "35mm (Film)", + "tooltip": "The Golden Age. Fine grain, organic texture, and rich colors. 
The classic movie look before digital.", + "tags": "Period Piece / Premium", + "compatibleLenses": ["Panavision C-Series", "Cooke S7/i", "Canon K-35", "Laowa Probe"], + "physics": "35mm motion picture film stock, organic grain structure, halation on highlights, photochemical dynamic range" + }, + { + "value": "Fujifilm GFX 100", + "display": "Fujifilm GFX 100", + "sensorFormat": "Medium Format", + "tooltip": "The Studio Master. Massive resolution and depth. Unbeatable for print-quality stills.", + "tags": "Product / Fashion", + "compatibleLenses": ["Fujinon GF", "Arri Signature", "Laowa Probe"], + "physics": "Medium format digital sensor, zero circle of confusion, extreme resolution, pore-level detail" + }, + { + "value": "Phantom Flex4K", + "display": "Phantom Flex4K", + "sensorFormat": "Super 35", + "tooltip": "The Time Machine. 1000fps slow motion.", + "tags": "High-Speed / Sports", + "compatibleLenses": ["Panavision C-Series", "Cooke S7/i", "Canon K-35", "Laowa Probe", "Angénieux Optimo"], + "physics": "High-speed global shutter sensor, frozen fluid dynamics, zero motion blur, deep saturation, specialized for 1000fps playback" + }, + { + "value": "Blackmagic URSA Cine 12K", + "display": "URSA Cine 12K", + "sensorFormat": "Full Frame", + "tooltip": "Resolution Monster. Infinite reframing capability.", + "tags": "Future-Proof / VFX", + "compatibleLenses": ["Panavision C-Series", "Cooke S7/i", "Canon K-35", "Arri Signature", "Laowa Probe", "Canon TS-E", "Angénieux Optimo"], + "physics": "12K RGB sensor, extreme resolution, zero aliasing, distinct non-bayer pattern texture, analytics-grade sharpness" + } +] + +LENS_DATA = [ + { + "value": "Panavision C-Series", + "display": "Panavision C-Series", + "compatibleFormats": ["Super 35", "35mm"], + "tooltip": "Classic Widescreen. Horizontal blue flares, oval bokeh. 
The sci-fi blockbuster look.", + "keywords": "Flares, Oval Bokeh", + "physics": "anamorphic optics, characteristic oval bokeh, horizontal blue lens flares, slight barrel distortion" + }, + { + "value": "Cooke S7/i", + "display": "Cooke S7/i", + "compatibleFormats": ["Full Frame", "Super 35", "35mm"], + "tooltip": "The 'Cooke Look.' Warm, gentle, and incredibly flattering. Gold standard for portraits.", + "keywords": "Warmth, Face Focus", + "physics": "Cooke speed panchrio look, warm color rendering, gentle focus falloff, flattering face compression" + }, + { + "value": "Canon K-35", + "display": "Canon K-35", + "compatibleFormats": ["Full Frame", "Super 35", "35mm"], + "tooltip": "Dreamy & Retro. Low contrast, glowing highlights. 1970s/80s vibe.", + "keywords": "Glow, Retro", + "physics": "vintage aspherical elements, glowing highlights, low contrast, rainbow flaring, soft sharpness" + }, + { + "value": "Arri Signature", + "display": "Arri Signature", + "compatibleFormats": ["Large Format", "Full Frame"], + "tooltip": "Modern Perfection. Ultra-clean, no distortion, pure reality. The invisible lens.", + "keywords": "Clean, Realistic", + "physics": "telecentric optical design, zero breathing, ultra-flat field, modern rendering, pure black levels" + }, + { + "value": "Zeiss Super Speed", + "display": "Zeiss Super Speed", + "compatibleFormats": ["Super 16 ONLY"], + "tooltip": "The 16mm Classic. Sharp but textured. Designed specifically for the smaller 16mm film frame.", + "keywords": "Triangular Bokeh, Grit", + "physics": "vintage high-speed glass, triangular bokeh at wide apertures, chromatic aberration, gritty texture" + }, + { + "value": "Fujinon GF", + "display": "Fujinon GF", + "compatibleFormats": ["Medium Format ONLY"], + "tooltip": "Studio Glass. 
Clinically sharp, specifically designed for the massive GFX sensor.", + "keywords": "Clinical Sharpness", + "physics": "modern medium format optics, clinical edge-to-edge sharpness, zero distortion, high micro-contrast" + }, + { + "value": "Laowa Probe", + "display": "Laowa Probe", + "compatibleFormats": ["All Formats"], + "tooltip": "Insect-Eye View. Extreme close-ups of small objects/textures.", + "keywords": "Macro", + "physics": "macro bug-eye perspective, extreme depth of field, tubular lens construction, surreal wide-angle macro" + }, + { + "value": "Helios 44-2", + "display": "Helios 44-2 (Vintage)", + "compatibleFormats": ["Full Frame", "Super 35", "35mm"], + "tooltip": "Swirly Bokeh. The cult classic.", + "keywords": "Swirly Bokeh, Vintage", + "physics": "Vintage Soviet glass, characteristic swirly bokeh at edges, low contrast flaring, soft center focus, dreamlike aberrations" + }, + { + "value": "Canon TS-E", + "display": "Canon Tilt-Shift", + "compatibleFormats": ["Full Frame", "Medium Format"], + "tooltip": "Miniature Effect. Selective focus control.", + "keywords": "Tilt-Shift, Miniature", + "physics": "Tilted focal plane, miniature faking effect, selective focus slice, corrected perspective lines, architectural rigidity" + }, + { + "value": "Angénieux Optimo", + "display": "Angénieux Optimo", + "compatibleFormats": ["Super 35", "Full Frame"], + "tooltip": "The Hollywood Zoom. 
Perfect versatility.", + "keywords": "Cinema Zoom", + "physics": "Cinema zoom optics, warm organic contrast, breathing-free focus pulls, uniform field illumination" + } +] + +APPLICATION_DATA = [ + { + "value": "Portrait Studio", + "lighting": "Rembrandt lighting, softbox diffusion, 3-point setup", + "defaultCamera": "Arri Alexa 35", + "defaultLens": "Cooke S7/i", + "focusType": "stylistic" + }, + { + "value": "Product (Crisp)", + "lighting": "Infinity curve, bright diffuse lighting, shadowless, high key", + "defaultCamera": "Fujifilm GFX 100", + "defaultLens": "Fujinon GF", + "focusType": "realism" + }, + { + "value": "Food Photography", + "lighting": "Natural window light simulation, back-lighting for steam/texture, warm reflector fill, medium depth of field, focus on texture", + "defaultCamera": "Sony Venice 2", + "defaultLens": "Cooke S7/i", + "focusType": "stylistic" + }, + { + "value": "Golden Hour (Outdoor)", + "lighting": "Sun low on horizon, warm orange glow, long dramatic shadows, volumetric backlight, magic hour atmosphere, cinematic depth", + "defaultCamera": "Arricam LT", + "defaultLens": "Cooke S7/i", + "focusType": "stylistic", + "example": "A vintage Lancia Stratos rally car drifting sideways on a dirt track, kicking up a massive wall of dust that glows incandescent gold in the backlight, creating a dramatic silhouette against the sunset." 
+ }, + { + "value": "Blue Hour (City)", + "lighting": "Twilight, deep blue ambient sky light contrasting with warm practical street lamps, moody, atmospheric, balanced exposure", + "defaultCamera": "Sony Venice 2", + "defaultLens": "Arri Signature", + "focusType": "stylistic" + }, + { + "value": "Neon Cyberpunk", + "lighting": "Harsh neon signage, mixed color temp, wet reflections", + "defaultCamera": "Red V-Raptor", + "defaultLens": "Panavision C-Series", + "focusType": "stylistic" + }, + { + "value": "Nostalgic Memory", + "lighting": "Hazy atmosphere, overexposed highlights, light leaks, warm color grade, sentimental mood, soft focus throughout", + "defaultCamera": "Arriflex 416", + "defaultLens": "Zeiss Super Speed", + "focusType": "stylistic" + }, + { + "value": "Corporate Headshot", + "lighting": "Clean white background, high-key lighting, professional balanced fill, sharp focus on eyes, moderate depth of field", + "defaultCamera": "Fujifilm GFX 100", + "defaultLens": "Cooke S7/i", + "focusType": "realism" + }, + { + "value": "Macro: Luxury Jewelry", + "lighting": "Sparkling point-source lighting, black velvet background, high contrast reflection control, focus stacking simulation for complete sharpness", + "defaultCamera": "Fujifilm GFX 100", + "defaultLens": "Laowa Probe", + "focusType": "realism" + }, + { + "value": "Macro: Nature Details", + "lighting": "Diffused natural sunlight, shallow depth of field, vibrant greens, morning dew, microscopic texture", + "defaultCamera": "Arri Alexa 35", + "defaultLens": "Laowa Probe", + "focusType": "stylistic" + }, + { + "value": "Wildlife / Safari", + "lighting": "Telephoto compression, frozen motion, golden hour backlight, natural habitat, separation from background", + "defaultCamera": "Red V-Raptor", + "defaultLens": "Cooke S7/i", + "focusType": "stylistic" + }, + { + "value": "Sports Action", + "lighting": "High shutter speed, frozen particles/sweat, stadium floodlights, dynamic composition, sharp subject focus", 
+ "defaultCamera": "Red V-Raptor", + "defaultLens": "Panavision C-Series", + "focusType": "stylistic" + }, + { + "value": "Street Photography", + "lighting": "Candid moment, natural available light, messy urban background, hyperfocal distance, deep depth of field, everything in focus", + "defaultCamera": "Arriflex 416", + "defaultLens": "Canon K-35", + "focusType": "realism" + }, + { + "value": "Architecture", + "lighting": "Balanced mixed lighting, straight lines, airy atmosphere", + "defaultCamera": "Fujifilm GFX 100", + "defaultLens": "Fujinon GF", + "focusType": "realism" + }, + { + "value": "Fashion Editorial", + "lighting": "Avant-garde lighting, colored gels, stark shadows, high fashion pose, studio backdrop, stylized depth", + "defaultCamera": "Sony Venice 2", + "defaultLens": "Canon K-35", + "focusType": "stylistic" + }, + { + "value": "Cinematic Horror", + "lighting": "Underexposed, single harsh source (flashlight), heavy shadows", + "defaultCamera": "Arricam LT", + "defaultLens": "Canon K-35", + "focusType": "stylistic" + }, + { + "value": "Docu / Realism", + "lighting": "Natural window key light, negative fill, messy authentic background", + "defaultCamera": "Arri Alexa 35", + "defaultLens": "Arri Signature", + "focusType": "realism" + }, + { + "value": "Symmetrical Whimsy", + "lighting": "Shadowless high-key lighting, flat diorama aesthetic, vibrant pastel color palette, 90-degree planimetric composition", + "defaultCamera": "Arricam LT", + "defaultLens": "Cooke S7/i", + "focusType": "stylistic" + }, + { + "value": "IMAX Scale Epic", + "lighting": "Naturalistic practical lighting, cool color temperature, high contrast, immense sense of scale, deep depth of field", + "defaultCamera": "Arri Alexa 35", + "defaultLens": "Arri Signature", + "focusType": "realism" + }, + { + "value": "Clinical Thriller", + "lighting": "Low-key chiaroscuro, controlled shadows, sickly green/yellow color grade, precise stabilized motion", + "defaultCamera": "Red V-Raptor", + 
"defaultLens": "Arri Signature", + "focusType": "stylistic" + }, + { + "value": "Brutalist Atmosphere", + "lighting": "Single source silhouette, atmospheric haze, monochromatic orange/sepia tones, stark geometry, visual silence", + "defaultCamera": "Arri Alexa 35", + "defaultLens": "Panavision C-Series", + "focusType": "stylistic" + }, + { + "value": "Technicolor Dream", + "lighting": "Artificial studio lighting, high saturation, vibrant pinks and cyans, glossy plastic textures, high-key brightness", + "defaultCamera": "Arri Alexa 35", + "defaultLens": "Cooke S7/i", + "focusType": "stylistic" + }, + { + "value": "Obsessive Symmetry", + "lighting": "One-point perspective, deep focus, wide angle distortion, cold practical lighting, clinical perfection", + "defaultCamera": "Arricam LT", + "defaultLens": "Arri Signature", + "focusType": "realism" + }, + { + "value": "Hong Kong Nostalgia", + "lighting": "Step-printing effect, motion blur, neon-soaked humidity, intimate handheld, rain-slicked textures", + "defaultCamera": "Arriflex 416", + "defaultLens": "Zeiss Super Speed", + "focusType": "stylistic" + }, + { + "value": "Industrial Haze", + "lighting": "Volumetric lighting, visible shafts of light (god rays), atmospheric haze, high-density industrial detail", + "defaultCamera": "Arri Alexa 35", + "defaultLens": "Panavision C-Series", + "focusType": "stylistic" + }, + { + "value": "Gothic Fantasy", + "lighting": "German Expressionist lighting, high contrast long shadows, twisted geometry, desaturated palette", + "defaultCamera": "Arri Alexa 35", + "defaultLens": "Panavision C-Series", + "focusType": "stylistic" + }, + { + "value": "LED Volume (Virtual Production)", + "lighting": "Interactive environmental lighting, soft ambient wrap from LED panels, perfect reflection matching, zero green spill", + "defaultCamera": "Arri Alexa 35", + "defaultLens": "Arri Signature", + "focusType": "realism" + }, + { + "value": "Automotive: Showroom", + "lighting": "Massive softbox 
ceiling, continuous highlight lines along bodywork, negative fill to shape curves, pure white infinity cove", + "defaultCamera": "Sony Venice 2", + "defaultLens": "Arri Signature", + "focusType": "realism", + "example": "A silver concept car parked on a pure white infinity curve, continuous highlight lines tracing the aerodynamic bodywork." + }, + { + "value": "Knolling / Flat Lay", + "lighting": "Overhead soft diffuse light, shadowless cavity, high-key evenness, precise grid alignment", + "defaultCamera": "Fujifilm GFX 100", + "defaultLens": "Fujinon GF", + "focusType": "realism" + }, + { + "value": "Conflict Photography", + "lighting": "Harsh midday sun, atmospheric dust and smoke, high contrast, documentary style reality, blown highlights, raw and unpolished", + "defaultCamera": "Arriflex 416", + "defaultLens": "Zeiss Super Speed", + "focusType": "realism" + }, + { + "value": "NYC Street Editorial", + "lighting": "Natural city canyon light, bounce board fill for face, sharp modern contrast, motion blur in background, high-resolution gloss", + "defaultCamera": "Sony Venice 2", + "defaultLens": "Arri Signature", + "focusType": "stylistic" + }, + { + "value": "Underground Rave / Flash", + "lighting": "Direct on-camera flash with slow shutter drag (rear-curtain sync), light trails, laser rim lighting, sweaty atmosphere, darkness crushing the background", + "defaultCamera": "Red V-Raptor", + "defaultLens": "Helios 44-2", + "focusType": "stylistic" + }, + { + "value": "Architectural Digest Interior", + "lighting": "North-facing window soft light, large diffusion frames, negative fill for contrast, texture-raking angle, perfectly balanced exposure", + "defaultCamera": "Fujifilm GFX 100", + "defaultLens": "Canon TS-E", + "focusType": "realism" + }, + { + "value": "90s Grunge Editorial", + "lighting": "Hard direct flash, dirty green/yellow color cast, vignetting, unretouched skin texture, claustrophobic framing", + "defaultCamera": "Arriflex 416", + "defaultLens": "Canon 
K-35", + "focusType": "stylistic" + }, + { + "value": "Cassette Futurism (Retro Sci-Fi)", + "lighting": "Flickering CRT monitor glow, harsh overhead fluorescent strips, brutalist shadows, beige and grey color palette, industrial haze", + "defaultCamera": "Arriflex 416", + "defaultLens": "Panavision C-Series", + "focusType": "stylistic" + }, + { + "value": "Tech Commercial (Macro)", + "lighting": "Slow moving light sweep (motion control), brushed metal reflections, dramatic rim lighting in a black void, sub-surface scattering on materials", + "defaultCamera": "Arri Alexa 35", + "defaultLens": "Laowa Probe", + "focusType": "realism" + }, + { + "value": "Surreal Infrared", + "lighting": "Full spectrum daylight, false color infrared shift (foliage turns pink/white), deep blue skies, high contrast, dreamlike atmosphere", + "defaultCamera": "Arricam LT", + "defaultLens": "Canon K-35", + "focusType": "stylistic" + }, + { + "value": "Spaghetti Western", + "lighting": "Harsh high-noon sun, heat haze distortion, sweaty skin texture, extreme close-up on eyes, deep depth of field", + "defaultCamera": "Arricam LT", + "defaultLens": "Angénieux Optimo", + "focusType": "stylistic" + }, + { + "value": "Automotive: Process Trailer", + "lighting": "Dynamic passing street lights, rhythmic shadow movement, wet road reflections, motion blur on background only", + "defaultCamera": "Arri Alexa 35", + "defaultLens": "Angénieux Optimo", + "focusType": "stylistic", + "example": "A black sports car speeding through a tunnel, dynamic motion blur on the tunnel lights, sharp focus on the car badge." + }, + { + "value": "Product (Liquid/Splash)", + "lighting": "High-speed strobe lighting, frozen droplets, backlit fluid translucency, crystal clear refraction", + "defaultCamera": "Phantom Flex4K", + "defaultLens": "Laowa Probe", + "focusType": "stylistic", + "example": "A strawberry dropping into milk, creating a perfect crown splash, frozen in mid-air with high-speed strobe lighting." 
# Aspect Ratio Map (Frontend -> Prose)
# Each value is the opening clause of the generated prompt for that ratio.
ASPECT_RATIO_PROSE = {
    '16:9': 'A cinematic 16:9 composition featuring',
    '2.39:1': 'A widescreen anamorphic-ratio composition featuring',
    '4:3': 'A classic 4:3 format composition featuring',
    '1:1': 'A square format composition featuring',
    '9:16': 'A vertical full-screen composition featuring'
}

# Negative Constraints
# Keyed by application name; applications without an entry have no constraints.
CONSTRAINT_MAP = {
    'Architecture': 'Strictly AVOID: messy, dirt, grime, imperfections, motion blur, handheld, shaky',
    'Product (Crisp)': 'Strictly AVOID: messy, dirt, grime, imperfections, motion blur, handheld, shaky',
    'Corporate Headshot': 'Strictly AVOID: shadow over eyes, silhouette, dark, moody, gritty, high contrast',
    'Portrait Studio': 'Strictly AVOID: shadow over eyes, silhouette, dark, moody, gritty, high contrast',
    'Cinematic Horror': 'Strictly AVOID: bright, cheerful, clean, pristine, high-key, sunshine',
    'Nostalgic Memory': 'Strictly AVOID: bright, cheerful, clean, pristine, high-key, sunshine'
}

# Smart Fill Context
# Keyed by application name; used to invent detail for very short prompts.
SMART_FILL_CONTEXT = {
    'Neon Cyberpunk': 'Hovering vehicle, rain, neon lights',
    'Golden Hour (Outdoor)': 'Vintage convertible, dust kicking up, lens flare',
    'Cinematic Horror': 'Distressed clothing, expressions of terror, flashlight beams cutting through fog, unseen threat in shadows',
    'Corporate Headshot': 'Business professional attire, confident posture, subtle smile, perfectly groomed',
    'Portrait Studio': 'Professional studio setup, controlled lighting, posed subject',
    'Fashion Editorial': 'High fashion couture, avant-garde styling, dramatic poses, editorial expression',
    'Street Photography': 'Authentic street fashion, candid moments, urban environment, real people',
    'Blue Hour (City)': 'Urban nightlife fashion, city lights reflecting, atmospheric fog, metropolitan energy',
    'Wildlife / Safari': 'Natural habitat, majestic animals, golden savanna light, environmental storytelling',
    'Symmetrical Whimsy': 'Perfectly centered vintage car, pastel luggage on roof, quirkily dressed driver',
    'IMAX Scale Epic': 'Lone rover traversing massive alien glacier, tiny against the landscape',
    'Clinical Thriller': 'Sterile hospital corridor, flickering fluorescent light, solitary figure',
    'Brutalist Atmosphere': 'Concrete monolith, lone figure dwarfed by structure, dust particles',
    'Technicolor Dream': 'Plastic fantastic furniture, bubble machines, candy-colored wardrobe',
    'Obsessive Symmetry': 'Identical twins in matching outfits, geometric patterns, perfect alignment',
    'Hong Kong Nostalgia': 'Taxi in heavy rain, neon lights reflecting on wet glass, lonely passenger',
    'Industrial Haze': 'Factory interior, steam pipes, worker silhouette against machinery',
    'Gothic Fantasy': 'Twisted architecture, dramatic cape, fog machine atmosphere'
}

# ==========================================
# HELPER FUNCTIONS
# ==========================================


def _find_by_value(entries, value):
    """Return the first dict in *entries* whose "value" equals *value*, else None."""
    return next((entry for entry in entries if entry.get("value") == value), None)


def _physics_of(entry) -> str:
    """Return an entry's "physics" text, or "" when the entry or the text is missing."""
    if not entry:
        return ""
    # A null "physics" would otherwise be interpolated into the prompt as "None".
    return entry.get("physics") or ""


def get_aspect_ratio_prose(ratio: str) -> str:
    """Return the prompt opening clause for *ratio*, or "" if the ratio is unknown."""
    return ASPECT_RATIO_PROSE.get(ratio, '')


def get_negative_constraints(app_name: str) -> str:
    """Return the "Strictly AVOID" text for *app_name*, or "" if none is defined."""
    return CONSTRAINT_MAP.get(app_name, '')


def get_smart_fill_context(app_name: str) -> str:
    """Return the smart-fill hint for *app_name*, or "" if none is defined."""
    return SMART_FILL_CONTEXT.get(app_name, '')


def get_camera_texture_keywords(camera_name: str, cameras=None) -> str:
    """Return the sensor/film "physics" text for *camera_name*.

    Args:
        camera_name: The camera's "value" identifier.
        cameras: Optional list of camera dicts to search; defaults to the
            module-level ``CAMERA_DATA``.

    Returns:
        The physics text, or "" when the camera is unknown or has none.
    """
    source = CAMERA_DATA if cameras is None else cameras
    return _physics_of(_find_by_value(source, camera_name))


def get_lens_physics(lens_name: str, lenses=None) -> str:
    """Return the optical "physics" text for *lens_name*.

    Args:
        lens_name: The lens's "value" identifier.
        lenses: Optional list of lens dicts to search; defaults to the
            module-level ``LENS_DATA``.

    Returns:
        The physics text, or "" when the lens is unknown or has none.
    """
    source = LENS_DATA if lenses is None else lenses
    return _physics_of(_find_by_value(source, lens_name))


def get_app_data(app_name: str, applications=None) -> Optional[Dict]:
    """Return the application preset dict for *app_name*, or None if unknown.

    Args:
        app_name: The application's "value" identifier.
        applications: Optional list of preset dicts to search; defaults to
            the module-level ``APPLICATION_DATA``.
    """
    source = APPLICATION_DATA if applications is None else applications
    return _find_by_value(source, app_name)
**SMART FILL CHECK:** + - IF input is short ({word_count} words < 10) AND Creative Freedom is High ({creative_freedom} > 0.5): + INVENT details based on Application: {f'"{smart_fill_context}"' if smart_fill_context else 'Use application context'}. + +2. **REALISM vs. STYLE CHECK:** + {'- Application is Architecture, Product, or Documentary: FORCE Deep Focus. Suppress "bokeh". Describe "clinical sharpness."' if app_data and app_data.get('focusType') == 'realism' else '- Application is Narrative/Portrait: ENHANCE "bokeh", "flares", stylistic elements.'} + +3. **SENSORY ENHANCEMENT (THE "DEEP TEXTURE" PASS):** + - **Skin:** If humans are present, describe skin texture (e.g., "visible pores," "slight sweat sheen," "natural imperfections"). + - **Air:** Describe the air quality (e.g., "humid haze," "crisp winter air," "dust motes in light beams"). + - **Light:** Describe the *quality* of light (e.g., "diffused window light," "specular highlights"). + - **Camera Sensor Physics:** Apply these characteristics: "{camera_physics}" + +4. **OPTICAL CHARACTERISTICS:** + - **Lens Physics:** Apply these optical traits: "{lens_physics}" + +5. **ASSEMBLY:** + - Start with {aspect_ratio_prose} + - Expand subject with Smart Fill + - Apply {application} lighting defaults + - Inject Camera Physics (Sensor/Film Stock characteristics) + - Finish with Lens Physics (Optical characteristics) + +OUTPUT FORMAT: +[Aspect Ratio Prose] + [Visual Subject] + [Sensory Atmosphere & Light] + [Camera Physics] + [Lens Physics]. + +STRICT OUTPUT FORMAT: Return ONLY the final visual description text. Do not use Markdown formatting. Do not include conversational filler. 
def determine_mime_type(data: bytes, default: str = 'image/png') -> str:
    """Detect an image MIME type from the leading magic bytes of *data*.

    Args:
        data: Raw image bytes (``bytes``, ``bytearray`` or ``memoryview``).
            ``None`` or empty input is treated as unrecognised.
        default: MIME type returned when no known signature matches.

    Returns:
        One of ``image/png``, ``image/jpeg``, ``image/webp``, ``image/gif``,
        or *default* when the signature is not recognised.
    """
    if not data:
        return default

    # Only the first 12 bytes are needed; copying them also lets bytearray
    # and memoryview inputs share the same comparisons.
    head = bytes(data[:12])

    if head.startswith(b'\x89PNG\r\n\x1a\n'):
        return 'image/png'
    if head.startswith(b'\xff\xd8'):
        return 'image/jpeg'
    # WebP is a RIFF container: "RIFF" + 4-byte size + "WEBP".
    if head.startswith(b'RIFF') and head[8:12] == b'WEBP':
        return 'image/webp'
    # Previously GIF data fell through and was mislabelled as PNG.
    if head.startswith((b'GIF87a', b'GIF89a')):
        return 'image/gif'
    return default
ref_id).first() if ref_asset and os.path.exists(ref_asset.file_path): with open(ref_asset.file_path, "rb") as f: ref_image_data = f.read() + if ref_asset.mime_type: + ref_mime_type = ref_asset.mime_type - image_data, filename = await _generate_nano_banana(input_data, ref_image_data) - job.api_model = input_data.get("model", "gemini-2.5-flash-image") + image_data, filename = await _generate_nano_banana(input_data, ref_image_data, ref_mime_type) + job.api_model = input_data.get("model", "gemini-3-pro-image-preview") elif provider == "stable-diffusion": image_data, filename = await _generate_stability(input_data) job.api_model = input_data.get("model", "sd3.5-large") @@ -281,6 +295,62 @@ async def generate(job_id: str): job.output_asset_ids = [asset.id] job.output_data = {"asset_id": str(asset.id), "file_path": file_path} + # Log Usage + try: + from app.utils.logging import log_model_usage + # Other imports are available globally + + # Placeholder values for logging, these would ideally be returned by _generate_ functions + # For now, we'll use what's available from input_data and job.api_model + model = job.api_model + width = input_data.get("width") + height = input_data.get("height") + n = input_data.get("n", 1) # Number of images requested + ext = "png" # Default, actual ext should come from _generate_ functions + + # Use existing asset data for logging + output_asset_ids = job.output_asset_ids or [] + output_paths = [] + if job.output_data and "file_path" in job.output_data: + output_paths.append(job.output_data["file_path"]) + + duration_ms = 0 + if job.started_at: + duration_ms = int((datetime.utcnow() - job.started_at).total_seconds() * 1000) + + log_model_usage( + db=db, + job_id=str(job.id), + user_id=str(job.user_id), + module="image_generator", + action="generate", + provider=provider, + model=model, + usage_stats={ + "images": len(output_asset_ids), + "processing_time_ms": duration_ms + }, + request_metadata={ + "prompt": prompt, + "negative_prompt": 
async def _upload_file_http(media_data: bytes, mime_type: str) -> Optional[str]:
    """Upload *media_data* to the Google Generative AI Files API over raw HTTP.

    Uses the two-step resumable protocol: a "start" request that returns a
    session URL, then a single "upload, finalize" request carrying the bytes.

    Args:
        media_data: Raw file content to upload.
        mime_type: MIME type declared for the uploaded content.

    Returns:
        The ``file.uri`` reported by the API, or ``None`` when the API key is
        not configured, either HTTP step fails, or the response has no URI.
        Errors are logged rather than raised.
    """
    if not settings.google_api_key:
        return None

    upload_endpoint = "https://generativelanguage.googleapis.com/upload/v1beta/files"
    num_bytes = len(media_data)

    start_headers = {
        # Send the key as a header instead of a ?key= query parameter so it
        # does not end up in request-URL logs (httpx logs URLs at INFO level).
        "x-goog-api-key": settings.google_api_key,
        "X-Goog-Upload-Protocol": "resumable",
        "X-Goog-Upload-Command": "start",
        "X-Goog-Upload-Header-Content-Length": str(num_bytes),
        "X-Goog-Upload-Header-Content-Type": mime_type,
        "Content-Type": "application/json"
    }
    metadata = {"file": {"display_name": f"nano_banana_upload_{uuid4()}"}}

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            # 1. Start Upload: obtain the resumable session URL.
            start_resp = await client.post(upload_endpoint, headers=start_headers, json=metadata)
            if start_resp.status_code != 200:
                logger.error(f"Failed to start upload: {start_resp.status_code} - {start_resp.text}")
                return None

            upload_url = start_resp.headers.get("x-goog-upload-url")
            if not upload_url:
                logger.error("No upload URL returned")
                return None

            # 2. Upload Bytes: send everything in one chunk and finalize.
            upload_headers = {
                "Content-Length": str(num_bytes),
                "X-Goog-Upload-Offset": "0",
                "X-Goog-Upload-Command": "upload, finalize"
            }
            upload_resp = await client.post(upload_url, headers=upload_headers, content=media_data)
            if upload_resp.status_code != 200:
                logger.error(f"Failed to upload data: {upload_resp.status_code} - {upload_resp.text}")
                return None

            file_uri = upload_resp.json().get("file", {}).get("uri")
            if not file_uri:
                # A 200 without a URI is unusable; do not report it as success.
                logger.error("Upload finished but response contained no file URI")
                return None

            logger.info(f"File uploaded successfully: {file_uri}")
            return file_uri

    except Exception as e:
        # Best-effort helper: callers treat None as "upload unavailable".
        logger.exception(f"Upload error: {e}")
        return None
""" if not settings.google_api_key: raise ValueError("GOOGLE_API_KEY not configured") @@ -877,30 +1005,65 @@ async def _generate_nano_banana(input_data: dict, image_data: Optional[bytes] = if not prompt: raise ValueError("Prompt is required") - # Use gemini-2.5-flash-image model for native image generation - model_name = input_data.get("model", "gemini-2.5-flash-image") + import google.generativeai as genai + import tempfile + import os + + import base64 + + genai.configure(api_key=settings.google_api_key) + + # Use gemini-3-pro-image-preview as requested by user + model_name = input_data.get("model", "gemini-3-pro-image-preview") + if model_name in ["gemini-2.5-flash-image", "gemini-2.0-flash-exp"]: + model_name = "gemini-3-pro-image-preview" + url = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent" - # Build payload with text and optional image - # Build payload with image first (context) then text (instruction) + # Build payload - EXACTLY matching PHP structure (Image FIRST, then Text) parts = [] if image_data: - import base64 + # Robust MIME detection + real_mime_type = determine_mime_type(image_data) + + # PHP uses inline_data (snake_case) and base64 + # It forces image/jpeg in PHP. We will do the same to match the reference implementation exactly. 
+ b64_image = base64.b64encode(image_data).decode("utf-8") parts.append({ - "inlineData": { - "mimeType": "image/png", + "inline_data": { + "mime_type": "image/jpeg", "data": b64_image } }) - logger.info(f"Nano Banana: Added reference image ({len(image_data)} bytes) to payload") + logger.info(f"Nano Banana: Added reference image (inline_data base64, {len(b64_image)} chars)") + # Text Instruction Second parts.append({"text": prompt}) + + # Construct generation config + gen_config = { + "responseModalities": ["IMAGE"] + } + + # Map aspect ratio if present + ar_map = { + "1:1": "1:1", "16:9": "16:9", "9:16": "9:16", + "4:3": "4:3", "3:4": "3:4" + } + input_ar = input_data.get("aspect_ratio", "1:1") + if input_ar in ar_map: + gen_config["imageConfig"] = { + "aspectRatio": ar_map[input_ar], + "imageSize": input_data.get("image_size", "2K") # PHP supports imageSize + } + payload = { "contents": [{ "parts": parts - }] + }], + "generationConfig": gen_config } try: @@ -916,29 +1079,46 @@ async def _generate_nano_banana(input_data: dict, image_data: Optional[bytes] = logger.info(f"Nano Banana response status: {response.status_code}") - if response.status_code == 200: - data = response.json() - logger.info(f"Nano Banana response keys: {data.keys() if isinstance(data, dict) else 'not a dict'}") - - # Extract image from response - candidates = data.get("candidates", []) - if candidates and len(candidates) > 0: - content = candidates[0].get("content", {}) - parts = content.get("parts", []) - - for part in parts: - if "inlineData" in part: - inline_data = part["inlineData"] - if "data" in inline_data: - import base64 - image_data = base64.b64decode(inline_data["data"]) - filename = f"nano_banana_{uuid4()}.png" - logger.info(f"✓ Nano Banana generated image: {len(image_data)} bytes") - return image_data, filename - - logger.warning(f"Nano Banana: No image data in response. 
Response: {str(data)[:200]}") - else: + if response.status_code != 200: logger.error(f"Nano Banana API error: {response.status_code} - {response.text}") + # Try to parse error message + try: + err_json = response.json() + err_msg = err_json.get("error", {}).get("message", response.text) + logger.error(f"Nano Banana Error Details: {err_msg}") + except: + pass + return None, None + + data = response.json() + # logger.info(f"Nano Banana response: {data}") + + # Extract image from response - supporting both inline_data and inlineData + candidates = data.get("candidates", []) + if candidates and len(candidates) > 0: + content = candidates[0].get("content", {}) + parts_resp = content.get("parts", []) + + for part in parts_resp: + # Check snake_case first (PHP match) + if "inline_data" in part: + inline_data = part["inline_data"] + if "data" in inline_data: + img_bytes = base64.b64decode(inline_data["data"]) + filename = f"nano_banana_{uuid4()}.png" + return img_bytes, filename + + # Check camelCase (Standard Gemini) + if "inlineData" in part: + inline_data = part["inlineData"] + if "data" in inline_data: + img_bytes = base64.b64decode(inline_data["data"]) + filename = f"nano_banana_{uuid4()}.png" + return img_bytes, filename + + logger.warning(f"Nano Banana: No image data in response. 
Content: {content}") + else: + logger.warning(f"Nano Banana: No candidates in response.") except Exception as e: logger.error(f"Nano Banana generation error: {e}") diff --git a/backend/app/services/image_generator.py.bak_nano_v1 b/backend/app/services/image_generator.py.bak_nano_v1 new file mode 100644 index 0000000..db4f79d --- /dev/null +++ b/backend/app/services/image_generator.py.bak_nano_v1 @@ -0,0 +1,1175 @@ +"""Image Generator Service - Multiple AI Providers + +Supported Providers: +- openai: GPT-Image-1 (latest) or DALL-E 3 +- imagen: Google Imagen 4 (Standard, Ultra, Fast) +- nano-banana: Gemini 2.5 Flash Image / Nano Banana Pro +- stable-diffusion: Stability AI SDXL, SD3, image-to-image +- leonardo: Leonardo.ai models +- ideogram: Ideogram v2 with text rendering +- flux: Black Forest Labs Flux Pro + +OpenAI GPT-Image-1 (April 2025): +- model: 'gpt-image-1' (default) or 'dall-e-3' +- quality: 'low', 'medium', 'high' (default high) +- size: 1024x1024, 1024x1536, 1536x1024 +- background: 'transparent', 'opaque', 'auto' (for PNG/WebP) +- output_format: 'png', 'jpeg', 'webp' +- n: 1-10 images per request +- Pricing: ~$0.02 (low), $0.07 (medium), $0.19 (high) per image + +Google Imagen 4 (December 2025): +- model: 'imagen-4.0-generate-001' (default), 'imagen-4.0-ultra-generate-001', 'imagen-4.0-fast-generate-001' +- image_size: '1K', '2K' (Ultra/Standard only) +- aspect_ratio: '1:1', '3:4', '4:3', '9:16', '16:9' +- number_of_images: 1-4 +- enhance_prompt: true/false (LLM prompt enhancement) +- person_generation: 'dont_allow', 'allow_adult', 'allow_all' +- Pricing: $0.02 (Fast), $0.04 (Standard), $0.06 (Ultra) per image + +Nano Banana / Gemini Image (December 2025): +- model: 'gemini-2.5-flash-image' (Nano Banana), 'gemini-3-pro-image-preview' (Nano Banana Pro) +- aspect_ratio: '1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9' +- image_size: '1K', '2K', '4K' (Pro only for 4K) +- Features: Text rendering, image editing, multi-turn 
conversation +- Pricing: ~$0.04 per 1MP image + +DALL-E 3 Options: +- quality: 'standard' or 'hd' (default hd) +- style: 'vivid' (hyper-real) or 'natural' (more realistic) +- size: 1024x1024, 1024x1792, 1792x1024 + +Stability AI Options: +- model: sd3.5-large, sd3.5-medium, sd3-large, sd3-medium, sdxl-1.0 +- aspect_ratio: 1:1, 16:9, 9:16, 4:3, 3:4, 21:9, 9:21 +- negative_prompt: What to avoid in generation +- image_to_image: Use input image as starting point +- strength: 0.0-1.0 for image-to-image (how much to change) +- style_preset: enhance, anime, photographic, digital-art, etc. +""" +import httpx +import os +import base64 +import logging +from uuid import uuid4 +from datetime import datetime +from typing import Optional, Dict, Any, Tuple + +logger = logging.getLogger(__name__) + +from app.database import SessionLocal +from app.models.job import Job +from app.models.asset import Asset +from app.config import settings + +def determine_mime_type(data: bytes) -> str: + """Detect MIME type from magic bytes""" + if data.startswith(b'\x89PNG\r\n\x1a\n'): + return 'image/png' + elif data.startswith(b'\xff\xd8'): + return 'image/jpeg' + elif data.startswith(b'RIFF') and data[8:12] == b'WEBP': + return 'image/webp' + return 'image/png' # Default fallback + +# Provider configurations +IMAGE_PROVIDERS = { + "openai": { + "name": "OpenAI Image Generation", + "models": ["gpt-image-1", "dall-e-3", "dall-e-2"], + "default_model": "gpt-image-1", + "gpt-image-1": { + "sizes": ["1024x1024", "1024x1536", "1536x1024"], + "qualities": ["low", "medium", "high"], + "output_formats": ["png", "jpeg", "webp"], + "backgrounds": ["auto", "transparent", "opaque"], + "max_images": 10 + }, + "dall-e-3": { + "sizes": ["1024x1024", "1024x1792", "1792x1024"], + "qualities": ["standard", "hd"], + "styles": ["vivid", "natural"] + }, + "supports_styles": True + }, + "imagen": { + "name": "Google Imagen 4", + "models": ["imagen-4.0-generate-001", "imagen-4.0-ultra-generate-001", 
"imagen-4.0-fast-generate-001"], + "default_model": "imagen-4.0-generate-001", + "aspect_ratios": ["1:1", "3:4", "4:3", "9:16", "16:9"], + "image_sizes": ["1K", "2K"], + "max_images": 4, + "supports_enhance_prompt": True, + "supports_person_generation": True + }, + "nano-banana": { + "name": "Nano Banana (Gemini Image)", + "models": ["gemini-3-pro-image-preview", "gemini-2.0-flash-exp"], + "default_model": "gemini-3-pro-image-preview", + "aspect_ratios": ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"], + "image_sizes": ["1K", "2K", "4K"], + "supports_text_rendering": True, + "supports_image_editing": True + }, + "stable-diffusion": { + "name": "Stability AI", + "models": ["sd3.5-large", "sd3.5-medium", "sd3-large", "sd3-medium", "sdxl-1.0"], + "default_model": "sd3.5-large", + "aspect_ratios": ["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"], + "supports_img2img": True, + "supports_negative_prompt": True + }, + "leonardo": { + "name": "Leonardo.ai", + "models": { + # Latest Models (2025) + # Phoenix: de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3 (Found in docs) + "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3": "Leonardo Phoenix 1.0", + "7b592283-e8a7-4c5a-9ba6-d18c31f258b9": "Lucid Origin", + "05ce0082-2d80-4a2d-8653-4d1c85e2418e": "Lucid Realism", + "28aeddf8-bd19-4803-80fc-79602d1a9989": "FLUX.1 Kontext", + "b2614463-296c-462a-9586-aafdb8f00e36": "Flux Dev", + "1dd50843-d653-4516-a8e3-f0238ee453ff": "Flux Schnell", + + # XL Models + "aa77f04e-3eec-4034-9c07-d0f619684628": "Leonardo Kino XL", + "5c232a9e-9061-4777-980a-ddc8e65647c6": "Leonardo Vision XL", + "b24e16ff-06e3-43eb-8d33-4416c2d75876": "Leonardo Lightning XL", + "1e60896f-3c26-4296-8ecc-53e2afecc132": "Leonardo Diffusion XL", + + # Older/Other Support + "16e7060a-803e-4df3-97ee-edcfa5dc9cc8": "SDXL 1.0", + "ac614f96-1082-45bf-be9d-757f2d31c174": "DreamShaper v7", + "e316348f-7773-490e-adcd-46757c738eb7": "Absolute Reality v1.6" + }, + "default_model": 
"de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3", + # Explicit mapping for Aspect Ratio -> Dimensions (Width x Height) + # These are generally safe for SDXL/Phoenix models + "dimensions": { + "1:1": {"width": 1024, "height": 1024}, + "16:9": {"width": 1472, "height": 832}, + "9:16": {"width": 832, "height": 1472}, + "4:3": {"width": 1248, "height": 928}, # Approx for SDXL + "3:4": {"width": 928, "height": 1248}, + "21:9": {"width": 1536, "height": 640}, # Ultra wide + "9:21": {"width": 640, "height": 1536} + }, + "style_presets": [ + "ANIME", "BOKEH", "CINEMATIC", "CINEMATIC_CLOSEUP", "CREATIVE", + "DYNAMIC", "ENVIRONMENT", "FASHION", "FILM", "FOOD", "GENERAL", + "HDR", "ILLUSTRATION", "LEONARDO", "LONG_EXPOSURE", "MACRO", + "MINIMALISTIC", "MONOCHROME", "MOODY", "NONE", "NEUTRAL", + "PHOTOGRAPHY", "PORTRAIT", "RAYTRACED", "RENDER_3D", "RETRO", + "SKETCH_BW", "SKETCH_COLOR", "STOCK_PHOTO", "VIBRANT", "UNPROCESSED" + ], + "supports_img2img": True, + "supports_character_reference": True, + "supports_style_reference": True + }, + "bria": { + "name": "Bria AI", + "models": ["base", "fast"], + "default_model": "base", + "aspect_ratios": ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9"], + "mediums": ["photography", "art"], + "supports_prompt_enhancement": True, + "base_config": {"steps_num": [20, 50], "guidance_scale": [1, 10]}, + "fast_config": {"steps_num": [4, 10]} + }, + "ideogram": { + "name": "Ideogram", + "models": ["V_2", "V_2_TURBO"], + "supports_text_rendering": True + }, + "flux": { + "name": "Flux Pro", + "models": ["flux-pro-1.1", "flux-dev", "flux-schnell"], + "supports_img2img": True + } +} + +STABILITY_STYLE_PRESETS = [ + "enhance", "anime", "photographic", "digital-art", "comic-book", + "fantasy-art", "line-art", "analog-film", "neon-punk", "isometric", + "low-poly", "origami", "modeling-compound", "cinematic", "3d-model", "pixel-art" +] + + +async def generate(job_id: str): + """Generate image based on provider""" + db = SessionLocal() + try: + 
job = db.query(Job).filter(Job.id == job_id).first() + if not job: + return + + input_data = job.input_data + provider = input_data.get("provider", "openai") + prompt = input_data.get("prompt", "") + + # Update progress + job.progress = 10 + job.api_provider = provider + db.commit() + + image_data = None + filename = None + + if provider == "openai" or provider == "dalle3": + image_data, filename = await _generate_openai(input_data) + job.api_model = input_data.get("model", "gpt-image-1") + elif provider == "imagen": + image_data, filename = await _generate_imagen(input_data) + job.api_model = input_data.get("model", "imagen-4.0-generate-001") + elif provider == "nano-banana" or provider == "gemini": + # Fetch reference image if provided + ref_id = input_data.get("reference_asset_id") + ref_image_data = None + ref_mime_type = "image/png" # Default + + if ref_id: + ref_asset = db.query(Asset).filter(Asset.id == ref_id).first() + if ref_asset and os.path.exists(ref_asset.file_path): + with open(ref_asset.file_path, "rb") as f: + ref_image_data = f.read() + if ref_asset.mime_type: + ref_mime_type = ref_asset.mime_type + + image_data, filename = await _generate_nano_banana(input_data, ref_image_data, ref_mime_type) + job.api_model = input_data.get("model", "gemini-3-pro-image-preview") + elif provider == "stable-diffusion": + image_data, filename = await _generate_stability(input_data) + job.api_model = input_data.get("model", "sd3.5-large") + elif provider == "leonardo": + image_data, filename = await _generate_leonardo(input_data) + job.api_model = "leonardo" + elif provider == "ideogram": + image_data, filename = await _generate_ideogram(input_data) + job.api_model = "ideogram-v2" + elif provider == "flux": + image_data, filename = await _generate_flux(input_data) + job.api_model = "flux-pro" + elif provider == "bria": + image_data, filename = await _generate_bria(input_data) + job.api_model = input_data.get("model", "base") + elif provider == "runway-image": + 
image_data, filename = await _generate_runway_image(input_data) + job.api_model = "gen4_image" + else: + raise ValueError(f"Unknown provider: {provider}") + + job.progress = 80 + db.commit() + + # Save image + if image_data: + storage_path = os.path.join(settings.storage_path, "images") + os.makedirs(storage_path, exist_ok=True) + file_path = os.path.join(storage_path, filename) + + with open(file_path, "wb") as f: + f.write(image_data) + + # Create asset + asset = Asset( + user_id=job.user_id, + project_id=job.project_id, + original_filename=filename, + stored_filename=filename, + file_path=file_path, + file_type="image", + mime_type="image/png", + file_size_bytes=len(image_data), + source_module="image_generator", + source_job_id=job.id, + asset_metadata={ + "prompt": prompt, + "provider": provider, + "model": job.api_model + } + ) + db.add(asset) + db.commit() + db.refresh(asset) + + job.output_asset_ids = [asset.id] + job.output_data = {"asset_id": str(asset.id), "file_path": file_path} + + # Log Usage + try: + from app.utils.logging import log_model_usage + # Other imports are available globally + + # Placeholder values for logging, these would ideally be returned by _generate_ functions + # For now, we'll use what's available from input_data and job.api_model + model = job.api_model + width = input_data.get("width") + height = input_data.get("height") + n = input_data.get("n", 1) # Number of images requested + ext = "png" # Default, actual ext should come from _generate_ functions + + # Use existing asset data for logging + output_asset_ids = job.output_asset_ids or [] + output_paths = [] + if job.output_data and "file_path" in job.output_data: + output_paths.append(job.output_data["file_path"]) + + duration_ms = 0 + if job.started_at: + duration_ms = int((datetime.utcnow() - job.started_at).total_seconds() * 1000) + + log_model_usage( + db=db, + job_id=str(job.id), + user_id=str(job.user_id), + module="image_generator", + action="generate", + 
async def _generate_openai(input_data: dict) -> Tuple[Optional[bytes], Optional[str]]:
    """Generate a single image with OpenAI GPT-Image-1 or DALL-E.

    GPT-Image-1 Parameters (default):
    - prompt: Text description (max 32000 chars)
    - quality: 'low', 'medium', 'high' (default: high)
    - size: '1024x1024', '1024x1536', '1536x1024'
    - background: 'transparent', 'opaque', 'auto'
    - output_format: 'png', 'jpeg', 'webp' (default: png)
    - output_compression: 0-100 for jpeg/webp
    - moderation: 'auto' or 'low' (less restrictive)
    - n: 1-10 images (only the first returned image is used)

    DALL-E 3 Parameters:
    - prompt: Text description (max 4000 chars)
    - quality: 'standard' or 'hd' (default: hd)
    - style: 'vivid' or 'natural' (default: vivid)
    - size: '1024x1024', '1024x1792', '1792x1024'

    The request size is derived from the ``width``/``height`` entries of
    ``input_data``: landscape, portrait or square.

    Returns:
        ``(image_bytes, filename)`` for the first image in the response, or
        ``(None, None)`` when the response carries no base64 image.

    Raises:
        ValueError: If no OpenAI API key is configured.
        httpx.HTTPStatusError: If the API answers with an error status.
    """
    # Fail fast like the other provider helpers instead of sending "Bearer None".
    if not settings.openai_api_key:
        raise ValueError("OPENAI_API_KEY not configured")

    prompt = input_data.get("prompt", "")
    model = input_data.get("model", "gpt-image-1")
    is_gpt_image = model == "gpt-image-1"

    # An explicit null must fall back to the default as well; comparing None
    # values below would raise TypeError.
    width = input_data.get("width")
    height = input_data.get("height")
    width = 1024 if width is None else width
    height = 1024 if height is None else height

    # DALL-E 2 only offers square sizes, so never send it a 1792px variant.
    if width == height or model == "dall-e-2":
        size = "1024x1024"
    elif width > height:
        size = "1536x1024" if is_gpt_image else "1792x1024"
    else:
        size = "1024x1536" if is_gpt_image else "1024x1792"

    if is_gpt_image:
        quality = input_data.get("quality", "high")
        background = input_data.get("background", "auto")
        output_format = input_data.get("output_format", "png")
        output_compression = input_data.get("output_compression", 100)
        moderation = input_data.get("moderation", "auto")
        # Clamp to 1..10; a missing/null/zero value means one image.
        count = max(1, min(input_data.get("n") or 1, 10))

        payload = {
            "model": "gpt-image-1",
            "prompt": prompt,
            "size": size,
            "quality": quality,
            "n": count,
        }

        # Optional parameters are only sent when they differ from this
        # function's defaults.
        if background != "auto":
            payload["background"] = background
        if output_format != "png":
            payload["output_format"] = output_format
        if output_format in ("jpeg", "webp") and output_compression != 100:
            payload["output_compression"] = output_compression
        if moderation != "auto":
            payload["moderation"] = moderation

        extension = output_format if output_format in ("png", "jpeg", "webp") else "png"
        file_stem = f"gptimage1_{quality}"
    else:
        # DALL-E 3 (or DALL-E 2)
        quality = input_data.get("quality", "hd")
        style = input_data.get("style", "vivid")

        payload = {
            "model": model,
            "prompt": prompt,
            "size": size,
            "n": 1,
            "response_format": "b64_json",
        }

        # DALL-E 3 specific options
        if model == "dall-e-3":
            payload["quality"] = quality
            payload["style"] = style

        extension = "png"
        file_stem = f"{model.replace('-', '')}_{style if model == 'dall-e-3' else 'gen'}"

    # Both model families share the same endpoint and response envelope.
    async with httpx.AsyncClient(timeout=180) as client:
        response = await client.post(
            "https://api.openai.com/v1/images/generations",
            headers={
                "Authorization": f"Bearer {settings.openai_api_key}",
                "Content-Type": "application/json",
            },
            json=payload,
        )
        response.raise_for_status()
        data = response.json()

    images = data.get("data") or []
    b64_image = images[0].get("b64_json") if images else None
    if not b64_image:
        return None, None

    return base64.b64decode(b64_image), f"{file_stem}_{uuid4()}.{extension}"
async def _generate_leonardo(input_data: dict) -> tuple:
    """
    Generate image using Leonardo AI

    Parameters (keys of ``input_data``):
    - prompt: Text description
    - model: Leonardo model ID
    - aspect_ratio: Looked up in IMAGE_PROVIDERS["leonardo"]["dimensions"]
      (falls back to 1024x1024)
    - width: Image width (512, 768, 1024, 1472); overrides aspect_ratio
    - height: Image height (512, 768, 832, 1024); overrides aspect_ratio
    - preset_style: Style preset (ANIME, CINEMATIC, PHOTOGRAPHY, etc.)
    - num_images: Number of images to generate (capped at 4; only the first
      generated image is downloaded)
    - guidance_scale: How closely to follow prompt (7-15)
    - alchemy / contrast_ratio: Legacy Alchemy pipeline switches
    - photo_real / photo_real_strength: Legacy PhotoReal switches
    - init_image_id: For image-to-image
    - init_strength: How much to change input image (0.1-0.9)
    - num_inference_steps, negative_prompt: accepted by callers but not
      currently forwarded to the API by this function

    Returns:
        ``(image_bytes, filename)`` for the first generated image, or
        ``(None, None)`` when no generation id is returned or the generation
        does not complete within the polling window (90 polls, 2 s apart).

    Raises:
        ValueError: If the API key is missing or the create call is not 200.
        Exception: If Leonardo reports the generation as FAILED.
        httpx.HTTPStatusError: If downloading the finished image fails.
    """
    import asyncio

    # Fail fast like the other provider helpers instead of sending "Bearer None".
    if not settings.leonardo_api_key:
        raise ValueError("LEONARDO_API_KEY not configured")

    # NOTE(review): this default ID differs from the ID the Phoenix guard below
    # checks - confirm which model is intended as the default.
    model_id = input_data.get("model", "6b645e3a-d64f-4341-a6d8-7a3690fbf042")

    # Determine dimensions from aspect ratio
    aspect_ratio = input_data.get("aspect_ratio", "1:1")
    dims = IMAGE_PROVIDERS["leonardo"]["dimensions"].get(
        aspect_ratio, {"width": 1024, "height": 1024}
    )

    # Explicit width/height override the aspect-ratio lookup. `or` keeps a null
    # value from reaching int(), which would raise TypeError.
    width = int(input_data.get("width") or dims["width"])
    height = int(input_data.get("height") or dims["height"])

    payload = {
        "prompt": input_data.get("prompt"),
        "modelId": model_id,
        "width": width,
        "height": height,
        "num_images": min(input_data.get("num_images") or 1, 4),  # Cap at 4 for safety
        "public": input_data.get("public", False),
    }

    # Alchemy / PhotoReal Logic
    # Phoenix (de7d3faf...) does NOT support Alchemy or PhotoReal (it has its own pipeline).
    # Sending 'alchemy': True with Phoenix causes "Invalid response from authorization hook" (500).
    is_phoenix = model_id == "de7d3faf-762f-48e0-b3b7-9d0ac3a3fcf3"
    alchemy = False if is_phoenix else input_data.get("alchemy", False)
    photo_real = False if is_phoenix else input_data.get("photo_real", False)

    if alchemy:
        payload["alchemy"] = True
        payload["contrastRatio"] = input_data.get("contrast_ratio", 0.5)

    if photo_real:
        payload["photoReal"] = True
        payload["photoRealStrength"] = input_data.get("photo_real_strength", 0.5)
        # If PhotoReal is on, we remove modelId to rely on system default for PhotoReal.
        payload.pop("modelId", None)

    preset_style = input_data.get("preset_style")
    if preset_style and preset_style != "NONE":
        payload["presetStyle"] = preset_style

    guidance_scale = input_data.get("guidance_scale")
    if guidance_scale:
        payload["guidance_scale"] = int(guidance_scale)

    # Image-to-image / Reference
    # Modern Leonardo uses 'imagePrompts' array for reference.
    # 'init_image_id' is legacy but might still work for some models.
    init_image_id = input_data.get("init_image_id")
    if init_image_id:
        payload["init_image_id"] = init_image_id
        payload["init_strength"] = float(input_data.get("init_strength", 0.5))

    # Logged only once the payload is complete so the log shows what is sent.
    logger.info(f"Leonardo Payload (Model: {model_id}): {payload}")

    auth_header = {"Authorization": f"Bearer {settings.leonardo_api_key}"}

    async with httpx.AsyncClient(timeout=180) as client:
        # Create generation
        response = await client.post(
            "https://cloud.leonardo.ai/api/rest/v1/generations",
            headers={**auth_header, "Content-Type": "application/json"},
            json=payload,
        )
        if response.status_code != 200:
            error_text = response.text
            logger.error(f"Leonardo API error {response.status_code}: {error_text}")
            raise ValueError(f"Leonardo API returned {response.status_code}: {error_text}")

        data = response.json()
        logger.info(f"Leonardo response: {data}")

        generation_id = (data.get("sdGenerationJob") or {}).get("generationId")
        if not generation_id:
            logger.warning("Leonardo response contained no generationId")
            return None, None

        # Poll for result
        for _ in range(90):  # Wait up to 3 minutes
            await asyncio.sleep(2)
            status_response = await client.get(
                f"https://cloud.leonardo.ai/api/rest/v1/generations/{generation_id}",
                headers=auth_header,
            )
            if status_response.status_code != 200:
                # Keep polling: a single bad poll should not abort the job.
                logger.warning(f"Leonardo status poll returned {status_response.status_code}")
                continue

            # `generations_by_pk` can be present but null; treat as "not ready".
            generation = status_response.json().get("generations_by_pk") or {}
            status = generation.get("status")

            if status == "FAILED":
                raise Exception("Leonardo generation failed")
            if status != "COMPLETE":
                continue

            images = generation.get("generated_images") or []
            image_url = images[0].get("url") if images else None
            if not image_url:
                continue

            img_response = await client.get(image_url)
            # Never store an HTTP error body as if it were the image.
            img_response.raise_for_status()
            model_name = IMAGE_PROVIDERS["leonardo"]["models"].get(model_id, "leonardo")
            filename = f"leonardo_{model_name.replace(' ', '_').lower()}_{uuid4()}.png"
            return img_response.content, filename

        logger.warning(f"Leonardo generation {generation_id} did not complete in time")

    return None, None
"api_token": settings.bria_api_key, + "Content-Type": "application/json" + }, + json=payload + ) + response.raise_for_status() + data = response.json() + + # Get the result + result = data.get("result", []) + if result and len(result) > 0: + image_url = result[0].get("urls", {}).get("url") + if image_url: + img_response = await client.get(image_url) + filename = f"bria_{model}_{uuid4()}.png" + return img_response.content, filename + + return None, None + + +async def _generate_ideogram(input_data: dict) -> tuple: + """Generate image using Ideogram""" + async with httpx.AsyncClient(timeout=120) as client: + response = await client.post( + "https://api.ideogram.ai/generate", + headers={ + "Api-Key": settings.ideogram_api_key, + "Content-Type": "application/json" + }, + json={ + "image_request": { + "prompt": input_data.get("prompt"), + "model": "V_2", + "aspect_ratio": "ASPECT_1_1" + } + } + ) + response.raise_for_status() + data = response.json() + + if data.get("data") and len(data["data"]) > 0: + image_url = data["data"][0].get("url") + if image_url: + img_response = await client.get(image_url) + filename = f"ideogram_{uuid4()}.png" + return img_response.content, filename + + return None, None + + +async def _generate_flux(input_data: dict) -> tuple: + """Generate image using Flux (Black Forest Labs) + + Note: Requires FLUX_API_KEY from https://api.bfl.ml/ + May require paid account for flux-pro-1.1 model + """ + if not settings.flux_api_key: + raise ValueError("FLUX_API_KEY not configured") + + async with httpx.AsyncClient(timeout=120) as client: + try: + response = await client.post( + "https://api.bfl.ml/v1/flux-pro-1.1", + headers={ + "x-key": settings.flux_api_key, + "Content-Type": "application/json" + }, + json={ + "prompt": input_data.get("prompt"), + "width": input_data.get("width", 1024), + "height": input_data.get("height", 1024) + } + ) + + if response.status_code == 403: + logger.error("Flux API 403: Invalid API key or insufficient permissions") + 
async def _generate_gemini(input_data: dict) -> tuple:
    """Generate image using Google Gemini through the google-generativeai SDK.

    Sends ``input_data["prompt"]`` to the ``gemini-2.0-flash-exp`` model and
    returns the first response part that carries inline data.

    Returns:
        ``(image_bytes, filename)``, or ``(None, None)`` when the response has
        no candidates or no part with inline data.

    Raises:
        ValueError: If the Google API key is not configured or the prompt is
            empty.
    """
    # Same validation as the other Google-backed helpers in this module.
    if not settings.google_api_key:
        raise ValueError("GOOGLE_API_KEY not configured")

    prompt = input_data.get("prompt", "")
    if not prompt:
        raise ValueError("Prompt is required")

    import google.generativeai as genai

    genai.configure(api_key=settings.google_api_key)
    model = genai.GenerativeModel("gemini-2.0-flash-exp")

    # The synchronous generate_content() would block the event loop for the
    # whole request; the SDK's async variant lets other jobs keep running.
    response = await model.generate_content_async(
        prompt,
        generation_config=genai.types.GenerationConfig(
            response_mime_type="image/png"
        ),
    )

    candidates = response.candidates
    if not candidates:
        return None, None

    for part in candidates[0].content.parts:
        inline = getattr(part, "inline_data", None)
        if inline:
            return inline.data, f"gemini_{uuid4()}.png"

    return None, None
async def _upload_file_http(media_data: bytes, mime_type: str) -> Optional[str]:
    """
    Upload file using raw HTTP to Google Generative AI Files API
    (Alternative to outdated google-generativeai library)

    Two requests are made: a "start" request that announces the size and
    content type and returns an upload URL in the ``x-goog-upload-url``
    response header, then an "upload, finalize" request that sends the bytes.

    Args:
        media_data: Raw bytes of the file to upload.
        mime_type: Content type announced for the upload.

    Returns:
        The uploaded file's URI, or ``None`` when the API key is missing, the
        payload is empty, any step fails, or the response carries no URI.
        This helper is best-effort: errors are logged, never raised.
    """
    if not settings.google_api_key:
        return None
    if not media_data:
        logger.error("Upload skipped: no media data provided")
        return None

    # The key goes in a header (as in the other Google calls in this module)
    # rather than the query string, so it does not end up in logged URLs.
    url = "https://generativelanguage.googleapis.com/upload/v1beta/files"
    num_bytes = len(media_data)

    start_headers = {
        "x-goog-api-key": settings.google_api_key,
        "X-Goog-Upload-Protocol": "resumable",
        "X-Goog-Upload-Command": "start",
        "X-Goog-Upload-Header-Content-Length": str(num_bytes),
        "X-Goog-Upload-Header-Content-Type": mime_type,
        "Content-Type": "application/json",
    }
    metadata = {"file": {"display_name": f"nano_banana_upload_{uuid4()}"}}

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            # 1. Start Upload
            response = await client.post(url, headers=start_headers, json=metadata)
            if response.status_code != 200:
                logger.error(f"Failed to start upload: {response.status_code} - {response.text}")
                return None

            upload_url = response.headers.get("x-goog-upload-url")
            if not upload_url:
                logger.error("No upload URL returned")
                return None

            # 2. Upload Bytes
            upload_headers = {
                "Content-Length": str(num_bytes),
                "X-Goog-Upload-Offset": "0",
                "X-Goog-Upload-Command": "upload, finalize",
            }
            response_upload = await client.post(
                upload_url, headers=upload_headers, content=media_data
            )
            if response_upload.status_code != 200:
                logger.error(f"Failed to upload data: {response_upload.status_code} - {response_upload.text}")
                return None

            file_uri = (response_upload.json().get("file") or {}).get("uri")
            if not file_uri:
                # Do not report success when the API gave us nothing to use.
                logger.error("Upload finished but no file URI was returned")
                return None

            logger.info(f"File uploaded successfully: {file_uri}")
            return file_uri

    except Exception as e:
        logger.error(f"Upload error: {e}")
        return None
+ + b64_image = base64.b64encode(image_data).decode("utf-8") + parts.append({ + "inline_data": { + "mime_type": "image/jpeg", + "data": b64_image + } + }) + logger.info(f"Nano Banana: Added reference image (inline_data base64, {len(b64_image)} chars)") + + # Text Instruction Second + parts.append({"text": prompt}) + + # Construct generation config + gen_config = { + "responseModalities": ["IMAGE"] + } + + # Map aspect ratio if present + ar_map = { + "1:1": "1:1", "16:9": "16:9", "9:16": "9:16", + "4:3": "4:3", "3:4": "3:4" + } + input_ar = input_data.get("aspect_ratio", "1:1") + if input_ar in ar_map: + gen_config["imageConfig"] = { + "aspectRatio": ar_map[input_ar], + "imageSize": input_data.get("image_size", "2K") # PHP supports imageSize + } + + payload = { + "contents": [{ + "parts": parts + }], + "generationConfig": gen_config + } + + try: + async with httpx.AsyncClient(timeout=120.0) as client: + response = await client.post( + url, + headers={ + "Content-Type": "application/json", + "x-goog-api-key": settings.google_api_key + }, + json=payload + ) + + logger.info(f"Nano Banana response status: {response.status_code}") + + if response.status_code != 200: + logger.error(f"Nano Banana API error: {response.status_code} - {response.text}") + # Try to parse error message + try: + err_json = response.json() + err_msg = err_json.get("error", {}).get("message", response.text) + logger.error(f"Nano Banana Error Details: {err_msg}") + except: + pass + return None, None + + data = response.json() + # logger.info(f"Nano Banana response: {data}") + + # Extract image from response - supporting both inline_data and inlineData + candidates = data.get("candidates", []) + if candidates and len(candidates) > 0: + content = candidates[0].get("content", {}) + parts_resp = content.get("parts", []) + + for part in parts_resp: + # Check snake_case first (PHP match) + if "inline_data" in part: + inline_data = part["inline_data"] + if "data" in inline_data: + img_bytes = 
async def _generate_runway_image(input_data: dict) -> tuple:
    """Generate image using Runway Gen-4 Image

    Creates a ``gen4_image`` text-to-image task, then polls the task endpoint
    every 2 seconds (up to 90 times) until it finishes.

    Parameters (keys of ``input_data``):
    - prompt: Text description
    - ratio: "1024:1024" or "1360:768"; anything else falls back to "1360:768"
    - seed: Optional positive seed

    Returns:
        ``(image_bytes, filename)`` for the first output, or ``(None, None)``
        when the task succeeds without output or does not finish in time.

    Raises:
        ValueError: If the API key is missing, no task id is returned, or the
            task ends as FAILED or CANCELLED.
        httpx.HTTPStatusError: If task creation or the image download fails.
    """
    import asyncio

    if not settings.runway_api_key:
        raise ValueError("RUNWAY_API_KEY not configured")

    prompt = input_data.get("prompt", "")
    ratio = input_data.get("ratio", "1360:768")
    seed = input_data.get("seed")

    payload = {
        "model": "gen4_image",
        "promptText": prompt,
        "ratio": ratio if ratio in ("1024:1024", "1360:768") else "1360:768",
    }
    if seed and seed > 0:
        payload["seed"] = seed

    base_headers = {
        "Authorization": f"Bearer {settings.runway_api_key}",
        "X-Runway-Version": "2024-11-06",
    }

    async with httpx.AsyncClient(timeout=180) as client:
        response = await client.post(
            "https://api.dev.runwayml.com/v1/text_to_image",
            headers={**base_headers, "Content-Type": "application/json"},
            json=payload,
        )
        response.raise_for_status()
        task_id = response.json().get("id")
        if not task_id:
            # Without an id we would poll ".../tasks/None" for three minutes.
            raise ValueError("Runway did not return a task id")

        # Poll for completion
        for _ in range(90):
            await asyncio.sleep(2)
            status_resp = await client.get(
                f"https://api.dev.runwayml.com/v1/tasks/{task_id}",
                headers=base_headers,
            )
            status_data = status_resp.json()
            status = status_data.get("status")

            if status == "FAILED":
                raise ValueError(f"Runway failed: {status_data.get('error')}")
            if status == "CANCELLED":
                # A cancelled task never succeeds; stop instead of timing out.
                raise ValueError("Runway task was cancelled")
            if status != "SUCCEEDED":
                continue

            # `output` may be missing, null or empty; indexing it blindly
            # would raise IndexError/TypeError.
            outputs = status_data.get("output") or []
            url = outputs[0] if outputs else None
            if not url:
                logger.warning(f"Runway task {task_id} succeeded without output")
                break

            img_resp = await client.get(url)
            # Never store an HTTP error body as if it were the image.
            img_resp.raise_for_status()
            return img_resp.content, f"runway_gen4_{uuid4()}.png"

    return None, None
"Proteus" + usage_stats={ + "images": 1, + "processing_time_ms": duration_ms + }, + request_metadata={ + "input_file": input_asset.original_filename, + "scale": scale, + "original_dims": f"{original_width}x{original_height}", + "output_dims": f"{output_width}x{output_height}" + }, + response_metadata={ + "output_file": filename, + "topaz_request_id": request_id + } + ) + except Exception as log_e: + logger.error(f"Failed to log usage stats: {log_e}") + job.progress = 100 job.status = "completed" job.completed_at = datetime.utcnow() diff --git a/backend/app/services/prompt_studio.py b/backend/app/services/prompt_studio.py index f26daf0..8ee0d36 100644 --- a/backend/app/services/prompt_studio.py +++ b/backend/app/services/prompt_studio.py @@ -256,7 +256,13 @@ async def enhance( provider: str = "openai", include_negative: bool = True, include_technical: bool = True, - language: str = "en" + language: str = "en", + # Advanced CinePrompt params + application: Optional[str] = None, + camera: Optional[str] = None, + lens: Optional[str] = None, + aspect_ratio: Optional[str] = None, + creative_freedom: float = 0.3 ) -> dict: """Enhance a prompt using AI @@ -267,10 +273,28 @@ async def enhance( include_negative: Whether to generate negative prompts include_technical: Whether to include technical parameters language: Output language code + application: CinePrompt Application preset (triggers CinePrompt mode if set) + camera: CinePrompt Camera + lens: CinePrompt Lens + aspect_ratio: CinePrompt Aspect Ratio + creative_freedom: CinePrompt Creative Freedom (0.0 - 1.0) Returns: Dictionary with enhanced prompt, negative prompt, and metadata """ + + # Check if this is a CinePrompt request (Application/Camera set) + if application or camera: + from app.services import cine_prompt_studio + return await cine_prompt_studio.enhance_cine_prompt( + prompt=prompt, + application=application or "Custom", + camera=camera or "Arri Alexa 35", + lens=lens or "Cooke S7/i", + 
def _check_font_availability(font_name: str) -> str:
    """Check if font exists, return validated font or fallback.

    Resolution order against the names from ``get_available_fonts()``:
    exact match, case-insensitive match, the first available default
    (Arial, Helvetica, DejaVu Sans, FreeSans), and finally 'Arial'.

    Args:
        font_name: Requested font family. A missing/empty value (for example
            an explicit null in the job input) skips straight to the defaults
            instead of raising.

    Returns:
        The font name to use, spelled as reported by the system when matched.
    """
    available = get_available_fonts()

    if font_name:
        # Direct match
        if font_name in available:
            return font_name

        # Case insensitive match
        wanted = font_name.lower()
        for candidate in available:
            if candidate.lower() == wanted:
                return candidate

    # Fallback to defaults if specific font not found
    for default in ("Arial", "Helvetica", "DejaVu Sans", "FreeSans"):
        if default in available:
            return default

    # Last resort (ffmpeg typically has a default sans serif)
    return "Arial"
ValueError("Provided subtitle asset not found") + + with open(sub_asset.file_path, "r", encoding="utf-8") as f: + subtitle_content = f.read() + + # Simple assumption: Input is SRT if we are parsing it + # In future we might check extension + segments = _parse_srt(subtitle_content) + job.progress = 50 # Skip transcription + transcription_result = {"text": "Imported from SRT", "language": source_language} + + else: + # Transcribe with Whisper + job.api_provider = f"whisper-{whisper_model}" + + import whisper + model = whisper.load_model(whisper_model) + + # Extract audio to temp file + temp_audio = os.path.join(settings.storage_path, "temp", f"temp_{uuid4()}.wav") + os.makedirs(os.path.dirname(temp_audio), exist_ok=True) + + try: + subprocess.run([ + "ffmpeg", "-i", input_asset.file_path, + "-ar", "16000", + "-ac", "1", + "-c:a", "pcm_s16le", + "-y", temp_audio + ], check=True, capture_output=True) + + job.progress = 20 + db.commit() + + # Transcribe + transcribe_options = { + "language": None if source_language == "auto" else source_language, + "verbose": False, + "word_timestamps": word_timestamps + } + result = model.transcribe(temp_audio, **transcribe_options) + + segments = result["segments"] + detected_language = result["language"] + transcription_result = result + + # Generate initial subtitle content + if output_format == "vtt": + subtitle_content = _generate_vtt(segments, word_timestamps) + elif output_format == "ass": + subtitle_content = _generate_ass(segments, font, font_size, text_color, + outline_color, outline_width, position, alignment, + margin_v, margin_h, shadow, bold, italic, + background_color, background_opacity) + else: + subtitle_content = _generate_srt(segments) + + finally: + if os.path.exists(temp_audio): + os.remove(temp_audio) + + job.progress = 60 + db.commit() + + # Update subtitle extension based on format + subtitle_ext = "srt" + if output_format == "vtt": subtitle_ext = "vtt" + elif output_format == "ass": subtitle_ext = "ass" + # 
Translate if needed translated_content = None if target_language: @@ -323,7 +438,18 @@ async def process(job_id: str): output_assets = [] # Save original subtitle file - subtitle_filename = f"subtitles_{uuid4()}.{subtitle_ext}" + # Use simple naming convention + base_name = os.path.splitext(input_asset.original_filename)[0] + # Clean basename of special chars if needed, but for now just use it + + subtitle_filename = f"{base_name}-subtitles-{detected_language}.{subtitle_ext}" + subtitle_path = os.path.join(settings.storage_path, "documents", subtitle_filename) + # Handle duplicate filenames by appending uuid if needed, or just overwrite since uuid approach was replaced + # Actually user wants to "see whats been done", so readable names are key. + # If we overwrite, that's fine as per previous discussion, but unique names prevent collision in shared storage? + # Let's stick to unique names but readable: base-subtitles-lang-uuid_short.ext + short_id = str(uuid4())[:8] + subtitle_filename = f"{base_name}-subtitles-{detected_language}-{short_id}.{subtitle_ext}" subtitle_path = os.path.join(settings.storage_path, "documents", subtitle_filename) os.makedirs(os.path.dirname(subtitle_path), exist_ok=True) @@ -357,7 +483,8 @@ async def process(job_id: str): # Save translated subtitle if exists trans_path = None if translated_content: - trans_filename = f"subtitles_translated_{uuid4()}.{subtitle_ext}" + short_id = str(uuid4())[:8] + trans_filename = f"{base_name}-subtitles-{target_language}-{short_id}.{subtitle_ext}" trans_path = os.path.join(settings.storage_path, "documents", trans_filename) with open(trans_path, "w", encoding="utf-8") as f: @@ -392,7 +519,10 @@ async def process(job_id: str): # Burn subtitles if requested if burn_subtitles: burn_path = trans_path if translated_content else subtitle_path - output_filename = f"subtitled_{uuid4()}.mp4" + # Burned video + lang_code = target_language if translated_content else detected_language + short_id = str(uuid4())[:8] + 
output_filename = f"{base_name}-subtitled-{lang_code}-{short_id}.mp4" output_path = os.path.join(settings.storage_path, "videos", output_filename) os.makedirs(os.path.dirname(output_path), exist_ok=True) @@ -403,12 +533,16 @@ async def process(job_id: str): shadow, bold, italic, background_color, background_opacity ) - subprocess.run([ - "ffmpeg", "-i", input_asset.file_path, - "-vf", subtitle_filter, - "-c:a", "copy", - "-y", output_path - ], check=True, capture_output=True) + try: + result = subprocess.run([ + "ffmpeg", "-i", input_asset.file_path, + "-vf", subtitle_filter, + "-c:a", "copy", + "-y", output_path + ], check=True, capture_output=True) + except subprocess.CalledProcessError as e: + logger.error("FFmpeg burning failed", stderr=e.stderr.decode('utf-8')) + raise RuntimeError(f"FFmpeg error: {e.stderr.decode('utf-8')[-500:]}") video_size = os.path.getsize(output_path) @@ -444,12 +578,12 @@ async def process(job_id: str): output_assets.append(video_asset.id) # Cleanup temp audio - if os.path.exists(audio_path): + if 'audio_path' in locals() and audio_path and os.path.exists(audio_path): os.remove(audio_path) job.output_asset_ids = output_assets job.output_data = { - "transcript": result.get("text", ""), + "transcript": transcription_result.get("text", ""), "language": detected_language, "segments_count": len(segments), "word_timestamps": word_timestamps, @@ -464,7 +598,7 @@ async def process(job_id: str): db.commit() except Exception as e: - logger.error(f"Subtitle processing error: {e}") + logger.error(f"Subtitle processing error: {e}", exc_info=True) job.status = "failed" job.error_message = str(e) db.commit() @@ -555,6 +689,39 @@ Style: Default,{font},{font_size},&H00{primary_hex},&H00{primary_hex},&H00{outli Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text """ + +def _parse_srt(content: str) -> list: + """Parse SRT content into segments""" + segments = [] + blocks = content.strip().split('\n\n') + + for block in blocks: 
+ lines = block.strip().split('\n') + if len(lines) >= 3: + # Parse timestamp line + times = lines[1].split(' --> ') + if len(times) != 2: + continue + + start_str, end_str = times + + def parse_time(t_str): + t_str = t_str.replace(',', '.') + parts = t_str.split(':') + return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2]) + + start = parse_time(start_str) + end = parse_time(end_str) + text = "\n".join(lines[2:]) + + segments.append({ + 'start': start, + 'end': end, + 'text': text + }) + + return segments + for segment in segments: start = _format_ass_timestamp(segment['start']) end = _format_ass_timestamp(segment['end']) @@ -564,6 +731,39 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text return ass_content +def _parse_srt(content: str) -> list: + """Parse SRT content into segments""" + segments = [] + blocks = content.strip().split('\n\n') + + for block in blocks: + lines = block.strip().split('\n') + if len(lines) >= 3: + # Parse timestamp line + times = lines[1].split(' --> ') + if len(times) != 2: + continue + + start_str, end_str = times + + def parse_time(t_str): + t_str = t_str.replace(',', '.') + parts = t_str.split(':') + return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2]) + + start = parse_time(start_str) + end = parse_time(end_str) + text = "\n".join(lines[2:]) + + segments.append({ + 'start': start, + 'end': end, + 'text': text + }) + + return segments + + def _format_srt_timestamp(seconds: float) -> str: """Convert seconds to SRT timestamp format (HH:MM:SS,mmm)""" td = timedelta(seconds=seconds) diff --git a/backend/app/services/video_generator.py b/backend/app/services/video_generator.py index a5f6103..c832644 100644 --- a/backend/app/services/video_generator.py +++ b/backend/app/services/video_generator.py @@ -251,6 +251,55 @@ async def generate(job_id: str): job.output_asset_ids = [asset.id] job.output_data = {"asset_id": str(asset.id), "file_path": file_path} + # Log Usage + try: 
+ from app.utils.logging import log_model_usage + + # Calculate duration + duration_ms = 0 + if job.started_at: + duration_ms = int((datetime.utcnow() - job.started_at).total_seconds() * 1000) + + # Determine actual parameters used (defaults logic) + # Runway defaults + used_duration = input_data.get("duration") + used_resolution = input_data.get("resolution") + used_aspect = input_data.get("aspect_ratio") + + if provider == "runway": + if not used_duration: used_duration = 5 + if not used_resolution: used_resolution = "1280x768" + elif provider == "veo": + if not used_duration: used_duration = 8 + if not used_resolution: used_resolution = "720p" + if not used_aspect: used_aspect = "16:9" + + log_model_usage( + db=db, + job_id=str(job.id), + user_id=str(job.user_id), + module="video_generator", + action="generate", + provider=provider, + model=job.api_model, + usage_stats={ + "seconds": used_duration, + "processing_time_ms": duration_ms + }, + request_metadata={ + "prompt": prompt, + "resolution": used_resolution, + "duration": used_duration, + "aspect_ratio": used_aspect + }, + response_metadata={ + "output_assets": [str(asset.id)] if video_data and 'asset' in locals() else [], + "filenames": [filename] if filename else [] + } + ) + except Exception as log_e: + logger.error(f"Failed to log video generation usage: {log_e}") + job.progress = 100 job.status = "completed" job.completed_at = datetime.utcnow() diff --git a/backend/app/services/voice_to_text.py b/backend/app/services/voice_to_text.py index dc94bec..fb656ea 100644 --- a/backend/app/services/voice_to_text.py +++ b/backend/app/services/voice_to_text.py @@ -35,11 +35,47 @@ async def transcribe(job_id: str): translate = input_data.get("translate", False) target_language = input_data.get("target_language", "EN-US") - # Transcribe with Whisper - import whisper + # Extract audio to temp file first for better compatibility + import subprocess + + temp_audio = f"temp_{uuid4()}.wav" + temp_audio_path = 
def log_model_usage(
    db: Session,
    job_id: str,
    user_id: str,
    module: str,
    action: str,
    provider: str,
    model: str,
    request_metadata: Optional[Dict[str, Any]] = None,
    response_metadata: Optional[Dict[str, Any]] = None,
    usage_stats: Optional[Dict[str, Any]] = None
) -> Optional[UsageLog]:
    """Record one model invocation in ``UsageLog`` with an estimated cost.

    The cost is derived from the ``ModelPricing`` row matching
    ``provider``/``model``. When no row exists the entry is still written
    with a cost of ``0.0``. Logging is best-effort: any failure is logged
    and ``None`` is returned instead of raising.

    Args:
        db: Active SQLAlchemy session. The entry is added and committed on
            this session, so the caller's other pending changes are
            committed with it.
        job_id: Identifier of the job that triggered the call.
        user_id: Identifier of the user who owns the job.
        module: Service module name stored on the entry.
        action: Action name stored on the entry.
        provider: API provider key used for the pricing lookup.
        model: Model name used for the pricing lookup.
        request_metadata: Free-form request details stored on the entry.
        response_metadata: Free-form response details stored on the entry.
        usage_stats: Billable quantities. Recognised keys:
            ``input_tokens``, ``output_tokens``, ``images``, ``seconds``,
            ``characters``, ``requests`` and ``processing_time_ms``.

    Returns:
        The committed ``UsageLog`` instance, or ``None`` on failure.
    """
    from sqlalchemy.exc import SQLAlchemyError

    # Fresh dicts per call: a shared ``{}`` default would be mutated across calls.
    request_metadata = {} if request_metadata is None else request_metadata
    response_metadata = {} if response_metadata is None else response_metadata
    usage_stats = {} if usage_stats is None else usage_stats

    # (usage_stats key, ModelPricing attribute, divisor applied to the quantity)
    billable = (
        ("input_tokens", "cost_per_input_token", 1),
        ("output_tokens", "cost_per_output_token", 1),
        ("images", "cost_per_image", 1),
        ("seconds", "cost_per_second", 1),
        ("characters", "cost_per_1k_chars", 1000.0),
        ("requests", "cost_per_request", 1),  # generic per-request
    )

    try:
        # 1. Calculate Cost
        pricing = db.query(ModelPricing).filter(
            ModelPricing.provider == provider,
            ModelPricing.model_name == model
        ).first()

        cost = 0.0
        if pricing is not None:
            for stat_key, rate_attr, divisor in billable:
                quantity = usage_stats.get(stat_key)
                rate = getattr(pricing, rate_attr, None)
                # A pricing row may leave some rates unset; presumably those
                # columns are nullable (verify against the ModelPricing model).
                # Skipping them avoids float(None) dropping the whole log entry.
                if not quantity or rate is None:
                    continue
                cost += float(rate) * (quantity / divisor)

        # 2. Create Log Record
        log_entry = UsageLog(
            job_id=job_id,
            user_id=user_id,
            module=module,
            action=action,
            api_provider=provider,
            api_model=model,
            tokens_input=usage_stats.get("input_tokens"),
            tokens_output=usage_stats.get("output_tokens"),
            estimated_cost_usd=cost,
            processing_time_ms=usage_stats.get("processing_time_ms"),
            request_metadata=request_metadata,
            response_metadata=response_metadata
        )

        db.add(log_entry)
        db.commit()
        return log_entry

    except SQLAlchemyError as e:
        logger.error(f"Failed to log usage: {e}", exc_info=True)
        # A failed query/flush leaves the session unusable until rolled back;
        # the caller keeps using this session after we return.
        try:
            db.rollback()
        except SQLAlchemyError as rollback_error:
            logger.error(f"Rollback after usage logging failure also failed: {rollback_error}")
        return None

    except Exception as e:
        # Non-database failure (e.g. bad usage_stats values): the session is
        # untouched, so leave the caller's pending changes alone.
        logger.error(f"Failed to log usage: {e}", exc_info=True)
        return None
async def _upload_file_http(media_data: bytes, mime_type: str) -> Optional[str]:
    """Upload raw bytes with the two-step resumable upload used by this script.

    Step 1 posts the file metadata and reads the session URL from the
    ``x-goog-upload-url`` response header. Step 2 posts the bytes to that
    URL with the ``upload, finalize`` command.

    Args:
        media_data: File content to upload.
        mime_type: MIME type announced for the upload (e.g. ``image/jpeg``).

    Returns:
        The ``file.uri`` value from the final response, or ``None`` when the
        API key is missing, any request fails, or the response carries no URI.
    """
    if not API_KEY:
        # Without this guard the request would be sent with "key=None".
        print("GOOGLE_API_KEY is not set; cannot upload")
        return None

    url = f"https://generativelanguage.googleapis.com/upload/v1beta/files?key={API_KEY}"
    num_bytes = len(media_data)

    start_headers = {
        "X-Goog-Upload-Protocol": "resumable",
        "X-Goog-Upload-Command": "start",
        "X-Goog-Upload-Header-Content-Length": str(num_bytes),
        "X-Goog-Upload-Header-Content-Type": mime_type,
        "Content-Type": "application/json"
    }

    metadata = {"file": {"display_name": f"debug_upload_{uuid.uuid4()}"}}

    async with httpx.AsyncClient(timeout=30.0) as client:
        # 1. Start Upload
        print("Starting resumable upload...")
        response = await client.post(url, headers=start_headers, json=metadata)
        if response.status_code != 200:
            print(f"Failed to start upload: {response.text}")
            return None

        upload_url = response.headers.get("x-goog-upload-url")
        if not upload_url:
            print("No upload URL returned")
            return None

        # 2. Upload Bytes
        print(f"Uploading {num_bytes} bytes to {upload_url[:50]}...")
        headers_upload = {
            "Content-Length": str(num_bytes),
            "X-Goog-Upload-Offset": "0",
            "X-Goog-Upload-Command": "upload, finalize"
        }

        response_upload = await client.post(upload_url, headers=headers_upload, content=media_data)
        if response_upload.status_code != 200:
            print(f"Failed to upload data: {response_upload.text}")
            return None

        try:
            data = response_upload.json()
        except ValueError:
            print(f"Upload response was not valid JSON: {response_upload.text}")
            return None

        file_uri = data.get("file", {}).get("uri")
        if not file_uri:
            # Do not report success when the response carries no URI.
            print(f"Upload response contained no file URI: {response_upload.text}")
            return None

        print(f"File uploaded successfully: {file_uri}")
        return file_uri
async def list_models():
    """Print every model visible to ``GOOGLE_API_KEY``.

    Follows ``nextPageToken`` until the listing is exhausted, so the printed
    count covers all pages rather than only the first. Prints an error and
    returns early when the key is missing or a request does not return
    HTTP 200.
    """
    if not API_KEY:
        print("Error: GOOGLE_API_KEY is not set")
        return

    url = "https://generativelanguage.googleapis.com/v1beta/models"
    models = []
    page_token = None

    async with httpx.AsyncClient() as client:
        while True:
            # Passing the key via params lets httpx URL-encode it.
            params = {"key": API_KEY}
            if page_token:
                params["pageToken"] = page_token

            response = await client.get(url, params=params)
            if response.status_code != 200:
                print(f"Error: {response.status_code} - {response.text}")
                return

            data = response.json()
            models.extend(data.get("models", []))

            page_token = data.get("nextPageToken")
            if not page_token:
                break

    print(f"Found {len(models)} models:")
    for m in models:
        name = m.get("name")
        display = m.get("displayName", "No Display Name")
        print(f" - {name} ({display})")
Text & Vision + { + "provider": "openai", "model_name": "gpt-4o", + "cost_per_input_token": 0.0000025, # $2.50 / 1M + "cost_per_output_token": 0.0000100 # $10.00 / 1M + }, + { + "provider": "openai", "model_name": "gpt-4o-mini", + "cost_per_input_token": 0.00000015, # $0.15 / 1M + "cost_per_output_token": 0.00000060 # $0.60 / 1M + }, + { + "provider": "google", "model_name": "gemini-2.0-flash-exp", + "cost_per_request": 0.0 # Currently free/preview + }, + + # 2. Image Generation + { + "provider": "openai", "model_name": "dall-e-3", + "cost_per_image": 0.080 # Standard HD avg + }, + { + "provider": "openai", "model_name": "gpt-image-1", + "cost_per_image": 0.040 + }, + { + "provider": "google", "model_name": "imagen-3.0", + "cost_per_image": 0.040 + }, + { + "provider": "nano-banana", "model_name": "gemini-2.5-flash-image", + "cost_per_image": 0.040 + }, + { + "provider": "stability", "model_name": "sd3.5-large", + "cost_per_image": 0.065 + }, + { + "provider": "ideogram", "model_name": "V_2", + "cost_per_image": 0.080 + }, + { + "provider": "flux", "model_name": "flux-pro-1.1", + "cost_per_image": 0.050 + }, + + # 3. Video Generation + { + "provider": "runway", "model_name": "gen3_alpha", + "cost_per_second": 0.10 + }, + { + "provider": "runway", "model_name": "gen3_alpha_turbo", + "cost_per_second": 0.05 + }, + { + "provider": "runway", "model_name": "gen4_turbo", + "cost_per_second": 0.15 # Estimate + }, + { + "provider": "google", "model_name": "veo-3.1", + "cost_per_second": 0.15 + }, + { + "provider": "veo", "model_name": "veo-3.1-generate-preview", # Provider 'veo' or 'google'? Log said 'veo' + "cost_per_second": 0.15 + }, + { + "provider": "google", "model_name": "veo-3.1-generate-preview", # Alias for safety + "cost_per_second": 0.15 + }, + { + "provider": "google", "model_name": "veo-3", + "cost_per_second": 0.10 + }, + + # 3b. Misc Image + { + "provider": "nano-banana", "model_name": "gemini-3-pro-image-preview", + "cost_per_image": 0.040 + }, + + # 4. 
Audio + { + "provider": "elevenlabs", "model_name": "eleven_multilingual_v2", + "cost_per_1k_chars": 0.30 + }, + { + "provider": "elevenlabs", "model_name": "eleven_flash_v2_5", + "cost_per_1k_chars": 0.15 + }, + { + "provider": "elevenlabs", "model_name": "sound-generation", + "cost_per_request": 0.10 # Per generation estimate + }, + { + "provider": "deepl", "model_name": "api-pro", + "cost_per_1k_chars": 0.025 # $25 per 1M + }, + + # 5. Media Editing + { + "provider": "clipping_magic", "model_name": "api-v1", + "cost_per_image": 0.10 + }, + { + "provider": "topaz", "model_name": "image-v1", + "cost_per_image": 0.20 # Buffer estimate + }, + { + "provider": "topaz", "model_name": "video-v1", + "cost_per_second": 0.30 # Buffer estimate + } + ] + + print(f"Seeding {len(pricing_data)} pricing records...") + + try: + # Clear existing + db.query(ModelPricing).delete() + + for data in pricing_data: + pricing = ModelPricing(**data) + db.add(pricing) + + db.commit() + print("✓ Pricing data seeded successfully.") + + except Exception as e: + print(f"Error seeding data: {e}") + db.rollback() + finally: + db.close() + +if __name__ == "__main__": + seed_pricing() diff --git a/backend/scripts/test_logging.py b/backend/scripts/test_logging.py new file mode 100644 index 0000000..5afa7ef --- /dev/null +++ b/backend/scripts/test_logging.py @@ -0,0 +1,73 @@ +"""Test Logging Utility Manually""" +import sys +import os +import uuid +from datetime import datetime +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from app.database import SessionLocal +from app.utils.logging import log_model_usage +from app.models.user import User + + +from app.models.job import Job + +def test_logging(): + db = SessionLocal() + try: + # Get a user + user = db.query(User).first() + if not user: + print("No fake user found, cannot log.") + return + + print(f"Logging for user: {user.email}") + + # Create a dummy Job + job = Job( + user_id=user.id, + module="image_generator", + 
action="generate", + input_data={"prompt": "test"}, + status="completed" + ) + db.add(job) + db.commit() + + # Log a fake image generation + log = log_model_usage( + db=db, + job_id=str(job.id), + user_id=str(user.id), + module="image_generator", + action="generate", + provider="openai", + model="dall-e-3", + usage_stats={ + "images": 1, + "processing_time_ms": 4500 + }, + request_metadata={ + "prompt": "A futuristic city with flying cars and neon lights, cyberpunk style", + "size": "1024x1024", + "quality": "hd" + }, + response_metadata={ + "status": "success" + } + ) + + if log: + print(f"✓ Log created with ID: {log.id}") + print(f" Cost: ${log.estimated_cost_usd}") + else: + print("✗ Log creation failed (returned None)") + + except Exception as e: + print(f"Error: {e}") + db.rollback() + finally: + db.close() + +if __name__ == "__main__": + test_logging() diff --git a/backend/scripts/test_logging_filenames.py b/backend/scripts/test_logging_filenames.py new file mode 100644 index 0000000..97961ed --- /dev/null +++ b/backend/scripts/test_logging_filenames.py @@ -0,0 +1,74 @@ +"""Test Logging Filenames""" +import sys +import os +import uuid +from datetime import datetime +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from app.database import SessionLocal +from app.utils.logging import log_model_usage +from app.models.user import User +from app.models.job import Job + +def test_logging_filenames(): + db = SessionLocal() + try: + # Get a user + user = db.query(User).first() + if not user: + print("No fake user found, cannot log.") + return + + print(f"Logging for user: {user.email}") + + # Create a dummy Job + job = Job( + user_id=user.id, + module="image_generator", + action="generate", + input_data={"prompt": "filename test"}, + status="completed" + ) + db.add(job) + db.commit() + + # Log a fake image generation with filenames + log = log_model_usage( + db=db, + job_id=str(job.id), + user_id=str(user.id), + 
module="image_generator", + action="generate", + provider="openai", + model="dall-e-3", + usage_stats={ + "images": 2, + "processing_time_ms": 5000 + }, + request_metadata={ + "prompt": "Two cats playing chess", + "size": "1024x1024" + }, + response_metadata={ + "output_assets": ["asset_1", "asset_2"], + "filenames": ["cat_chess_01.png", "cat_chess_02.png"] + } + ) + + if log: + print(f"✓ Log created with ID: {log.id}") + if log.response_metadata.get("filenames"): + print(f" Filenames found: {log.response_metadata['filenames']}") + else: + print("✗ Filenames missing from log!") + else: + print("✗ Log creation failed (returned None)") + + except Exception as e: + print(f"Error: {e}") + db.rollback() + finally: + db.close() + +if __name__ == "__main__": + test_logging_filenames() diff --git a/backend/scripts/test_nano_banana_simple.py b/backend/scripts/test_nano_banana_simple.py new file mode 100644 index 0000000..54aac14 --- /dev/null +++ b/backend/scripts/test_nano_banana_simple.py @@ -0,0 +1,76 @@ +import asyncio +import os +import sys +import logging +from dotenv import load_dotenv + +# Add backend to path +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) + +load_dotenv() + +# Mock settings +class Settings: + google_api_key = os.getenv("GOOGLE_API_KEY") + +import app.services.image_generator as img_gen +img_gen.settings = Settings() + +# Configure logging +logging.basicConfig(level=logging.INFO) + +async def test_generation(): + prompt = "A cyberpunk street scene" + + # 1. First generate an image (or use a dummy one if we had one, but let's generate) + print("--- Testing Generation (Text to Image) ---") + input_data = { + "prompt": prompt, + "model": "gemini-2.0-flash-exp" # Try 2.0 first + } + + # We can't easily call _generate_nano_banana without mocking httpx or having a real key. + # Assuming the user has a real key since they are getting results. 
+ + try: + image_data, filename = await img_gen._generate_nano_banana(input_data) + if image_data: + print(f"✓ Generated base image: {len(image_data)} bytes") + + # 2. Now try to 'Edit' it + print("\n--- Testing Edit (Image + Text to Image) ---") + edit_prompt = "Make it daytime" + + # Test Gemini 3 Variants + variants = [ + "gemini-3.0-pro-image-preview", + "gemini-3.0-pro-image", + "gemini-3.0-flash-image", + "gemini-3.0-flash-image-preview" + ] + + for model_name in variants: + print(f"\n--- Testing Model: {model_name} ---") + input_data_test = { + "prompt": edit_prompt, + "model": model_name + } + try: + # We pass 'image/png' because our fake input_image is implied to be handled + img, fname = await img_gen._generate_nano_banana(input_data_test, image_data, "image/png") + if img: + print(f"✓ Success with {model_name}: {len(img)} bytes") + break # Stop on first success just to know valid one + else: + print(f"✗ Failed with {model_name} (No data)") + except Exception as e: + print(f"✗ Error with {model_name}: {e}") + + else: + print("✗ Initial generation failed") + + except Exception as e: + print(f"✗ Error: {e}") + +if __name__ == "__main__": + asyncio.run(test_generation()) diff --git a/forge-ai.code-workspace b/forge-ai.code-workspace new file mode 100644 index 0000000..876a149 --- /dev/null +++ b/forge-ai.code-workspace @@ -0,0 +1,8 @@ +{ + "folders": [ + { + "path": "." 
+ } + ], + "settings": {} +} \ No newline at end of file diff --git a/frontend/app/admin/usage/page.tsx b/frontend/app/admin/usage/page.tsx new file mode 100644 index 0000000..d1b130f --- /dev/null +++ b/frontend/app/admin/usage/page.tsx @@ -0,0 +1,267 @@ +'use client'; + +import { useState, useEffect } from 'react'; +import { toast } from 'react-hot-toast'; +import { + Search, + Filter, + ChevronDown, + ChevronUp, + DollarSign, + Clock, + FileText, + Image as ImageIcon, + Video, + Mic, + Cpu +} from 'lucide-react'; +import AdminGuard from '@/components/AdminGuard'; +import api from '@/lib/api'; + +interface UsageLog { + id: string; + timestamp: string; + user: { + id: string; + email: string; + name: string; + }; + service: { + module: string; + provider: string; + model: string; + }; + metrics: { + tokens_in?: number; + tokens_out?: number; + cost_usd: number; + latency_ms: number; + }; + request_details: any; + response_details: any; +} + +export default function UsageSearchPage() { + const [logs, setLogs] = useState([]); + const [loading, setLoading] = useState(false); + const [searchQuery, setSearchQuery] = useState(''); + const [expandedRow, setExpandedRow] = useState(null); + + // Filters + const [providerFilter, setProviderFilter] = useState(''); + const [startDate, setStartDate] = useState(''); + + const searchLogs = async () => { + setLoading(true); + try { + const params: any = { + limit: 50 + }; + + if (searchQuery) params.query = searchQuery; + if (providerFilter) params.provider = providerFilter; + if (startDate) params.start_date = new Date(startDate).toISOString(); + + const response = await api.get('/admin/logs/search', { params }); + setLogs(response.data.items || []); + } catch (error) { + console.error('Search failed', error); + toast.error('Failed to search logs'); + } finally { + setLoading(false); + } + }; + + useEffect(() => { + searchLogs(); + }, []); // Initial load + + const toggleRow = (id: string) => { + setExpandedRow(expandedRow === 
id ? null : id); + }; + + const getModuleIcon = (module: string) => { + if (module.includes('image')) return ; + if (module.includes('video')) return