Implement Nano Banana Pro with integrated editing loop, asset picker, and detailed usage logging

This commit is contained in:
DJP 2025-12-12 15:48:12 -05:00
parent c3dc50f607
commit e896aa50a1
35 changed files with 5252 additions and 483 deletions

View file

@ -247,6 +247,86 @@ async def get_usage_reports(
}
@router.get("/logs/search")
async def search_usage_logs(
    query: Optional[str] = None,
    provider: Optional[str] = None,
    user_id: Optional[str] = None,
    start_date: Optional[str] = None,  # ISO format
    page: int = Query(1, ge=1),
    limit: int = Query(20, ge=1, le=100),
    db: Session = Depends(get_db),
    admin: User = Depends(get_current_admin_user)
):
    """
    Search usage logs by filename, prompt, user, or provider.
    Surface detailed cost and metadata.

    Parameters:
    - query: free text matched case-insensitively (substring) against the
      text form of ``request_metadata`` / ``response_metadata`` and against
      ``action``. ``%`` and ``_`` are matched literally.
    - provider: exact match on ``UsageLog.api_provider``.
    - user_id: exact match on ``UsageLog.user_id``.
    - start_date: ISO-8601 timestamp (a trailing ``Z`` is accepted); only
      logs created at or after it are returned.
    - page / limit: 1-based page number and page size (1..100).
    - admin: resolved through ``get_current_admin_user``; not used in the
      body (presumably it only gates access -- confirm in that dependency).

    Returns a dict with ``items`` (newest first), ``total`` (matches before
    pagination), ``page`` and ``limit``.

    Raises:
    - HTTPException 400 when ``start_date`` is not a valid ISO-8601 value.
    """
    # Imported locally so this function does not rely on the module header.
    from fastapi import HTTPException

    sql_query = db.query(UsageLog, User).join(User, UsageLog.user_id == User.id)

    # 1. Text Search (Metadata)
    if query:
        # Escape LIKE metacharacters so user text such as "my_file" or "50%"
        # is matched literally instead of acting as a wildcard.
        escaped = (
            query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        search_term = f"%{escaped}%"
        # Search inside JSONB metadata fields (filename, prompt, etc) by
        # casting them to text.
        sql_query = sql_query.filter(
            func.cast(UsageLog.request_metadata, String).ilike(search_term, escape="\\")
            | func.cast(UsageLog.response_metadata, String).ilike(search_term, escape="\\")
            | UsageLog.action.ilike(search_term, escape="\\")
        )

    # 2. Filters
    if provider:
        sql_query = sql_query.filter(UsageLog.api_provider == provider)
    if user_id:
        sql_query = sql_query.filter(UsageLog.user_id == user_id)
    if start_date:
        try:
            dt = datetime.fromisoformat(start_date.replace('Z', '+00:00'))
        except ValueError as exc:
            # Silently dropping a malformed filter would return unfiltered
            # rows that look like a valid answer; reject the request instead.
            raise HTTPException(
                status_code=400,
                detail=f"Invalid start_date (expected ISO 8601): {start_date!r}",
            ) from exc
        sql_query = sql_query.filter(UsageLog.created_at >= dt)

    # Pagination
    total = sql_query.count()
    logs = (
        sql_query
        # The id tiebreaker keeps paging stable when timestamps are equal.
        .order_by(desc(UsageLog.created_at), UsageLog.id)
        .offset((page - 1) * limit)
        .limit(limit)
        .all()
    )

    items = [
        {
            "id": str(log.id),
            "timestamp": log.created_at.isoformat(),
            "user": {
                "id": str(user.id),
                "email": user.email,
                "name": user.display_name
            },
            "service": {
                "module": log.module,
                "provider": log.api_provider,
                "model": log.api_model
            },
            "metrics": {
                "tokens_in": log.tokens_input,
                "tokens_out": log.tokens_output,
                # A missing cost is reported as 0.0.
                "cost_usd": float(log.estimated_cost_usd or 0),
                "latency_ms": log.processing_time_ms
            },
            # Return specific metadata fields relevant for UI
            "request_details": log.request_metadata,
            "response_details": log.response_metadata
        }
        for log, user in logs
    ]

    return {
        "items": items,
        "total": total,
        "page": page,
        "limit": limit
    }
@router.get("/audit-logs")
async def get_audit_logs(
page: int = Query(1, ge=1),

View file

@ -192,6 +192,146 @@ def download_asset(asset_id: UUID, db: Session = Depends(get_db)):
)
def _remove_file_quietly(path: Optional[str]) -> None:
    """Delete ``path`` if it is set and exists; ignore filesystem errors."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # Best effort: a missing or locked stale file must not fail the upload.
        pass


async def process_upload(
    file: UploadFile,
    db: Session,
    user: Optional[User] = None,
    project_id: Optional[str] = None,
    source_module: Optional[str] = None,
    is_temporary: bool = False,
    overwrite: bool = False,
    allow_extensions: Optional[list] = None
) -> Asset:
    """Core logic for uploading/saving an asset.

    Steps: validate input, detect an existing non-temporary asset with the
    same owner and original filename, stream the upload to disk, extract
    basic media metadata, generate a thumbnail, then create or update the
    ``Asset`` row.

    Parameters:
    - file: the incoming upload; its cursor is rewound after saving.
    - db: SQLAlchemy session used for the lookup and the commit.
    - user: owner of the asset; without a user no duplicate check is done.
    - project_id: optional project UUID as a string.
    - source_module: name of the module that produced the upload.
    - is_temporary: temporary uploads skip the duplicate check.
    - overwrite: when a duplicate exists, replace its file but keep its
      record (and therefore its ID) instead of raising 409.
    - allow_extensions: optional whitelist of file extensions (with or
      without a leading dot, case-insensitive). The subtitle endpoint in
      this codebase already passes this keyword; it is interpreted here as
      "only these extensions are accepted" -- NOTE(review): confirm that
      this is the intended meaning.

    Raises:
    - HTTPException 400: extension not in ``allow_extensions`` or
      ``project_id`` is not a valid UUID.
    - HTTPException 409: duplicate exists and ``overwrite`` is False; the
      detail carries the existing ``asset_id``.
    - HTTPException 500: the file could not be written.
    """
    ext = os.path.splitext(file.filename)[1] if file.filename else ""

    if allow_extensions is not None:
        allowed = {str(item).lower().lstrip(".") for item in allow_extensions}
        if ext.lower().lstrip(".") not in allowed:
            raise HTTPException(
                status_code=400,
                detail=(
                    f"Unsupported file extension '{ext}'. "
                    f"Allowed: {', '.join(sorted(allowed))}"
                )
            )

    # Parse the project ID before touching the disk so a bad value cannot
    # leave an orphaned file behind.
    project_uuid = None
    if project_id:
        try:
            project_uuid = UUID(project_id)
        except ValueError as exc:
            raise HTTPException(status_code=400, detail="Invalid project_id") from exc

    # Check for duplicates if not temporary
    existing_asset = None
    if not is_temporary and user:
        existing_asset = db.query(Asset).filter(
            Asset.user_id == user.id,
            Asset.original_filename == file.filename,
            Asset.is_temporary == False  # noqa: E712 (SQL expression, not a Python comparison)
        ).first()

    if existing_asset and not overwrite:
        # Return conflict with existing ID
        # We interpret 409 specially in frontend
        raise HTTPException(
            status_code=409,
            detail={"message": "File exists", "asset_id": str(existing_asset.id)}
        )

    # Reuse the existing ID on overwrite, otherwise generate a new one.
    asset_id = existing_asset.id if existing_asset else uuid4()

    # Determine file type
    file_type = get_file_type(file.content_type)
    stored_filename = f"{asset_id}{ext}"

    # Determine storage path
    storage_dir = os.path.join(settings.storage_path, f"{file_type}s")
    os.makedirs(storage_dir, exist_ok=True)
    file_path = os.path.join(storage_dir, stored_filename)
    temp_path = f"{file_path}.part"

    # Save file. The upload is streamed to a temporary name first, so a
    # failed transfer neither destroys the previous version nor leaves a
    # truncated file under the final name.
    try:
        with open(temp_path, "wb") as buffer:
            # Read in chunks to handle large files
            while content := await file.read(1024 * 1024):
                buffer.write(content)
        await file.seek(0)  # Reset cursor
        if existing_asset:
            # Overwrite: drop the previous file and thumbnail only now that
            # the new content is safely on disk; the record is kept.
            _remove_file_quietly(existing_asset.file_path)
            _remove_file_quietly(existing_asset.thumbnail_path)
        os.replace(temp_path, file_path)
    except Exception as e:
        _remove_file_quietly(temp_path)
        raise HTTPException(status_code=500, detail=f"Failed to save file: {str(e)}") from e

    # Extract metadata
    width, height, duration_seconds = None, None, None
    try:
        if file_type == "image":
            with Image.open(file_path) as img:
                width, height = img.size
        elif file_type == "video":
            # Placeholder for video metadata
            # In production, use ffmpeg probe
            from app.utils.video import extract_video_metadata
            metadata = extract_video_metadata(file_path)
            width = metadata.get('width')
            height = metadata.get('height')
            duration_seconds = metadata.get('duration_seconds')
    except Exception:
        # Metadata is optional: extraction errors are deliberately ignored
        # and the dimensions/duration stay None.
        pass

    # Generate thumbnail
    thumbnail_path = generate_thumbnail(file_path, file_type, str(asset_id))

    # Create or Update Asset record
    file_size = os.path.getsize(file_path)

    if existing_asset:
        # Update existing record
        existing_asset.stored_filename = stored_filename
        existing_asset.file_path = file_path
        existing_asset.thumbnail_path = thumbnail_path
        existing_asset.file_type = file_type
        existing_asset.mime_type = file.content_type
        existing_asset.file_size_bytes = file_size
        existing_asset.width = width
        existing_asset.height = height
        existing_asset.duration_seconds = duration_seconds
        existing_asset.source_module = source_module
        # The project is only changed when one was explicitly supplied.
        if project_uuid:
            existing_asset.project_id = project_uuid
        db.commit()
        db.refresh(existing_asset)
        return existing_asset

    # Create new record
    asset = Asset(
        id=asset_id,
        user_id=user.id if user else None,
        project_id=project_uuid,
        original_filename=file.filename,
        stored_filename=stored_filename,
        file_path=file_path,
        thumbnail_path=thumbnail_path,
        file_type=file_type,
        mime_type=file.content_type,
        file_size_bytes=file_size,
        width=width,
        height=height,
        duration_seconds=duration_seconds,
        source_module=source_module,
        is_temporary=is_temporary
    )
    db.add(asset)
    db.commit()
    db.refresh(asset)
    return asset
@router.post("/upload", response_model=AssetResponse)
async def upload_asset(
file: UploadFile = File(...),
@ -205,108 +345,16 @@ async def upload_asset(
# Get test user
user = db.query(User).filter(User.email == "test@forge.ai").first()
# Check for duplicates if not temporary
if not is_temporary and user:
existing = db.query(Asset).filter(
Asset.user_id == user.id,
Asset.original_filename == file.filename,
Asset.is_temporary == False
).first()
if existing:
if not overwrite:
# Return conflict with existing ID
# We interpret 409 specially in frontend
raise HTTPException(
status_code=409,
detail={"message": "File exists", "asset_id": str(existing.id)}
)
else:
# Overwrite: Delete existing file and record
if os.path.exists(existing.file_path):
try:
os.remove(existing.file_path)
except OSError:
pass
if existing.thumbnail_path and os.path.exists(existing.thumbnail_path):
try:
os.remove(existing.thumbnail_path)
except OSError:
pass
db.delete(existing)
db.commit()
# Determine file type
file_type = get_file_type(file.content_type)
# Generate unique ID and filename
asset_id = uuid4()
ext = os.path.splitext(file.filename)[1] if file.filename else ""
stored_filename = f"{asset_id}{ext}"
# Determine storage path
storage_dir = os.path.join(settings.storage_path, f"{file_type}s")
os.makedirs(storage_dir, exist_ok=True)
file_path = os.path.join(storage_dir, stored_filename)
# Save file
with open(file_path, "wb") as buffer:
shutil.copyfileobj(file.file, buffer)
# Get file size
file_size = os.path.getsize(file_path)
# Get media dimensions and duration if applicable
width = None
height = None
duration_seconds = None
if file_type == "image":
try:
with Image.open(file_path) as img:
width, height = img.size
except Exception:
pass
elif file_type == "video":
try:
from app.utils.video import extract_video_metadata
metadata = extract_video_metadata(file_path)
width = metadata.get('width')
height = metadata.get('height')
duration_seconds = metadata.get('duration_seconds')
except Exception as e:
print(f"Failed to extract video metadata: {e}")
# Generate thumbnail
thumbnail_path = generate_thumbnail(file_path, file_type, str(asset_id))
# Create asset record
asset = Asset(
id=asset_id,
user_id=user.id if user else None,
project_id=UUID(project_id) if (project_id and isinstance(project_id, str)) else None,
original_filename=file.filename,
stored_filename=stored_filename,
file_path=file_path,
thumbnail_path=thumbnail_path,
file_type=file_type,
mime_type=file.content_type,
file_size_bytes=file_size,
width=width,
height=height,
duration_seconds=duration_seconds,
return await process_upload(
file=file,
db=db,
user=user,
project_id=project_id,
source_module=source_module,
is_temporary=is_temporary
is_temporary=is_temporary,
overwrite=overwrite
)
db.add(asset)
db.commit()
db.refresh(asset)
return asset
@router.patch("/{asset_id}", response_model=AssetResponse)
def update_asset(asset_id: UUID, asset_update: AssetUpdate, db: Session = Depends(get_db)):

View file

@ -204,6 +204,13 @@ class PromptEnhanceRequest(BaseModel):
include_negative: bool = True
include_technical: bool = True
language: str = "en"
# CinePrompt Studio Advanced Fields
application: Optional[str] = None
camera: Optional[str] = None
lens: Optional[str] = None
aspect_ratio: Optional[str] = "16:9"
creative_freedom: float = 0.3
class MermaidRenderRequest(BaseModel):
@ -520,21 +527,23 @@ async def generate_subtitles(
italic: bool = Form(False),
font_preset: Optional[str] = Form(None),
word_timestamps: bool = Form(False),
subtitle_file: UploadFile = File(None),
subtitle_asset_id: Optional[str] = Form(None),
background_tasks: BackgroundTasks = None,
db: Session = Depends(get_db)
):
"""
Generate subtitles for video using Whisper + DeepL
Parameters:
- source_language: Source language code or "auto" for detection
- target_language: Target language code for translation (optional)
- burn_subtitles: Whether to burn subtitles into video
- whisper_model: Whisper model (tiny/base/small/medium/large/large-v2/large-v3)
- output_format: Output format (srt/vtt/ass)
Styling (for burning):
- font: Font family name
Generate subtitles for a video using OpenAI Whisper.
- **file**: Video file to process
- **source_language**: Language of the video (auto for detection)
- **target_language**: Language to translate to (optional)
- **burn_subtitles**: Burn subtitles into the video
- **whisper_model**: Whisper model size (tiny, base, small, medium, large)
- **output_format**: Output subtitle format (srt, vtt, ass)
Styling Options (for burning):
- font: Font family (Arial, Helvetica, etc.) - checks system availability
- font_size: Font size in points
- text_color: Primary text color
- outline_color: Text outline color
@ -550,50 +559,80 @@ async def generate_subtitles(
- italic: Use italic text
- font_preset: Predefined style preset (default/cinematic/documentary/news/social_media/minimal/bold)
- word_timestamps: Include word-level timestamps
- subtitle_file: Optional subtitle file (SRT) to burn instead of generating
- subtitle_asset_id: Optional asset ID of existing subtitle to burn
"""
user = db.query(User).filter(User.email == "test@forge.ai").first()
# Fallback to a default user or handle None if test user doesn't exist
if not user:
# Try to find any admin user or proceed with None (if Asset/Job models allow null user_id)
user = db.query(User).order_by(User.id).first()
import structlog
logger = structlog.get_logger()
logger.info("Subtitle generation request received",
filename=file.filename,
source_language=source_language,
target_language=target_language,
burn_subtitles=burn_subtitles,
font=font,
has_subtitle_file=bool(subtitle_file),
subtitle_asset_id=subtitle_asset_id)
from app.api.v1.assets import upload_asset
asset = await upload_asset(file=file, source_module="subtitle_processor", db=db)
from app.api.v1.assets import process_upload
try:
asset = await process_upload(file=file, source_module="subtitle_processor", db=db, user=user, overwrite=True)
job = Job(
user_id=user.id if user else None,
module="subtitle_processor",
action="generate",
input_data={
"source_language": source_language,
"target_language": target_language,
"burn_subtitles": burn_subtitles,
"whisper_model": whisper_model,
"output_format": output_format,
"font": font,
"font_size": font_size,
"text_color": text_color,
"outline_color": outline_color,
"outline_width": outline_width,
"background_color": background_color,
"background_opacity": background_opacity,
"position": position,
"alignment": alignment,
"margin_v": margin_v,
"margin_h": margin_h,
"shadow": shadow,
"bold": bold,
"italic": italic,
"font_preset": font_preset,
"word_timestamps": word_timestamps
},
input_asset_ids=[asset.id],
status="queued"
)
db.add(job)
db.commit()
db.refresh(job)
# Process optional subtitle file upload
input_sub_id = subtitle_asset_id
if subtitle_file:
sub_asset = await process_upload(file=subtitle_file, source_module="subtitle_processor", db=db, user=user, overwrite=True, allow_extensions=['srt', 'vtt', 'ass'])
input_sub_id = str(sub_asset.id)
if background_tasks:
background_tasks.add_task(subtitle_processor.process, str(job.id))
job = Job(
user_id=user.id if user else None,
module="subtitle_processor",
action="generate",
input_data={
"source_language": source_language,
"target_language": target_language,
"burn_subtitles": burn_subtitles,
"whisper_model": whisper_model,
"output_format": output_format,
"font": font,
"font_size": font_size,
"text_color": text_color,
"outline_color": outline_color,
"outline_width": outline_width,
"background_color": background_color,
"background_opacity": background_opacity,
"position": position,
"alignment": alignment,
"margin_v": margin_v,
"margin_h": margin_h,
"shadow": shadow,
"bold": bold,
"italic": italic,
"font_preset": font_preset,
"word_timestamps": word_timestamps,
"subtitle_asset_id": input_sub_id
},
input_asset_ids=[asset.id],
status="pending"
)
db.add(job)
db.commit()
db.refresh(job)
return job_response(job)
if background_tasks:
background_tasks.add_task(subtitle_processor.process, str(job.id))
return job_response(job)
except Exception as e:
logger.error("Failed to initiate subtitle job", error=str(e), exc_info=True)
raise HTTPException(status_code=500, detail=f"Failed to start subtitle processing: {str(e)}")
# ============== AUDIO MODULES ==============
@ -673,8 +712,8 @@ async def convert_voice(
"""Convert voice to another voice using ElevenLabs"""
user = db.query(User).filter(User.email == "test@forge.ai").first()
from app.api.v1.assets import upload_asset
asset = await upload_asset(file=file, source_module="speech_to_speech", db=db)
from app.api.v1.assets import process_upload
asset = await process_upload(file=file, source_module="speech_to_speech", db=db, user=user, overwrite=True)
job = Job(
user_id=user.id if user else None,
@ -828,7 +867,13 @@ async def enhance_prompt(
provider=request.provider,
include_negative=request.include_negative,
include_technical=request.include_technical,
language=request.language
language=request.language,
# Advanced CinePrompt params
application=request.application,
camera=request.camera,
lens=request.lens,
aspect_ratio=request.aspect_ratio,
creative_freedom=request.creative_freedom
)
return result
@ -839,6 +884,12 @@ async def get_prompt_styles():
return prompt_studio.get_available_styles()
@router.get("/text/cine-options")
async def get_cine_options():
    """Return the CinePrompt Studio option data (cameras, lenses, etc.)."""
    # Pure pass-through: the payload is whatever prompt_studio provides.
    cine_options = prompt_studio.get_cine_options()
    return cine_options
# ============== MARKDOWN & MERMAID MODULES ==============
@router.post("/text/mermaid/render")

View file

@ -0,0 +1,26 @@
"""Model Pricing Model"""
from sqlalchemy import Column, String, Numeric, Enum, Boolean
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.sql import func
import uuid
from app.database import Base
class ModelPricing(Base):
    """ORM mapping for the ``model_pricing`` table.

    Each row is keyed by a generated UUID and names a provider/model pair
    together with a set of price columns, a currency, an effective date and
    an active flag. All price columns default to 0.

    NOTE(review): nothing here enforces uniqueness of (provider, model_name)
    or says which price column applies to which kind of model -- confirm how
    the cost calculation selects a row and a metric.
    """
    __tablename__ = "model_pricing"
    # Primary key, generated client-side via uuid.uuid4.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    provider = Column(String(100), nullable=False)  # openai, google, runway...
    model_name = Column(String(100), nullable=False)  # gpt-4o, gen-3-alpha...
    # Pricing Metrics
    # Token prices keep 8 decimal places; the other metrics keep 4.
    cost_per_input_token = Column(Numeric(10, 8), default=0)
    cost_per_output_token = Column(Numeric(10, 8), default=0)
    cost_per_image = Column(Numeric(10, 4), default=0)
    cost_per_second = Column(Numeric(10, 4), default=0)
    cost_per_1k_chars = Column(Numeric(10, 4), default=0)
    cost_per_request = Column(Numeric(10, 4), default=0)
    # Metadata
    currency = Column(String(3), default="USD")  # 3-character code
    # Stored as a plain string, not a date type, so the database does not
    # validate or order it as a date.
    effective_date = Column(String(20))  # e.g. "2024-12-01"
    is_active = Column(Boolean, default=True)

View file

@ -0,0 +1,639 @@
from typing import Dict, Any, List, Optional
from app.config import settings
# ==========================================
# CONSTANTS & DATA
# ==========================================
# Catalog of selectable camera bodies. Every entry is a dict with these keys:
#   value            - identifier string; the "defaultCamera" fields in
#                      APPLICATION_DATA use these values
#   display          - label text (differs from "value" only for the URSA)
#   sensorFormat     - sensor / film format name
#   tooltip, tags    - short descriptive text (presumably shown in the UI)
#   compatibleLenses - list of LENS_DATA "value" strings
#   physics          - descriptive phrases about the camera's rendering
#                      (presumably injected into generated prompts -- TODO
#                      confirm against the prompt-building code)
# NOTE(review): the LENS_DATA entry "Helios 44-2" is not listed in any
# compatibleLenses here, although APPLICATION_DATA pairs it with
# "Red V-Raptor" as a default -- confirm this is intentional.
CAMERA_DATA = [
    {
        "value": "Arri Alexa 35",
        "display": "Arri Alexa 35",
        "sensorFormat": "Super 35",
        "tooltip": "The Hollywood Standard. Best for natural skin tones and a classic cinematic 'blockbuster' feel.",
        "tags": "Narrative / Drama",
        "compatibleLenses": ["Panavision C-Series", "Cooke S7/i", "Canon K-35", "Laowa Probe"],
        "physics": "ArriRaw sensor readout, high dynamic range, natural noise floor, thick color science"
    },
    {
        "value": "Sony Venice 2",
        "display": "Sony Venice 2",
        "sensorFormat": "Full Frame",
        "tooltip": "The Low-Light King. Excellent for night scenes, clean shadows, and a modern aesthetic.",
        "tags": "Commercial / Night",
        "compatibleLenses": ["Panavision C-Series", "Cooke S7/i", "Canon K-35", "Arri Signature", "Laowa Probe"],
        "physics": "Dual ISO digital sensor, clean shadows, modern color science, high frequency detail"
    },
    {
        "value": "Red V-Raptor",
        "display": "Red V-Raptor",
        "sensorFormat": "Full Frame",
        "tooltip": "Hyper-Real Action. Perfect for high-speed motion, sports, and razor-sharp detail.",
        "tags": "Action / Sports",
        "compatibleLenses": ["Panavision C-Series", "Cooke S7/i", "Canon K-35", "Arri Signature", "Laowa Probe"],
        "physics": "RedCode RAW 8K, clinical sharpness, high shutter angle clarity, hyper-realistic texture"
    },
    {
        "value": "Arriflex 416",
        "display": "Arriflex 416",
        "sensorFormat": "Super 16 (Film)",
        "tooltip": "Gritty & Nostalgic. High grain, soft focus, and vibrant, messy colors. The 'Indie' look.",
        "tags": "Vintage / Music Video",
        "compatibleLenses": ["Zeiss Super Speed", "Laowa Probe"],
        "physics": "Super 16mm film gate, heavy grain structure, soft optical resolution, vibrant chemical color"
    },
    {
        "value": "Arricam LT",
        "display": "Arricam LT",
        "sensorFormat": "35mm (Film)",
        "tooltip": "The Golden Age. Fine grain, organic texture, and rich colors. The classic movie look before digital.",
        "tags": "Period Piece / Premium",
        "compatibleLenses": ["Panavision C-Series", "Cooke S7/i", "Canon K-35", "Laowa Probe"],
        "physics": "35mm motion picture film stock, organic grain structure, halation on highlights, photochemical dynamic range"
    },
    {
        "value": "Fujifilm GFX 100",
        "display": "Fujifilm GFX 100",
        "sensorFormat": "Medium Format",
        "tooltip": "The Studio Master. Massive resolution and depth. Unbeatable for print-quality stills.",
        "tags": "Product / Fashion",
        "compatibleLenses": ["Fujinon GF", "Arri Signature", "Laowa Probe"],
        "physics": "Medium format digital sensor, zero circle of confusion, extreme resolution, pore-level detail"
    },
    {
        "value": "Phantom Flex4K",
        "display": "Phantom Flex4K",
        "sensorFormat": "Super 35",
        "tooltip": "The Time Machine. 1000fps slow motion.",
        "tags": "High-Speed / Sports",
        "compatibleLenses": ["Panavision C-Series", "Cooke S7/i", "Canon K-35", "Laowa Probe", "Angénieux Optimo"],
        "physics": "High-speed global shutter sensor, frozen fluid dynamics, zero motion blur, deep saturation, specialized for 1000fps playback"
    },
    {
        "value": "Blackmagic URSA Cine 12K",
        "display": "URSA Cine 12K",
        "sensorFormat": "Full Frame",
        "tooltip": "Resolution Monster. Infinite reframing capability.",
        "tags": "Future-Proof / VFX",
        "compatibleLenses": ["Panavision C-Series", "Cooke S7/i", "Canon K-35", "Arri Signature", "Laowa Probe", "Canon TS-E", "Angénieux Optimo"],
        "physics": "12K RGB sensor, extreme resolution, zero aliasing, distinct non-bayer pattern texture, analytics-grade sharpness"
    }
]
# Catalog of selectable lenses. Every entry is a dict with these keys:
#   value             - identifier string; referenced by the
#                       "compatibleLenses" lists in CAMERA_DATA and by
#                       "defaultLens" in APPLICATION_DATA
#   display           - label text
#   compatibleFormats - list of format labels
#   tooltip, keywords - short descriptive text (presumably shown in the UI)
#   physics           - descriptive phrases about the lens rendering
#                       (presumably injected into generated prompts -- TODO
#                       confirm against the prompt-building code)
# NOTE(review): several compatibleFormats labels do not equal any camera
# "sensorFormat" string exactly ("Super 16 ONLY" vs "Super 16 (Film)",
# "35mm" vs "35mm (Film)", "Medium Format ONLY", "Large Format",
# "All Formats"), so an exact string comparison between the two would not
# match -- confirm how (or whether) this field is evaluated.
# NOTE(review): "panchrio" in the Cooke S7/i physics text looks like a typo
# for "Panchro"; it is left untouched because it is runtime text.
LENS_DATA = [
    {
        "value": "Panavision C-Series",
        "display": "Panavision C-Series",
        "compatibleFormats": ["Super 35", "35mm"],
        "tooltip": "Classic Widescreen. Horizontal blue flares, oval bokeh. The sci-fi blockbuster look.",
        "keywords": "Flares, Oval Bokeh",
        "physics": "anamorphic optics, characteristic oval bokeh, horizontal blue lens flares, slight barrel distortion"
    },
    {
        "value": "Cooke S7/i",
        "display": "Cooke S7/i",
        "compatibleFormats": ["Full Frame", "Super 35", "35mm"],
        "tooltip": "The 'Cooke Look.' Warm, gentle, and incredibly flattering. Gold standard for portraits.",
        "keywords": "Warmth, Face Focus",
        "physics": "Cooke speed panchrio look, warm color rendering, gentle focus falloff, flattering face compression"
    },
    {
        "value": "Canon K-35",
        "display": "Canon K-35",
        "compatibleFormats": ["Full Frame", "Super 35", "35mm"],
        "tooltip": "Dreamy & Retro. Low contrast, glowing highlights. 1970s/80s vibe.",
        "keywords": "Glow, Retro",
        "physics": "vintage aspherical elements, glowing highlights, low contrast, rainbow flaring, soft sharpness"
    },
    {
        "value": "Arri Signature",
        "display": "Arri Signature",
        "compatibleFormats": ["Large Format", "Full Frame"],
        "tooltip": "Modern Perfection. Ultra-clean, no distortion, pure reality. The invisible lens.",
        "keywords": "Clean, Realistic",
        "physics": "telecentric optical design, zero breathing, ultra-flat field, modern rendering, pure black levels"
    },
    {
        "value": "Zeiss Super Speed",
        "display": "Zeiss Super Speed",
        "compatibleFormats": ["Super 16 ONLY"],
        "tooltip": "The 16mm Classic. Sharp but textured. Designed specifically for the smaller 16mm film frame.",
        "keywords": "Triangular Bokeh, Grit",
        "physics": "vintage high-speed glass, triangular bokeh at wide apertures, chromatic aberration, gritty texture"
    },
    {
        "value": "Fujinon GF",
        "display": "Fujinon GF",
        "compatibleFormats": ["Medium Format ONLY"],
        "tooltip": "Studio Glass. Clinically sharp, specifically designed for the massive GFX sensor.",
        "keywords": "Clinical Sharpness",
        "physics": "modern medium format optics, clinical edge-to-edge sharpness, zero distortion, high micro-contrast"
    },
    {
        "value": "Laowa Probe",
        "display": "Laowa Probe",
        "compatibleFormats": ["All Formats"],
        "tooltip": "Insect-Eye View. Extreme close-ups of small objects/textures.",
        "keywords": "Macro",
        "physics": "macro bug-eye perspective, extreme depth of field, tubular lens construction, surreal wide-angle macro"
    },
    {
        "value": "Helios 44-2",
        "display": "Helios 44-2 (Vintage)",
        "compatibleFormats": ["Full Frame", "Super 35", "35mm"],
        "tooltip": "Swirly Bokeh. The cult classic.",
        "keywords": "Swirly Bokeh, Vintage",
        "physics": "Vintage Soviet glass, characteristic swirly bokeh at edges, low contrast flaring, soft center focus, dreamlike aberrations"
    },
    {
        "value": "Canon TS-E",
        "display": "Canon Tilt-Shift",
        "compatibleFormats": ["Full Frame", "Medium Format"],
        "tooltip": "Miniature Effect. Selective focus control.",
        "keywords": "Tilt-Shift, Miniature",
        "physics": "Tilted focal plane, miniature faking effect, selective focus slice, corrected perspective lines, architectural rigidity"
    },
    {
        "value": "Angénieux Optimo",
        "display": "Angénieux Optimo",
        "compatibleFormats": ["Super 35", "Full Frame"],
        "tooltip": "The Hollywood Zoom. Perfect versatility.",
        "keywords": "Cinema Zoom",
        "physics": "Cinema zoom optics, warm organic contrast, breathing-free focus pulls, uniform field illumination"
    }
]
APPLICATION_DATA = [
{
"value": "Portrait Studio",
"lighting": "Rembrandt lighting, softbox diffusion, 3-point setup",
"defaultCamera": "Arri Alexa 35",
"defaultLens": "Cooke S7/i",
"focusType": "stylistic"
},
{
"value": "Product (Crisp)",
"lighting": "Infinity curve, bright diffuse lighting, shadowless, high key",
"defaultCamera": "Fujifilm GFX 100",
"defaultLens": "Fujinon GF",
"focusType": "realism"
},
{
"value": "Food Photography",
"lighting": "Natural window light simulation, back-lighting for steam/texture, warm reflector fill, medium depth of field, focus on texture",
"defaultCamera": "Sony Venice 2",
"defaultLens": "Cooke S7/i",
"focusType": "stylistic"
},
{
"value": "Golden Hour (Outdoor)",
"lighting": "Sun low on horizon, warm orange glow, long dramatic shadows, volumetric backlight, magic hour atmosphere, cinematic depth",
"defaultCamera": "Arricam LT",
"defaultLens": "Cooke S7/i",
"focusType": "stylistic",
"example": "A vintage Lancia Stratos rally car drifting sideways on a dirt track, kicking up a massive wall of dust that glows incandescent gold in the backlight, creating a dramatic silhouette against the sunset."
},
{
"value": "Blue Hour (City)",
"lighting": "Twilight, deep blue ambient sky light contrasting with warm practical street lamps, moody, atmospheric, balanced exposure",
"defaultCamera": "Sony Venice 2",
"defaultLens": "Arri Signature",
"focusType": "stylistic"
},
{
"value": "Neon Cyberpunk",
"lighting": "Harsh neon signage, mixed color temp, wet reflections",
"defaultCamera": "Red V-Raptor",
"defaultLens": "Panavision C-Series",
"focusType": "stylistic"
},
{
"value": "Nostalgic Memory",
"lighting": "Hazy atmosphere, overexposed highlights, light leaks, warm color grade, sentimental mood, soft focus throughout",
"defaultCamera": "Arriflex 416",
"defaultLens": "Zeiss Super Speed",
"focusType": "stylistic"
},
{
"value": "Corporate Headshot",
"lighting": "Clean white background, high-key lighting, professional balanced fill, sharp focus on eyes, moderate depth of field",
"defaultCamera": "Fujifilm GFX 100",
"defaultLens": "Cooke S7/i",
"focusType": "realism"
},
{
"value": "Macro: Luxury Jewelry",
"lighting": "Sparkling point-source lighting, black velvet background, high contrast reflection control, focus stacking simulation for complete sharpness",
"defaultCamera": "Fujifilm GFX 100",
"defaultLens": "Laowa Probe",
"focusType": "realism"
},
{
"value": "Macro: Nature Details",
"lighting": "Diffused natural sunlight, shallow depth of field, vibrant greens, morning dew, microscopic texture",
"defaultCamera": "Arri Alexa 35",
"defaultLens": "Laowa Probe",
"focusType": "stylistic"
},
{
"value": "Wildlife / Safari",
"lighting": "Telephoto compression, frozen motion, golden hour backlight, natural habitat, separation from background",
"defaultCamera": "Red V-Raptor",
"defaultLens": "Cooke S7/i",
"focusType": "stylistic"
},
{
"value": "Sports Action",
"lighting": "High shutter speed, frozen particles/sweat, stadium floodlights, dynamic composition, sharp subject focus",
"defaultCamera": "Red V-Raptor",
"defaultLens": "Panavision C-Series",
"focusType": "stylistic"
},
{
"value": "Street Photography",
"lighting": "Candid moment, natural available light, messy urban background, hyperfocal distance, deep depth of field, everything in focus",
"defaultCamera": "Arriflex 416",
"defaultLens": "Canon K-35",
"focusType": "realism"
},
{
"value": "Architecture",
"lighting": "Balanced mixed lighting, straight lines, airy atmosphere",
"defaultCamera": "Fujifilm GFX 100",
"defaultLens": "Fujinon GF",
"focusType": "realism"
},
{
"value": "Fashion Editorial",
"lighting": "Avant-garde lighting, colored gels, stark shadows, high fashion pose, studio backdrop, stylized depth",
"defaultCamera": "Sony Venice 2",
"defaultLens": "Canon K-35",
"focusType": "stylistic"
},
{
"value": "Cinematic Horror",
"lighting": "Underexposed, single harsh source (flashlight), heavy shadows",
"defaultCamera": "Arricam LT",
"defaultLens": "Canon K-35",
"focusType": "stylistic"
},
{
"value": "Docu / Realism",
"lighting": "Natural window key light, negative fill, messy authentic background",
"defaultCamera": "Arri Alexa 35",
"defaultLens": "Arri Signature",
"focusType": "realism"
},
{
"value": "Symmetrical Whimsy",
"lighting": "Shadowless high-key lighting, flat diorama aesthetic, vibrant pastel color palette, 90-degree planimetric composition",
"defaultCamera": "Arricam LT",
"defaultLens": "Cooke S7/i",
"focusType": "stylistic"
},
{
"value": "IMAX Scale Epic",
"lighting": "Naturalistic practical lighting, cool color temperature, high contrast, immense sense of scale, deep depth of field",
"defaultCamera": "Arri Alexa 35",
"defaultLens": "Arri Signature",
"focusType": "realism"
},
{
"value": "Clinical Thriller",
"lighting": "Low-key chiaroscuro, controlled shadows, sickly green/yellow color grade, precise stabilized motion",
"defaultCamera": "Red V-Raptor",
"defaultLens": "Arri Signature",
"focusType": "stylistic"
},
{
"value": "Brutalist Atmosphere",
"lighting": "Single source silhouette, atmospheric haze, monochromatic orange/sepia tones, stark geometry, visual silence",
"defaultCamera": "Arri Alexa 35",
"defaultLens": "Panavision C-Series",
"focusType": "stylistic"
},
{
"value": "Technicolor Dream",
"lighting": "Artificial studio lighting, high saturation, vibrant pinks and cyans, glossy plastic textures, high-key brightness",
"defaultCamera": "Arri Alexa 35",
"defaultLens": "Cooke S7/i",
"focusType": "stylistic"
},
{
"value": "Obsessive Symmetry",
"lighting": "One-point perspective, deep focus, wide angle distortion, cold practical lighting, clinical perfection",
"defaultCamera": "Arricam LT",
"defaultLens": "Arri Signature",
"focusType": "realism"
},
{
"value": "Hong Kong Nostalgia",
"lighting": "Step-printing effect, motion blur, neon-soaked humidity, intimate handheld, rain-slicked textures",
"defaultCamera": "Arriflex 416",
"defaultLens": "Zeiss Super Speed",
"focusType": "stylistic"
},
{
"value": "Industrial Haze",
"lighting": "Volumetric lighting, visible shafts of light (god rays), atmospheric haze, high-density industrial detail",
"defaultCamera": "Arri Alexa 35",
"defaultLens": "Panavision C-Series",
"focusType": "stylistic"
},
{
"value": "Gothic Fantasy",
"lighting": "German Expressionist lighting, high contrast long shadows, twisted geometry, desaturated palette",
"defaultCamera": "Arri Alexa 35",
"defaultLens": "Panavision C-Series",
"focusType": "stylistic"
},
{
"value": "LED Volume (Virtual Production)",
"lighting": "Interactive environmental lighting, soft ambient wrap from LED panels, perfect reflection matching, zero green spill",
"defaultCamera": "Arri Alexa 35",
"defaultLens": "Arri Signature",
"focusType": "realism"
},
{
"value": "Automotive: Showroom",
"lighting": "Massive softbox ceiling, continuous highlight lines along bodywork, negative fill to shape curves, pure white infinity cove",
"defaultCamera": "Sony Venice 2",
"defaultLens": "Arri Signature",
"focusType": "realism",
"example": "A silver concept car parked on a pure white infinity curve, continuous highlight lines tracing the aerodynamic bodywork."
},
{
"value": "Knolling / Flat Lay",
"lighting": "Overhead soft diffuse light, shadowless cavity, high-key evenness, precise grid alignment",
"defaultCamera": "Fujifilm GFX 100",
"defaultLens": "Fujinon GF",
"focusType": "realism"
},
{
"value": "Conflict Photography",
"lighting": "Harsh midday sun, atmospheric dust and smoke, high contrast, documentary style reality, blown highlights, raw and unpolished",
"defaultCamera": "Arriflex 416",
"defaultLens": "Zeiss Super Speed",
"focusType": "realism"
},
{
"value": "NYC Street Editorial",
"lighting": "Natural city canyon light, bounce board fill for face, sharp modern contrast, motion blur in background, high-resolution gloss",
"defaultCamera": "Sony Venice 2",
"defaultLens": "Arri Signature",
"focusType": "stylistic"
},
{
"value": "Underground Rave / Flash",
"lighting": "Direct on-camera flash with slow shutter drag (rear-curtain sync), light trails, laser rim lighting, sweaty atmosphere, darkness crushing the background",
"defaultCamera": "Red V-Raptor",
"defaultLens": "Helios 44-2",
"focusType": "stylistic"
},
{
"value": "Architectural Digest Interior",
"lighting": "North-facing window soft light, large diffusion frames, negative fill for contrast, texture-raking angle, perfectly balanced exposure",
"defaultCamera": "Fujifilm GFX 100",
"defaultLens": "Canon TS-E",
"focusType": "realism"
},
{
"value": "90s Grunge Editorial",
"lighting": "Hard direct flash, dirty green/yellow color cast, vignetting, unretouched skin texture, claustrophobic framing",
"defaultCamera": "Arriflex 416",
"defaultLens": "Canon K-35",
"focusType": "stylistic"
},
{
"value": "Cassette Futurism (Retro Sci-Fi)",
"lighting": "Flickering CRT monitor glow, harsh overhead fluorescent strips, brutalist shadows, beige and grey color palette, industrial haze",
"defaultCamera": "Arriflex 416",
"defaultLens": "Panavision C-Series",
"focusType": "stylistic"
},
{
"value": "Tech Commercial (Macro)",
"lighting": "Slow moving light sweep (motion control), brushed metal reflections, dramatic rim lighting in a black void, sub-surface scattering on materials",
"defaultCamera": "Arri Alexa 35",
"defaultLens": "Laowa Probe",
"focusType": "realism"
},
{
"value": "Surreal Infrared",
"lighting": "Full spectrum daylight, false color infrared shift (foliage turns pink/white), deep blue skies, high contrast, dreamlike atmosphere",
"defaultCamera": "Arricam LT",
"defaultLens": "Canon K-35",
"focusType": "stylistic"
},
{
"value": "Spaghetti Western",
"lighting": "Harsh high-noon sun, heat haze distortion, sweaty skin texture, extreme close-up on eyes, deep depth of field",
"defaultCamera": "Arricam LT",
"defaultLens": "Angénieux Optimo",
"focusType": "stylistic"
},
{
"value": "Automotive: Process Trailer",
"lighting": "Dynamic passing street lights, rhythmic shadow movement, wet road reflections, motion blur on background only",
"defaultCamera": "Arri Alexa 35",
"defaultLens": "Angénieux Optimo",
"focusType": "stylistic",
"example": "A black sports car speeding through a tunnel, dynamic motion blur on the tunnel lights, sharp focus on the car badge."
},
{
"value": "Product (Liquid/Splash)",
"lighting": "High-speed strobe lighting, frozen droplets, backlit fluid translucency, crystal clear refraction",
"defaultCamera": "Phantom Flex4K",
"defaultLens": "Laowa Probe",
"focusType": "stylistic",
"example": "A strawberry dropping into milk, creating a perfect crown splash, frozen in mid-air with high-speed strobe lighting."
},
{
"value": "VFX / Green Screen",
"lighting": "Raw chromakey plate, perfectly flat shadowless green background, distinct rim light for separation, zero color spill, high-fidelity capture",
"defaultCamera": "Blackmagic URSA Cine 12K",
"defaultLens": "Arri Signature",
"focusType": "realism",
"example": "A raw chromakey plate of a superhero in a landing pose, completely isolated against a flat, pure digital green background, sharp focus, ready for compositing."
},
{
"value": "Custom",
"lighting": "User-defined lighting setup",
"defaultCamera": None,
"defaultLens": None,
"focusType": "stylistic"
}
]
# Aspect Ratio Map (Frontend -> Prose)
# Keys are the ratio strings sent by the frontend; values are the sentence
# openers that the generated prompt is asked to start with.
ASPECT_RATIO_PROSE = {
    "16:9": "A cinematic 16:9 composition featuring",
    "2.39:1": "A widescreen anamorphic-ratio composition featuring",
    "4:3": "A classic 4:3 format composition featuring",
    "1:1": "A square format composition featuring",
    "9:16": "A vertical full-screen composition featuring",
}
# Negative Constraints
# Application presets that share a look share one constraint string, so each
# string is spelled out once and referenced by every preset that uses it.
_AVOID_MESSY = 'Strictly AVOID: messy, dirt, grime, imperfections, motion blur, handheld, shaky'
_AVOID_MOODY = 'Strictly AVOID: shadow over eyes, silhouette, dark, moody, gritty, high contrast'
_AVOID_BRIGHT = 'Strictly AVOID: bright, cheerful, clean, pristine, high-key, sunshine'
CONSTRAINT_MAP = {
    'Architecture': _AVOID_MESSY,
    'Product (Crisp)': _AVOID_MESSY,
    'Corporate Headshot': _AVOID_MOODY,
    'Portrait Studio': _AVOID_MOODY,
    'Cinematic Horror': _AVOID_BRIGHT,
    'Nostalgic Memory': _AVOID_BRIGHT,
}
# Smart Fill Context
# Per-application detail hints; the prompt builder offers these to the model
# as material to invent from when the user's scene description is short.
SMART_FILL_CONTEXT = {
    "Neon Cyberpunk": "Hovering vehicle, rain, neon lights",
    "Golden Hour (Outdoor)": "Vintage convertible, dust kicking up, lens flare",
    "Cinematic Horror": "Distressed clothing, expressions of terror, flashlight beams cutting through fog, unseen threat in shadows",
    "Corporate Headshot": "Business professional attire, confident posture, subtle smile, perfectly groomed",
    "Portrait Studio": "Professional studio setup, controlled lighting, posed subject",
    "Fashion Editorial": "High fashion couture, avant-garde styling, dramatic poses, editorial expression",
    "Street Photography": "Authentic street fashion, candid moments, urban environment, real people",
    "Blue Hour (City)": "Urban nightlife fashion, city lights reflecting, atmospheric fog, metropolitan energy",
    "Wildlife / Safari": "Natural habitat, majestic animals, golden savanna light, environmental storytelling",
    "Symmetrical Whimsy": "Perfectly centered vintage car, pastel luggage on roof, quirkily dressed driver",
    "IMAX Scale Epic": "Lone rover traversing massive alien glacier, tiny against the landscape",
    "Clinical Thriller": "Sterile hospital corridor, flickering fluorescent light, solitary figure",
    "Brutalist Atmosphere": "Concrete monolith, lone figure dwarfed by structure, dust particles",
    "Technicolor Dream": "Plastic fantastic furniture, bubble machines, candy-colored wardrobe",
    "Obsessive Symmetry": "Identical twins in matching outfits, geometric patterns, perfect alignment",
    "Hong Kong Nostalgia": "Taxi in heavy rain, neon lights reflecting on wet glass, lonely passenger",
    "Industrial Haze": "Factory interior, steam pipes, worker silhouette against machinery",
    "Gothic Fantasy": "Twisted architecture, dramatic cape, fog machine atmosphere",
}
# ==========================================
# HELPER FUNCTIONS
# ==========================================
def get_aspect_ratio_prose(ratio: str) -> str:
    """Return the prose opener registered for `ratio`, or '' when the ratio is unknown."""
    if ratio in ASPECT_RATIO_PROSE:
        return ASPECT_RATIO_PROSE[ratio]
    return ''
def get_negative_constraints(app_name: str) -> str:
    """Return the negative-constraint string for an application preset, or '' if it has none."""
    try:
        return CONSTRAINT_MAP[app_name]
    except KeyError:
        return ''
def get_smart_fill_context(app_name: str) -> str:
    """Return the smart-fill hint text for an application preset, or '' if it has none."""
    hint = SMART_FILL_CONTEXT.get(app_name)
    return '' if hint is None else hint
def get_camera_texture_keywords(camera_name: str) -> str:
    """Return the "physics" text of the first CAMERA_DATA entry whose "value" is `camera_name`.

    Returns "" when no entry matches.
    """
    matching_physics = (
        entry["physics"] for entry in CAMERA_DATA if entry["value"] == camera_name
    )
    return next(matching_physics, "")
def get_lens_physics(lens_name: str) -> str:
    """Return the "physics" text of the first LENS_DATA entry whose "value" is `lens_name`.

    Returns "" when no entry matches.
    """
    matching_physics = (
        entry["physics"] for entry in LENS_DATA if entry["value"] == lens_name
    )
    return next(matching_physics, "")
def get_app_data(app_name: str) -> Optional[Dict]:
    """Return the first APPLICATION_DATA entry whose "value" is `app_name`, or None."""
    candidates = (entry for entry in APPLICATION_DATA if entry["value"] == app_name)
    return next(candidates, None)
# ==========================================
# CINE PROMPT LOGIC
# ==========================================
async def enhance_cine_prompt(
    prompt: str,
    application: str,
    camera: str,
    lens: str,
    aspect_ratio: str,
    creative_freedom: float,
    language: str = "en"
) -> Dict[str, Any]:
    """
    Generate the 'CinePrompt' high-fidelity prompt using Gemini.
    Replicates the 'CinePromptStudio.jsx' logic on the backend.

    Args:
        prompt: The user's scene description.
        application: Application preset name; drives lighting defaults,
            smart-fill hints, negative constraints and the focus rule.
        camera: Camera name, looked up for its sensor "physics" text.
        lens: Lens name, looked up for its optical "physics" text.
        aspect_ratio: Frontend ratio key, mapped to a prose opener.
        creative_freedom: Value embedded in the prompt; the model is told to
            treat values above 0.5 as "high".
        language: Output language code; anything other than "en" appends an
            "Output in <language>" instruction.

    Returns:
        On success, a dict with "enhanced_prompt", "negative_prompt", "style"
        and "technical_params". When the API key is missing or the request
        fails, a dict containing only a "note" key describing the problem.
    """
    import google.generativeai as genai

    if not settings.google_api_key:
        return {"note": "Google API Key missing for CinePrompt"}

    try:
        genai.configure(api_key=settings.google_api_key)
        model = genai.GenerativeModel("gemini-2.0-flash-exp")

        # Gather context
        app_data = get_app_data(application)
        aspect_ratio_prose = get_aspect_ratio_prose(aspect_ratio)
        negative_constraints = get_negative_constraints(application)
        smart_fill_context = get_smart_fill_context(application)
        camera_physics = get_camera_texture_keywords(camera)
        lens_physics = get_lens_physics(lens)
        word_count = len(prompt.split())

        # Conditional prompt fragments are resolved up front so the template
        # below contains plain substitutions only.
        smart_fill_hint = (
            f'"{smart_fill_context}"' if smart_fill_context else 'Use application context'
        )
        if app_data and app_data.get('focusType') == 'realism':
            focus_rule = '- Application is Architecture, Product, or Documentary: FORCE Deep Focus. Suppress "bokeh". Describe "clinical sharpness."'
        else:
            focus_rule = '- Application is Narrative/Portrait: ENHANCE "bokeh", "flares", stylistic elements.'
        language_directive = "Output in " + language if language != "en" else ""

        # Construct System Prompt (The 'Master System Prompt')
        system_prompt = f"""You are an expert Cinematographer and Optical Physicist.
Your goal is to Create a "Thick Description" prompt that emphasizes TEXTURE, ATMOSPHERE, and LIGHT.
INPUT VARIABLES:
- Scene: {prompt}
- Application: {application}
- Camera: {camera}
- Lens: {lens}
- Aspect Ratio Prose: {aspect_ratio_prose}
- Creative Freedom: {creative_freedom}
- Camera Physics: {camera_physics}
- Lens Physics: {lens_physics}
EXECUTION STEPS:
1. **SMART FILL CHECK:**
- IF input is short ({word_count} words < 10) AND Creative Freedom is High ({creative_freedom} > 0.5):
INVENT details based on Application: {smart_fill_hint}.
2. **REALISM vs. STYLE CHECK:**
{focus_rule}
3. **SENSORY ENHANCEMENT (THE "DEEP TEXTURE" PASS):**
- **Skin:** If humans are present, describe skin texture (e.g., "visible pores," "slight sweat sheen," "natural imperfections").
- **Air:** Describe the air quality (e.g., "humid haze," "crisp winter air," "dust motes in light beams").
- **Light:** Describe the *quality* of light (e.g., "diffused window light," "specular highlights").
- **Camera Sensor Physics:** Apply these characteristics: "{camera_physics}"
4. **OPTICAL CHARACTERISTICS:**
- **Lens Physics:** Apply these optical traits: "{lens_physics}"
5. **ASSEMBLY:**
- Start with {aspect_ratio_prose}
- Expand subject with Smart Fill
- Apply {application} lighting defaults
- Inject Camera Physics (Sensor/Film Stock characteristics)
- Finish with Lens Physics (Optical characteristics)
OUTPUT FORMAT:
[Aspect Ratio Prose] + [Visual Subject] + [Sensory Atmosphere & Light] + [Camera Physics] + [Lens Physics].
STRICT OUTPUT FORMAT: Return ONLY the final visual description text. Do not use Markdown formatting. Do not include conversational filler.
{language_directive}
"""

        # Generate. This coroutine runs on the event loop, so use the SDK's
        # async entry point; the synchronous generate_content() would stall
        # every other request for the duration of the network round trip.
        response = await model.generate_content_async(system_prompt)
        enhanced_text = response.text.strip()

        return {
            "enhanced_prompt": enhanced_text,
            "negative_prompt": negative_constraints or "blurry, low quality, distorted, bad composition",
            "style": application,
            "technical_params": {
                "camera": camera,
                "lens": lens,
                "aspect_ratio": aspect_ratio
            }
        }
    except Exception as e:
        # Deliberate best-effort boundary: callers receive a note instead of
        # an exception when enhancement cannot be produced.
        return {"note": f"CinePrompt enhancement failed: {str(e)}"}

View file

@ -62,6 +62,16 @@ from app.models.job import Job
from app.models.asset import Asset
from app.config import settings
def determine_mime_type(data: bytes) -> str:
    """Guess an image MIME type from the leading magic bytes of `data`.

    Recognizes PNG, JPEG and WebP signatures; anything else (including empty
    input) is reported as 'image/png'.
    """
    prefix_signatures = (
        (b'\x89PNG\r\n\x1a\n', 'image/png'),
        (b'\xff\xd8', 'image/jpeg'),
    )
    for magic, mime in prefix_signatures:
        if data.startswith(magic):
            return mime
    # WebP is a RIFF container: "RIFF", a 4-byte size field, then "WEBP".
    is_webp = data.startswith(b'RIFF') and data[8:12] == b'WEBP'
    return 'image/webp' if is_webp else 'image/png'  # Default fallback
# Provider configurations
IMAGE_PROVIDERS = {
"openai": {
@ -94,8 +104,8 @@ IMAGE_PROVIDERS = {
},
"nano-banana": {
"name": "Nano Banana (Gemini Image)",
"models": ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"],
"default_model": "gemini-2.5-flash-image",
"models": ["gemini-3-pro-image-preview", "gemini-2.0-flash-exp"],
"default_model": "gemini-3-pro-image-preview",
"aspect_ratios": ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"],
"image_sizes": ["1K", "2K", "4K"],
"supports_text_rendering": True,
@ -215,14 +225,18 @@ async def generate(job_id: str):
# Fetch reference image if provided
ref_id = input_data.get("reference_asset_id")
ref_image_data = None
ref_mime_type = "image/png" # Default
if ref_id:
ref_asset = db.query(Asset).filter(Asset.id == ref_id).first()
if ref_asset and os.path.exists(ref_asset.file_path):
with open(ref_asset.file_path, "rb") as f:
ref_image_data = f.read()
if ref_asset.mime_type:
ref_mime_type = ref_asset.mime_type
image_data, filename = await _generate_nano_banana(input_data, ref_image_data)
job.api_model = input_data.get("model", "gemini-2.5-flash-image")
image_data, filename = await _generate_nano_banana(input_data, ref_image_data, ref_mime_type)
job.api_model = input_data.get("model", "gemini-3-pro-image-preview")
elif provider == "stable-diffusion":
image_data, filename = await _generate_stability(input_data)
job.api_model = input_data.get("model", "sd3.5-large")
@ -281,6 +295,62 @@ async def generate(job_id: str):
job.output_asset_ids = [asset.id]
job.output_data = {"asset_id": str(asset.id), "file_path": file_path}
# Log Usage
try:
from app.utils.logging import log_model_usage
# Other imports are available globally
# Placeholder values for logging, these would ideally be returned by _generate_ functions
# For now, we'll use what's available from input_data and job.api_model
model = job.api_model
width = input_data.get("width")
height = input_data.get("height")
n = input_data.get("n", 1) # Number of images requested
ext = "png" # Default, actual ext should come from _generate_ functions
# Use existing asset data for logging
output_asset_ids = job.output_asset_ids or []
output_paths = []
if job.output_data and "file_path" in job.output_data:
output_paths.append(job.output_data["file_path"])
duration_ms = 0
if job.started_at:
duration_ms = int((datetime.utcnow() - job.started_at).total_seconds() * 1000)
log_model_usage(
db=db,
job_id=str(job.id),
user_id=str(job.user_id),
module="image_generator",
action="generate",
provider=provider,
model=model,
usage_stats={
"images": len(output_asset_ids),
"processing_time_ms": duration_ms
},
request_metadata={
"prompt": prompt,
"negative_prompt": input_data.get("negative_prompt"),
"size": f"{width}x{height}" if width and height else None,
"n": n
},
response_metadata={
"output_assets": [str(a_id) for a_id in output_asset_ids],
"filenames": [os.path.basename(p) for p in output_paths]
}
)
except Exception as log_e:
logger.error(f"Failed to log image generation usage: {log_e}")
job.output_asset_ids = output_asset_ids
job.output_data = {
"prompt": prompt,
"provider": provider,
"model": model,
"image_paths": output_paths
}
job.progress = 100
job.status = "completed"
job.completed_at = datetime.utcnow()
@ -865,10 +935,68 @@ async def _generate_imagen(input_data: dict) -> tuple:
return None, None
async def _generate_nano_banana(input_data: dict, image_data: Optional[bytes] = None) -> tuple:
async def _upload_file_http(media_data: bytes, mime_type: str) -> Optional[str]:
    """
    Upload file using raw HTTP to Google Generative AI Files API
    (Alternative to outdated google-generativeai library)

    Performs a two-request resumable upload: a "start" request announcing the
    size and content type, then one "upload, finalize" request that sends the
    bytes to the session URL returned by the first.

    Args:
        media_data: File content to upload.
        mime_type: Content type announced for the upload.

    Returns:
        The uploaded file's URI, or None when the API key is not configured,
        either request fails, or the response carries no URI.
    """
    if not settings.google_api_key:
        return None

    try:
        # The key travels in a header rather than the query string so it does
        # not end up in request-URL logs.
        url = "https://generativelanguage.googleapis.com/upload/v1beta/files"
        num_bytes = len(media_data)
        headers = {
            "x-goog-api-key": settings.google_api_key,
            "X-Goog-Upload-Protocol": "resumable",
            "X-Goog-Upload-Command": "start",
            "X-Goog-Upload-Header-Content-Length": str(num_bytes),
            "X-Goog-Upload-Header-Content-Type": mime_type,
            "Content-Type": "application/json"
        }
        metadata = {"file": {"display_name": f"nano_banana_upload_{uuid4()}"}}

        async with httpx.AsyncClient(timeout=30.0) as client:
            # 1. Start Upload
            response = await client.post(url, headers=headers, json=metadata)
            if response.status_code != 200:
                logger.error(f"Failed to start upload: {response.status_code} - {response.text}")
                return None

            upload_url = response.headers.get("x-goog-upload-url")
            if not upload_url:
                logger.error("No upload URL returned")
                return None

            # 2. Upload Bytes
            headers_upload = {
                "Content-Length": str(num_bytes),
                "X-Goog-Upload-Offset": "0",
                "X-Goog-Upload-Command": "upload, finalize"
            }
            response_upload = await client.post(upload_url, headers=headers_upload, content=media_data)
            if response_upload.status_code != 200:
                logger.error(f"Failed to upload data: {response_upload.status_code} - {response_upload.text}")
                return None

            data = response_upload.json()
            file_uri = data.get("file", {}).get("uri")
            if not file_uri:
                # A 200 without a URI is unusable to callers; do not report it as success.
                logger.error("Upload response did not contain a file URI")
                return None
            logger.info(f"File uploaded successfully: {file_uri}")
            return file_uri
    except Exception as e:
        # Best-effort helper: any transport or decoding failure is reported as None.
        logger.error(f"Upload error: {e}")
        return None
async def _generate_nano_banana(input_data: dict, image_data: Optional[bytes] = None, mime_type: str = "image/png") -> tuple:
"""
Generate image using Nano Banana (Gemini 3 Pro Image)
Model: gemini-3-pro-image-preview
Uses File API for strict visual context adherence.
"""
if not settings.google_api_key:
raise ValueError("GOOGLE_API_KEY not configured")
@ -877,30 +1005,65 @@ async def _generate_nano_banana(input_data: dict, image_data: Optional[bytes] =
if not prompt:
raise ValueError("Prompt is required")
# Use gemini-2.5-flash-image model for native image generation
model_name = input_data.get("model", "gemini-2.5-flash-image")
import google.generativeai as genai
import tempfile
import os
import base64
genai.configure(api_key=settings.google_api_key)
# Use gemini-3-pro-image-preview as requested by user
model_name = input_data.get("model", "gemini-3-pro-image-preview")
if model_name in ["gemini-2.5-flash-image", "gemini-2.0-flash-exp"]:
model_name = "gemini-3-pro-image-preview"
url = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent"
# Build payload with text and optional image
# Build payload with image first (context) then text (instruction)
# Build payload - EXACTLY matching PHP structure (Image FIRST, then Text)
parts = []
if image_data:
import base64
# Robust MIME detection
real_mime_type = determine_mime_type(image_data)
# PHP uses inline_data (snake_case) and base64
# It forces image/jpeg in PHP. We will do the same to match the reference implementation exactly.
b64_image = base64.b64encode(image_data).decode("utf-8")
parts.append({
"inlineData": {
"mimeType": "image/png",
"inline_data": {
"mime_type": "image/jpeg",
"data": b64_image
}
})
logger.info(f"Nano Banana: Added reference image ({len(image_data)} bytes) to payload")
logger.info(f"Nano Banana: Added reference image (inline_data base64, {len(b64_image)} chars)")
# Text Instruction Second
parts.append({"text": prompt})
# Construct generation config
gen_config = {
"responseModalities": ["IMAGE"]
}
# Map aspect ratio if present
ar_map = {
"1:1": "1:1", "16:9": "16:9", "9:16": "9:16",
"4:3": "4:3", "3:4": "3:4"
}
input_ar = input_data.get("aspect_ratio", "1:1")
if input_ar in ar_map:
gen_config["imageConfig"] = {
"aspectRatio": ar_map[input_ar],
"imageSize": input_data.get("image_size", "2K") # PHP supports imageSize
}
payload = {
"contents": [{
"parts": parts
}]
}],
"generationConfig": gen_config
}
try:
@ -916,29 +1079,46 @@ async def _generate_nano_banana(input_data: dict, image_data: Optional[bytes] =
logger.info(f"Nano Banana response status: {response.status_code}")
if response.status_code == 200:
data = response.json()
logger.info(f"Nano Banana response keys: {data.keys() if isinstance(data, dict) else 'not a dict'}")
# Extract image from response
candidates = data.get("candidates", [])
if candidates and len(candidates) > 0:
content = candidates[0].get("content", {})
parts = content.get("parts", [])
for part in parts:
if "inlineData" in part:
inline_data = part["inlineData"]
if "data" in inline_data:
import base64
image_data = base64.b64decode(inline_data["data"])
filename = f"nano_banana_{uuid4()}.png"
logger.info(f"✓ Nano Banana generated image: {len(image_data)} bytes")
return image_data, filename
logger.warning(f"Nano Banana: No image data in response. Response: {str(data)[:200]}")
else:
if response.status_code != 200:
logger.error(f"Nano Banana API error: {response.status_code} - {response.text}")
# Try to parse error message
try:
err_json = response.json()
err_msg = err_json.get("error", {}).get("message", response.text)
logger.error(f"Nano Banana Error Details: {err_msg}")
except:
pass
return None, None
data = response.json()
# logger.info(f"Nano Banana response: {data}")
# Extract image from response - supporting both inline_data and inlineData
candidates = data.get("candidates", [])
if candidates and len(candidates) > 0:
content = candidates[0].get("content", {})
parts_resp = content.get("parts", [])
for part in parts_resp:
# Check snake_case first (PHP match)
if "inline_data" in part:
inline_data = part["inline_data"]
if "data" in inline_data:
img_bytes = base64.b64decode(inline_data["data"])
filename = f"nano_banana_{uuid4()}.png"
return img_bytes, filename
# Check camelCase (Standard Gemini)
if "inlineData" in part:
inline_data = part["inlineData"]
if "data" in inline_data:
img_bytes = base64.b64decode(inline_data["data"])
filename = f"nano_banana_{uuid4()}.png"
return img_bytes, filename
logger.warning(f"Nano Banana: No image data in response. Content: {content}")
else:
logger.warning(f"Nano Banana: No candidates in response.")
except Exception as e:
logger.error(f"Nano Banana generation error: {e}")

File diff suppressed because it is too large Load diff

View file

@ -328,6 +328,45 @@ async def upscale(job_id: str):
job.output_asset_ids = [output_asset.id]
job.output_data = {"asset_id": str(output_asset.id), "file_path": file_path}
logger.info(f"✓ Topaz upscale completed: Asset {output_asset.id} created")
# Log Usage
try:
from app.utils.logging import log_model_usage
# Topaz typically charges per megapixel or image
# We seeded it as 'cost_per_image' ($0.20 buffer) for 'topaz' provider
# Calculate duration
duration_ms = 0
if job.started_at:
duration_ms = int((datetime.utcnow() - job.started_at).total_seconds() * 1000)
log_model_usage(
db=db,
job_id=str(job.id),
user_id=str(job.user_id),
module="image_upscaler",
action="upscale",
provider="topaz",
model=model, # e.g. "Proteus"
usage_stats={
"images": 1,
"processing_time_ms": duration_ms
},
request_metadata={
"input_file": input_asset.original_filename,
"scale": scale,
"original_dims": f"{original_width}x{original_height}",
"output_dims": f"{output_width}x{output_height}"
},
response_metadata={
"output_file": filename,
"topaz_request_id": request_id
}
)
except Exception as log_e:
logger.error(f"Failed to log usage stats: {log_e}")
job.progress = 100
job.status = "completed"
job.completed_at = datetime.utcnow()

View file

@ -256,7 +256,13 @@ async def enhance(
provider: str = "openai",
include_negative: bool = True,
include_technical: bool = True,
language: str = "en"
language: str = "en",
# Advanced CinePrompt params
application: Optional[str] = None,
camera: Optional[str] = None,
lens: Optional[str] = None,
aspect_ratio: Optional[str] = None,
creative_freedom: float = 0.3
) -> dict:
"""Enhance a prompt using AI
@ -267,10 +273,28 @@ async def enhance(
include_negative: Whether to generate negative prompts
include_technical: Whether to include technical parameters
language: Output language code
application: CinePrompt Application preset (triggers CinePrompt mode if set)
camera: CinePrompt Camera
lens: CinePrompt Lens
aspect_ratio: CinePrompt Aspect Ratio
creative_freedom: CinePrompt Creative Freedom (0.0 - 1.0)
Returns:
Dictionary with enhanced prompt, negative prompt, and metadata
"""
# Check if this is a CinePrompt request (Application/Camera set)
if application or camera:
from app.services import cine_prompt_studio
return await cine_prompt_studio.enhance_cine_prompt(
prompt=prompt,
application=application or "Custom",
camera=camera or "Arri Alexa 35",
lens=lens or "Cooke S7/i",
aspect_ratio=aspect_ratio or "16:9",
creative_freedom=creative_freedom,
language=language
)
# Get style configuration
style_config = STYLE_CONFIGS.get(style, STYLE_CONFIGS["cinematic"])
@ -512,3 +536,14 @@ def get_style_info(style: str) -> Optional[Dict[str, Any]]:
"technical": config.get("technical", {}),
"negative_base": config.get("negative_base", "")
}
def get_cine_options() -> Dict[str, Any]:
    """Collect the CinePrompt Studio option tables into one dictionary.

    Returns the camera, lens and application tables as defined in
    `cine_prompt_studio`, plus the aspect-ratio keys in sorted order.
    """
    from app.services import cine_prompt_studio as studio

    ratio_keys = sorted(studio.ASPECT_RATIO_PROSE)
    options = {
        "cameras": studio.CAMERA_DATA,
        "lenses": studio.LENS_DATA,
        "applications": studio.APPLICATION_DATA,
        "aspect_ratios": ratio_keys,
    }
    return options

View file

@ -129,6 +129,9 @@ FONT_PRESETS = {
def get_available_fonts():
"""Get list of available fonts on the system"""
try:
# Check if fc-list exists
subprocess.check_call(['which', 'fc-list'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
output = subprocess.check_output(['fc-list', ':', 'family'], stderr=subprocess.DEVNULL).decode('utf-8')
fonts = set()
for line in output.splitlines():
@ -141,9 +144,32 @@ def get_available_fonts():
return [
'Arial', 'Helvetica', 'Times New Roman', 'Courier New', 'Verdana',
'Georgia', 'Palatino', 'Garamond', 'Comic Sans MS', 'Trebuchet MS',
'Arial Black', 'Impact', 'Tahoma', 'Roboto', 'Open Sans'
'Arial Black', 'Impact', 'Tahoma', 'Roboto', 'Open Sans', 'DejaVu Sans'
]
def _check_font_availability(font_name: str) -> str:
    """Resolve a requested font to one reported by get_available_fonts().

    Resolution order: exact name, case-insensitive name (returning the
    installed spelling), the first available entry of a fixed fallback list,
    and finally 'Arial' regardless of availability.
    """
    installed = get_available_fonts()

    # Direct match
    if font_name in installed:
        return font_name

    # Case insensitive match
    wanted = font_name.lower()
    same_name = next((candidate for candidate in installed if candidate.lower() == wanted), None)
    if same_name is not None:
        return same_name

    # Fallback to defaults if specific font not found
    for fallback in ('Arial', 'Helvetica', 'DejaVu Sans', 'FreeSans'):
        if fallback in installed:
            return fallback

    # Last resort (ffmpeg typically has a default sans serif)
    return 'Arial'
def get_subtitle_config():
"""Return available configuration options for subtitles"""
@ -217,14 +243,19 @@ async def process(job_id: str):
font_preset = input_data.get("font_preset")
if font_preset and font_preset in FONT_PRESETS:
preset = FONT_PRESETS[font_preset]
font = input_data.get("font", preset['font'])
font_req = input_data.get("font", preset['font'])
font_size = input_data.get("font_size", preset['size'])
outline_width = input_data.get("outline_width", preset['outline'])
else:
font = input_data.get("font", "Arial")
font_req = input_data.get("font", "Arial")
font_size = input_data.get("font_size", 24)
outline_width = input_data.get("outline_width", 2)
# Validate font availability
font = _check_font_availability(font_req)
if font != font_req:
logger.warning(f"Font '{font_req}' not found, falling back to '{font}'")
text_color = input_data.get("text_color", "white")
outline_color = input_data.get("outline_color", "black")
background_color = input_data.get("background_color")
@ -284,6 +315,90 @@ async def process(job_id: str):
subtitle_content = _generate_srt(segments)
subtitle_ext = "srt"
# Helper validation for font
font = _check_font_availability(font_req)
# Check for provided subtitle file
subtitle_asset_id = input_data.get("subtitle_asset_id")
segments = []
detected_language = source_language
subtitle_content = ""
transcription_result = {} # To store result for output_data
if subtitle_asset_id:
logger.info("Using provided subtitle asset", asset_id=subtitle_asset_id)
sub_asset = db.query(Asset).filter(Asset.id == subtitle_asset_id).first()
if not sub_asset or not os.path.exists(sub_asset.file_path):
raise ValueError("Provided subtitle asset not found")
with open(sub_asset.file_path, "r", encoding="utf-8") as f:
subtitle_content = f.read()
# Simple assumption: Input is SRT if we are parsing it
# In future we might check extension
segments = _parse_srt(subtitle_content)
job.progress = 50 # Skip transcription
transcription_result = {"text": "Imported from SRT", "language": source_language}
else:
# Transcribe with Whisper
job.api_provider = f"whisper-{whisper_model}"
import whisper
model = whisper.load_model(whisper_model)
# Extract audio to temp file
temp_audio = os.path.join(settings.storage_path, "temp", f"temp_{uuid4()}.wav")
os.makedirs(os.path.dirname(temp_audio), exist_ok=True)
try:
subprocess.run([
"ffmpeg", "-i", input_asset.file_path,
"-ar", "16000",
"-ac", "1",
"-c:a", "pcm_s16le",
"-y", temp_audio
], check=True, capture_output=True)
job.progress = 20
db.commit()
# Transcribe
transcribe_options = {
"language": None if source_language == "auto" else source_language,
"verbose": False,
"word_timestamps": word_timestamps
}
result = model.transcribe(temp_audio, **transcribe_options)
segments = result["segments"]
detected_language = result["language"]
transcription_result = result
# Generate initial subtitle content
if output_format == "vtt":
subtitle_content = _generate_vtt(segments, word_timestamps)
elif output_format == "ass":
subtitle_content = _generate_ass(segments, font, font_size, text_color,
outline_color, outline_width, position, alignment,
margin_v, margin_h, shadow, bold, italic,
background_color, background_opacity)
else:
subtitle_content = _generate_srt(segments)
finally:
if os.path.exists(temp_audio):
os.remove(temp_audio)
job.progress = 60
db.commit()
# Update subtitle extension based on format
subtitle_ext = "srt"
if output_format == "vtt": subtitle_ext = "vtt"
elif output_format == "ass": subtitle_ext = "ass"
# Translate if needed
translated_content = None
if target_language:
@ -323,7 +438,18 @@ async def process(job_id: str):
output_assets = []
# Save original subtitle file
subtitle_filename = f"subtitles_{uuid4()}.{subtitle_ext}"
# Use simple naming convention
base_name = os.path.splitext(input_asset.original_filename)[0]
# Clean basename of special chars if needed, but for now just use it
subtitle_filename = f"{base_name}-subtitles-{detected_language}.{subtitle_ext}"
subtitle_path = os.path.join(settings.storage_path, "documents", subtitle_filename)
# Filenames stay human-readable so users can see what was produced, but carry a short
# UUID suffix so repeated runs on the same input never collide in shared storage.
short_id = str(uuid4())[:8]
subtitle_filename = f"{base_name}-subtitles-{detected_language}-{short_id}.{subtitle_ext}"
subtitle_path = os.path.join(settings.storage_path, "documents", subtitle_filename)
os.makedirs(os.path.dirname(subtitle_path), exist_ok=True)
@ -357,7 +483,8 @@ async def process(job_id: str):
# Save translated subtitle if exists
trans_path = None
if translated_content:
trans_filename = f"subtitles_translated_{uuid4()}.{subtitle_ext}"
short_id = str(uuid4())[:8]
trans_filename = f"{base_name}-subtitles-{target_language}-{short_id}.{subtitle_ext}"
trans_path = os.path.join(settings.storage_path, "documents", trans_filename)
with open(trans_path, "w", encoding="utf-8") as f:
@ -392,7 +519,10 @@ async def process(job_id: str):
# Burn subtitles if requested
if burn_subtitles:
burn_path = trans_path if translated_content else subtitle_path
output_filename = f"subtitled_{uuid4()}.mp4"
# Burned video
lang_code = target_language if translated_content else detected_language
short_id = str(uuid4())[:8]
output_filename = f"{base_name}-subtitled-{lang_code}-{short_id}.mp4"
output_path = os.path.join(settings.storage_path, "videos", output_filename)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
@ -403,12 +533,16 @@ async def process(job_id: str):
shadow, bold, italic, background_color, background_opacity
)
subprocess.run([
"ffmpeg", "-i", input_asset.file_path,
"-vf", subtitle_filter,
"-c:a", "copy",
"-y", output_path
], check=True, capture_output=True)
try:
result = subprocess.run([
"ffmpeg", "-i", input_asset.file_path,
"-vf", subtitle_filter,
"-c:a", "copy",
"-y", output_path
], check=True, capture_output=True)
except subprocess.CalledProcessError as e:
logger.error("FFmpeg burning failed", stderr=e.stderr.decode('utf-8'))
raise RuntimeError(f"FFmpeg error: {e.stderr.decode('utf-8')[-500:]}")
video_size = os.path.getsize(output_path)
@ -444,12 +578,12 @@ async def process(job_id: str):
output_assets.append(video_asset.id)
# Cleanup temp audio
if os.path.exists(audio_path):
if 'audio_path' in locals() and audio_path and os.path.exists(audio_path):
os.remove(audio_path)
job.output_asset_ids = output_assets
job.output_data = {
"transcript": result.get("text", ""),
"transcript": transcription_result.get("text", ""),
"language": detected_language,
"segments_count": len(segments),
"word_timestamps": word_timestamps,
@ -464,7 +598,7 @@ async def process(job_id: str):
db.commit()
except Exception as e:
logger.error(f"Subtitle processing error: {e}")
logger.error(f"Subtitle processing error: {e}", exc_info=True)
job.status = "failed"
job.error_message = str(e)
db.commit()
@ -555,6 +689,39 @@ Style: Default,{font},{font_size},&H00{primary_hex},&H00{primary_hex},&H00{outli
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""
def _parse_srt(content: str) -> list:
"""Parse SRT content into segments"""
segments = []
blocks = content.strip().split('\n\n')
for block in blocks:
lines = block.strip().split('\n')
if len(lines) >= 3:
# Parse timestamp line
times = lines[1].split(' --> ')
if len(times) != 2:
continue
start_str, end_str = times
def parse_time(t_str):
t_str = t_str.replace(',', '.')
parts = t_str.split(':')
return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
start = parse_time(start_str)
end = parse_time(end_str)
text = "\n".join(lines[2:])
segments.append({
'start': start,
'end': end,
'text': text
})
return segments
for segment in segments:
start = _format_ass_timestamp(segment['start'])
end = _format_ass_timestamp(segment['end'])
@ -564,6 +731,39 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
return ass_content
def _parse_srt(content: str) -> list:
"""Parse SRT content into segments"""
segments = []
blocks = content.strip().split('\n\n')
for block in blocks:
lines = block.strip().split('\n')
if len(lines) >= 3:
# Parse timestamp line
times = lines[1].split(' --> ')
if len(times) != 2:
continue
start_str, end_str = times
def parse_time(t_str):
t_str = t_str.replace(',', '.')
parts = t_str.split(':')
return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
start = parse_time(start_str)
end = parse_time(end_str)
text = "\n".join(lines[2:])
segments.append({
'start': start,
'end': end,
'text': text
})
return segments
def _format_srt_timestamp(seconds: float) -> str:
"""Convert seconds to SRT timestamp format (HH:MM:SS,mmm)"""
td = timedelta(seconds=seconds)

View file

@ -251,6 +251,55 @@ async def generate(job_id: str):
job.output_asset_ids = [asset.id]
job.output_data = {"asset_id": str(asset.id), "file_path": file_path}
# Log Usage
try:
from app.utils.logging import log_model_usage
# Calculate duration
duration_ms = 0
if job.started_at:
duration_ms = int((datetime.utcnow() - job.started_at).total_seconds() * 1000)
# Determine actual parameters used (defaults logic)
# Runway defaults
used_duration = input_data.get("duration")
used_resolution = input_data.get("resolution")
used_aspect = input_data.get("aspect_ratio")
if provider == "runway":
if not used_duration: used_duration = 5
if not used_resolution: used_resolution = "1280x768"
elif provider == "veo":
if not used_duration: used_duration = 8
if not used_resolution: used_resolution = "720p"
if not used_aspect: used_aspect = "16:9"
log_model_usage(
db=db,
job_id=str(job.id),
user_id=str(job.user_id),
module="video_generator",
action="generate",
provider=provider,
model=job.api_model,
usage_stats={
"seconds": used_duration,
"processing_time_ms": duration_ms
},
request_metadata={
"prompt": prompt,
"resolution": used_resolution,
"duration": used_duration,
"aspect_ratio": used_aspect
},
response_metadata={
"output_assets": [str(asset.id)] if video_data and 'asset' in locals() else [],
"filenames": [filename] if filename else []
}
)
except Exception as log_e:
logger.error(f"Failed to log video generation usage: {log_e}")
job.progress = 100
job.status = "completed"
job.completed_at = datetime.utcnow()

View file

@ -35,11 +35,47 @@ async def transcribe(job_id: str):
translate = input_data.get("translate", False)
target_language = input_data.get("target_language", "EN-US")
# Transcribe with Whisper
import whisper
# Extract audio to temp file first for better compatibility
import subprocess
temp_audio = f"temp_{uuid4()}.wav"
temp_audio_path = os.path.join(settings.storage_path, "temp", temp_audio)
os.makedirs(os.path.dirname(temp_audio_path), exist_ok=True)
if not os.path.exists(input_asset.file_path):
raise ValueError(f"Input file not found at path: {input_asset.file_path}")
model = whisper.load_model("base")
result = model.transcribe(input_asset.file_path, verbose=False)
try:
# Extract audio: 16khz mono wav (Whisper native format)
cmd = [
"ffmpeg", "-i", input_asset.file_path,
"-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
"-y", temp_audio_path
]
# Run ffmpeg with capture_output to log errors if it fails
process = subprocess.run(
cmd,
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
transcribe_input = temp_audio_path
except subprocess.CalledProcessError as e:
# Raise the specific error to debug
error_msg = f"FFmpeg extraction failed. Stderr: {e.stderr.decode()[:500]}..." # Truncate if too long
raise ValueError(error_msg)
import whisper
try:
model = whisper.load_model("base")
result = model.transcribe(transcribe_input, verbose=False)
finally:
# Cleanup temp file
if os.path.exists(temp_audio_path):
os.remove(temp_audio_path)
job.progress = 60
db.commit()

View file

@ -0,0 +1,90 @@
"""Usage Logging Utility"""
from sqlalchemy.orm import Session
from app.models.usage import UsageLog
from app.models.pricing import ModelPricing
from app.models.job import Job
from typing import Optional, Dict, Any
import logging
logger = logging.getLogger(__name__)
def log_model_usage(
    db: Session,
    job_id: str,
    user_id: str,
    module: str,
    action: str,
    provider: str,
    model: str,
    request_metadata: Optional[Dict[str, Any]] = None,
    response_metadata: Optional[Dict[str, Any]] = None,
    usage_stats: Optional[Dict[str, Any]] = None
) -> Optional[UsageLog]:
    """
    Log model usage and calculate estimated cost.

    The cost is the sum of (unit price * quantity) over every usage_stats
    key that has a matching price on the ModelPricing row for
    (provider, model). If no pricing row exists the cost is logged as 0.

    usage_stats keys:
    - input_tokens (int)
    - output_tokens (int)
    - images (int)
    - seconds (float)
    - characters (int)
    - requests (int)
    - processing_time_ms (int, stored as-is, not priced)

    Returns:
        The committed UsageLog row, or None if logging failed. This
        function never raises: usage logging is best-effort and must not
        fail the job that triggered it.
    """
    # Imported locally so this block stays self-contained.
    from sqlalchemy.exc import SQLAlchemyError

    # None defaults instead of shared mutable {} defaults.
    request_metadata = {} if request_metadata is None else request_metadata
    response_metadata = {} if response_metadata is None else response_metadata
    usage_stats = {} if usage_stats is None else usage_stats

    # (usage_stats key, ModelPricing attribute, quantity divisor)
    rate_table = (
        ("input_tokens", "cost_per_input_token", None),
        ("output_tokens", "cost_per_output_token", None),
        ("images", "cost_per_image", None),
        ("seconds", "cost_per_second", None),
        ("characters", "cost_per_1k_chars", 1000.0),
        ("requests", "cost_per_request", None),  # generic per-request
    )

    try:
        # 1. Calculate Cost
        cost = 0.0

        # Find pricing record
        pricing = db.query(ModelPricing).filter(
            ModelPricing.provider == provider,
            ModelPricing.model_name == model
        ).first()

        # If the specific model is not found we just log a cost of 0.
        if pricing:
            for stat_key, rate_attr, divisor in rate_table:
                quantity = usage_stats.get(stat_key)
                rate = getattr(pricing, rate_attr, None)
                # A pricing row usually sets only some rate columns; an
                # unset (None) rate contributes nothing instead of making
                # float(None) raise and dropping the whole log entry.
                if not quantity or rate is None:
                    continue
                units = quantity / divisor if divisor else quantity
                cost += float(rate) * units

        # 2. Create Log Record
        log_entry = UsageLog(
            job_id=job_id,
            user_id=user_id,
            module=module,
            action=action,
            api_provider=provider,
            api_model=model,
            tokens_input=usage_stats.get("input_tokens"),
            tokens_output=usage_stats.get("output_tokens"),
            estimated_cost_usd=cost,
            processing_time_ms=usage_stats.get("processing_time_ms"),
            request_metadata=request_metadata,
            response_metadata=response_metadata
        )

        db.add(log_entry)
        db.commit()
        return log_entry

    except SQLAlchemyError as e:
        logger.error(f"Failed to log usage: {e}")
        # After a failed DB operation the session must be rolled back
        # before the caller (who shares it) can use it again.
        try:
            db.rollback()
        except Exception as rollback_error:
            logger.error(f"Rollback after failed usage log also failed: {rollback_error}")
        return None
    except Exception as e:
        logger.error(f"Failed to log usage: {e}")
        return None

View file

@ -0,0 +1,7 @@
# Debug script: print the image-provider configuration registered under
# the "nano-banana" key.
import sys
import os
# Put the parent of this script's directory on sys.path so that the `app`
# package import below resolves when the file is run directly.
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from app.services.image_generator import IMAGE_PROVIDERS
print("Nano Banana Config:")
# Presumably IMAGE_PROVIDERS is a dict, in which case a missing key prints
# None instead of raising.
print(IMAGE_PROVIDERS.get("nano-banana"))

View file

@ -0,0 +1,24 @@
"""Check Usage Logs"""
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.database import SessionLocal
from app.models.usage import UsageLog
from sqlalchemy import desc
def check_logs():
    """Print the number of usage-log rows and a summary of the newest one."""
    session = SessionLocal()
    try:
        total = session.query(UsageLog).count()
        print(f"Total Usage Logs: {total}")

        # Nothing more to show for an empty table.
        if not total > 0:
            return

        newest = (
            session.query(UsageLog)
            .order_by(desc(UsageLog.created_at))
            .first()
        )
        print(f"Latest Log: {newest.module} | {newest.action} | {newest.created_at}")
        print(f"Metadata: {newest.request_metadata}")
    finally:
        # Always release the session, including on the early return.
        session.close()


if __name__ == "__main__":
    check_logs()

View file

@ -0,0 +1,124 @@
import asyncio
import os
import httpx
import base64
import json
from dotenv import load_dotenv
from typing import Optional
import uuid
load_dotenv()
API_KEY = os.getenv("GOOGLE_API_KEY")
async def _upload_file_http(media_data: bytes, mime_type: str) -> Optional[str]:
    """Upload bytes via the two-step resumable upload flow (debug helper).

    Step 1 posts the metadata with the "start" command and reads the
    session URL from the ``x-goog-upload-url`` response header; step 2
    posts the payload to that URL with "upload, finalize".

    Returns:
        The ``file.uri`` value from the final JSON response, or None if
        either request does not return HTTP 200 or no upload URL is given.
    """
    size = len(media_data)
    start_url = f"https://generativelanguage.googleapis.com/upload/v1beta/files?key={API_KEY}"
    start_headers = {
        "X-Goog-Upload-Protocol": "resumable",
        "X-Goog-Upload-Command": "start",
        "X-Goog-Upload-Header-Content-Length": str(size),
        "X-Goog-Upload-Header-Content-Type": mime_type,
        "Content-Type": "application/json"
    }
    file_metadata = {"file": {"display_name": f"debug_upload_{uuid.uuid4()}"}}

    async with httpx.AsyncClient(timeout=30.0) as client:
        # 1. Start Upload
        print("Starting resumable upload...")
        start_resp = await client.post(start_url, headers=start_headers, json=file_metadata)
        if start_resp.status_code != 200:
            print(f"Failed to start upload: {start_resp.text}")
            return None

        session_url = start_resp.headers.get("x-goog-upload-url")
        if not session_url:
            print("No upload URL returned")
            return None

        # 2. Upload Bytes (single chunk, finalized in the same request)
        print(f"Uploading {size} bytes to {session_url[:50]}...")
        finish_resp = await client.post(
            session_url,
            headers={
                "Content-Length": str(size),
                "X-Goog-Upload-Offset": "0",
                "X-Goog-Upload-Command": "upload, finalize"
            },
            content=media_data,
        )
        if finish_resp.status_code != 200:
            print(f"Failed to upload data: {finish_resp.text}")
            return None

        uploaded_uri = finish_resp.json().get("file", {}).get("uri")
        print(f"File uploaded successfully: {uploaded_uri}")
        return uploaded_uri
async def test_edit():
    """Smoke-test an image edit request against gemini-3-pro-image-preview.

    Builds a 100x100 red JPEG in memory, uploads it with
    ``_upload_file_http``, then posts a generateContent request that
    references the uploaded file and prints the outcome.
    """
    # Use gemini-3-pro-image-preview
    model_name = "gemini-3-pro-image-preview"
    endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent?key={API_KEY}"

    print(f"Testing Model: {model_name} with Raw HTTP File API")

    # Generate valid JPEG using PIL
    from PIL import Image
    import io
    stream = io.BytesIO()
    Image.new('RGB', (100, 100), color='red').save(stream, format='JPEG')
    jpeg_bytes = stream.getvalue()
    print(f"Generated valid JPEG ({len(jpeg_bytes)} bytes)")

    # Upload File
    file_uri = await _upload_file_http(jpeg_bytes, "image/jpeg")
    if not file_uri:
        print("Skipping generation due to upload failure")
        return

    prompt = "Make the image blue"
    # File reference first, instruction text second.
    payload = {
        "contents": [{
            "parts": [
                {"fileData": {"mimeType": "image/jpeg", "fileUri": file_uri}},
                {"text": f"Edit this image: {prompt}"},
            ]
        }]
    }

    async with httpx.AsyncClient(timeout=30) as client:
        print("Generating content...")
        response = await client.post(
            endpoint,
            headers={"Content-Type": "application/json"},
            json=payload
        )

        print(f"Status: {response.status_code}")

        if response.status_code != 200:
            print(f"✗ Failed: {response.text}")
        else:
            print("✓ Success! API accepted the request.")
            body = response.json()
            if "candidates" in body and body["candidates"]:
                print("Candidates found.")


if __name__ == "__main__":
    asyncio.run(test_edit())

View file

@ -0,0 +1,26 @@
import asyncio
import os
import httpx
import json
from dotenv import load_dotenv
load_dotenv()
API_KEY = os.getenv("GOOGLE_API_KEY")
async def list_models():
    """Fetch the v1beta model list and print each model's name and display name."""
    endpoint = f"https://generativelanguage.googleapis.com/v1beta/models?key={API_KEY}"

    async with httpx.AsyncClient() as client:
        response = await client.get(endpoint)

        # Report the failure and stop; everything below is the success path.
        if response.status_code != 200:
            print(f"Error: {response.status_code} - {response.text}")
            return

        models = response.json().get("models", [])
        print(f"Found {len(models)} models:")
        for entry in models:
            display = entry.get("displayName", "No Display Name")
            print(f" - {entry.get('name')} ({display})")


if __name__ == "__main__":
    asyncio.run(list_models())

View file

@ -0,0 +1,155 @@
"""Seed Model Pricing Data"""
import sys
import os
# Add parent dir to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.database import SessionLocal
from app.models.pricing import ModelPricing
from app.database import engine, Base
def seed_pricing():
    """Replace the contents of the model-pricing table with the built-in catalog.

    Creates any missing tables, deletes every existing ModelPricing row and
    inserts the catalog below in a single transaction. On any error the
    transaction is rolled back and the error is printed.
    """
    # Create tables if they don't exist
    Base.metadata.create_all(bind=engine)

    session = SessionLocal()

    # Pricing Data (from model_costs.md): (provider, model_name, cost fields)
    catalog = [
        # 1. Text & Vision
        ("openai", "gpt-4o", {
            "cost_per_input_token": 0.0000025,   # $2.50 / 1M
            "cost_per_output_token": 0.0000100,  # $10.00 / 1M
        }),
        ("openai", "gpt-4o-mini", {
            "cost_per_input_token": 0.00000015,   # $0.15 / 1M
            "cost_per_output_token": 0.00000060,  # $0.60 / 1M
        }),
        ("google", "gemini-2.0-flash-exp", {"cost_per_request": 0.0}),  # Currently free/preview

        # 2. Image Generation
        ("openai", "dall-e-3", {"cost_per_image": 0.080}),  # Standard HD avg
        ("openai", "gpt-image-1", {"cost_per_image": 0.040}),
        ("google", "imagen-3.0", {"cost_per_image": 0.040}),
        ("nano-banana", "gemini-2.5-flash-image", {"cost_per_image": 0.040}),
        ("stability", "sd3.5-large", {"cost_per_image": 0.065}),
        ("ideogram", "V_2", {"cost_per_image": 0.080}),
        ("flux", "flux-pro-1.1", {"cost_per_image": 0.050}),

        # 3. Video Generation
        ("runway", "gen3_alpha", {"cost_per_second": 0.10}),
        ("runway", "gen3_alpha_turbo", {"cost_per_second": 0.05}),
        ("runway", "gen4_turbo", {"cost_per_second": 0.15}),  # Estimate
        ("google", "veo-3.1", {"cost_per_second": 0.15}),
        # Provider 'veo' or 'google'? Log said 'veo'
        ("veo", "veo-3.1-generate-preview", {"cost_per_second": 0.15}),
        # Alias for safety
        ("google", "veo-3.1-generate-preview", {"cost_per_second": 0.15}),
        ("google", "veo-3", {"cost_per_second": 0.10}),

        # 3b. Misc Image
        ("nano-banana", "gemini-3-pro-image-preview", {"cost_per_image": 0.040}),

        # 4. Audio
        ("elevenlabs", "eleven_multilingual_v2", {"cost_per_1k_chars": 0.30}),
        ("elevenlabs", "eleven_flash_v2_5", {"cost_per_1k_chars": 0.15}),
        ("elevenlabs", "sound-generation", {"cost_per_request": 0.10}),  # Per generation estimate
        ("deepl", "api-pro", {"cost_per_1k_chars": 0.025}),  # $25 per 1M

        # 5. Media Editing
        ("clipping_magic", "api-v1", {"cost_per_image": 0.10}),
        ("topaz", "image-v1", {"cost_per_image": 0.20}),   # Buffer estimate
        ("topaz", "video-v1", {"cost_per_second": 0.30}),  # Buffer estimate
    ]

    # Expand each tuple into the keyword arguments ModelPricing expects.
    pricing_rows = [
        {"provider": provider, "model_name": model_name, **costs}
        for provider, model_name, costs in catalog
    ]

    print(f"Seeding {len(pricing_rows)} pricing records...")

    try:
        # Clear existing
        session.query(ModelPricing).delete()

        for row in pricing_rows:
            session.add(ModelPricing(**row))

        session.commit()
        print("✓ Pricing data seeded successfully.")
    except Exception as e:
        print(f"Error seeding data: {e}")
        session.rollback()
    finally:
        session.close()


if __name__ == "__main__":
    seed_pricing()

View file

@ -0,0 +1,73 @@
"""Test Logging Utility Manually"""
import sys
import os
import uuid
from datetime import datetime
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.database import SessionLocal
from app.utils.logging import log_model_usage
from app.models.user import User
from app.models.job import Job
def test_logging():
    """Manually exercise log_model_usage with a fake DALL-E 3 generation.

    Uses the first user in the database, commits a throwaway completed Job
    for it, logs one image generation against that job and prints the
    resulting log id and estimated cost.
    """
    session = SessionLocal()
    try:
        # Get a user
        owner = session.query(User).first()
        if not owner:
            print("No fake user found, cannot log.")
            return

        print(f"Logging for user: {owner.email}")

        # Create a dummy Job
        dummy_job = Job(
            user_id=owner.id,
            module="image_generator",
            action="generate",
            input_data={"prompt": "test"},
            status="completed"
        )
        session.add(dummy_job)
        session.commit()

        # Log a fake image generation
        usage = {"images": 1, "processing_time_ms": 4500}
        request_info = {
            "prompt": "A futuristic city with flying cars and neon lights, cyberpunk style",
            "size": "1024x1024",
            "quality": "hd"
        }
        response_info = {"status": "success"}

        entry = log_model_usage(
            db=session,
            job_id=str(dummy_job.id),
            user_id=str(owner.id),
            module="image_generator",
            action="generate",
            provider="openai",
            model="dall-e-3",
            usage_stats=usage,
            request_metadata=request_info,
            response_metadata=response_info
        )

        if not entry:
            print("✗ Log creation failed (returned None)")
        else:
            print(f"✓ Log created with ID: {entry.id}")
            print(f" Cost: ${entry.estimated_cost_usd}")

    except Exception as e:
        print(f"Error: {e}")
        session.rollback()
    finally:
        session.close()


if __name__ == "__main__":
    test_logging()

View file

@ -0,0 +1,74 @@
"""Test Logging Filenames"""
import sys
import os
import uuid
from datetime import datetime
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.database import SessionLocal
from app.utils.logging import log_model_usage
from app.models.user import User
from app.models.job import Job
def test_logging_filenames():
    """Manually check that filenames in response_metadata survive logging.

    Uses the first user in the database, commits a throwaway completed Job,
    logs a two-image generation whose response metadata carries filenames,
    then prints whether those filenames are present on the returned log.
    """
    session = SessionLocal()
    try:
        # Get a user
        owner = session.query(User).first()
        if not owner:
            print("No fake user found, cannot log.")
            return

        print(f"Logging for user: {owner.email}")

        # Create a dummy Job
        dummy_job = Job(
            user_id=owner.id,
            module="image_generator",
            action="generate",
            input_data={"prompt": "filename test"},
            status="completed"
        )
        session.add(dummy_job)
        session.commit()

        # Log a fake image generation with filenames
        usage = {"images": 2, "processing_time_ms": 5000}
        request_info = {"prompt": "Two cats playing chess", "size": "1024x1024"}
        response_info = {
            "output_assets": ["asset_1", "asset_2"],
            "filenames": ["cat_chess_01.png", "cat_chess_02.png"]
        }

        entry = log_model_usage(
            db=session,
            job_id=str(dummy_job.id),
            user_id=str(owner.id),
            module="image_generator",
            action="generate",
            provider="openai",
            model="dall-e-3",
            usage_stats=usage,
            request_metadata=request_info,
            response_metadata=response_info
        )

        if not entry:
            print("✗ Log creation failed (returned None)")
        else:
            print(f"✓ Log created with ID: {entry.id}")
            if entry.response_metadata.get("filenames"):
                print(f" Filenames found: {entry.response_metadata['filenames']}")
            else:
                print("✗ Filenames missing from log!")

    except Exception as e:
        print(f"Error: {e}")
        session.rollback()
    finally:
        session.close()


if __name__ == "__main__":
    test_logging_filenames()

View file

@ -0,0 +1,76 @@
import asyncio
import os
import sys
import logging
from dotenv import load_dotenv
# Add backend to path: the parent of this script's directory goes on
# sys.path so the `app` package import below resolves.
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
# Must run before Settings is defined: the class body reads GOOGLE_API_KEY
# from the environment at definition time.
load_dotenv()
# Mock settings
class Settings:
    """Minimal stand-in settings object exposing only ``google_api_key``."""
    google_api_key = os.getenv("GOOGLE_API_KEY")
import app.services.image_generator as img_gen
# Replace the module's `settings` attribute with the mock; presumably the
# generator reads its API key from there (confirm in image_generator).
img_gen.settings = Settings()
# Configure logging
logging.basicConfig(level=logging.INFO)
async def test_generation():
    """Generate a base image, then try editing it with several model names.

    Step 1 calls ``_generate_nano_banana`` with a text prompt. Step 2 feeds
    the resulting bytes back in with an edit prompt, trying each candidate
    Gemini 3 model name in turn and stopping at the first one that returns
    image data. All outcomes are printed; nothing is raised.
    """
    # 1. First generate an image (or use a dummy one if we had one, but let's generate)
    print("--- Testing Generation (Text to Image) ---")
    base_request = {
        "prompt": "A cyberpunk street scene",
        "model": "gemini-2.0-flash-exp"  # Try 2.0 first
    }

    # We can't easily call _generate_nano_banana without mocking httpx or having a real key.
    # Assuming the user has a real key since they are getting results.
    try:
        base_image, _ = await img_gen._generate_nano_banana(base_request)
        if not base_image:
            print("✗ Initial generation failed")
            return

        print(f"✓ Generated base image: {len(base_image)} bytes")

        # 2. Now try to 'Edit' it
        print("\n--- Testing Edit (Image + Text to Image) ---")
        edit_prompt = "Make it daytime"

        # Test Gemini 3 Variants
        candidate_models = (
            "gemini-3.0-pro-image-preview",
            "gemini-3.0-pro-image",
            "gemini-3.0-flash-image",
            "gemini-3.0-flash-image-preview",
        )

        for candidate in candidate_models:
            print(f"\n--- Testing Model: {candidate} ---")
            edit_request = {"prompt": edit_prompt, "model": candidate}
            try:
                # We pass 'image/png' because our fake input_image is implied to be handled
                edited, _ = await img_gen._generate_nano_banana(edit_request, base_image, "image/png")
            except Exception as e:
                print(f"✗ Error with {candidate}: {e}")
                continue

            if edited:
                print(f"✓ Success with {candidate}: {len(edited)} bytes")
                break  # Stop on first success just to know valid one
            print(f"✗ Failed with {candidate} (No data)")

    except Exception as e:
        print(f"✗ Error: {e}")


if __name__ == "__main__":
    asyncio.run(test_generation())

8
forge-ai.code-workspace Normal file
View file

@ -0,0 +1,8 @@
{
"folders": [
{
"path": "."
}
],
"settings": {}
}

View file

@ -0,0 +1,267 @@
'use client';
import { Fragment, useState, useEffect } from 'react';
import { toast } from 'react-hot-toast';
import {
    Search,
    Filter,
    ChevronDown,
    ChevronUp,
    DollarSign,
    Clock,
    FileText,
    Image as ImageIcon,
    Video,
    Mic,
    Cpu
} from 'lucide-react';
import AdminGuard from '@/components/AdminGuard';
import api from '@/lib/api';
/** One row of the admin usage-log search result, as rendered by this page. */
interface UsageLog {
    /** Log identifier; used as the row key and for expand/collapse state. */
    id: string;
    /** Passed to `new Date(...)` for display. */
    timestamp: string;
    /** The user the usage is attributed to. */
    user: {
        id: string;
        email: string;
        /** Display name; the table falls back to `email` when this is empty. */
        name: string;
    };
    /** Which module / provider / model handled the request. */
    service: {
        module: string;
        provider: string;
        model: string;
    };
    metrics: {
        tokens_in?: number;
        tokens_out?: number;
        // NOTE(review): the backend mapping for the two fields below is not
        // visible here; confirm they can never be null before relying on it.
        cost_usd: number;
        latency_ms: number;
    };
    /** Free-form request metadata; rendered key by key in the expanded row. */
    request_details: any;
    /** Free-form response metadata; rendered key by key in the expanded row. */
    response_details: any;
}
/**
 * Admin page for searching AI usage logs.
 *
 * Loads the most recent logs on mount, lets the admin filter by keyword,
 * provider and start date, and shows request/response metadata for a row
 * when it is expanded. Wrapped in AdminGuard.
 */
export default function UsageSearchPage() {
    const [logs, setLogs] = useState<UsageLog[]>([]);
    const [loading, setLoading] = useState(false);
    const [searchQuery, setSearchQuery] = useState('');
    // id of the currently expanded row, or null when all rows are collapsed
    const [expandedRow, setExpandedRow] = useState<string | null>(null);

    // Filters
    const [providerFilter, setProviderFilter] = useState('');
    const [startDate, setStartDate] = useState('');

    /** Query /admin/logs/search with the current keyword and filters. */
    const searchLogs = async () => {
        setLoading(true);
        try {
            const params: any = {
                limit: 50
            };
            // Only send filters that are actually set.
            if (searchQuery) params.query = searchQuery;
            if (providerFilter) params.provider = providerFilter;
            if (startDate) params.start_date = new Date(startDate).toISOString();

            const response = await api.get('/admin/logs/search', { params });
            setLogs(response.data.items || []);
        } catch (error) {
            console.error('Search failed', error);
            toast.error('Failed to search logs');
        } finally {
            setLoading(false);
        }
    };

    useEffect(() => {
        searchLogs();
    }, []); // Initial load

    /** Expand the given row, or collapse it if it is already expanded. */
    const toggleRow = (id: string) => {
        setExpandedRow(expandedRow === id ? null : id);
    };

    /** Pick an icon from substrings of the module name. */
    const getModuleIcon = (module: string) => {
        if (module.includes('image')) return <ImageIcon className="w-4 h-4 text-purple-400" />;
        if (module.includes('video')) return <Video className="w-4 h-4 text-pink-400" />;
        if (module.includes('voice') || module.includes('speech')) return <Mic className="w-4 h-4 text-yellow-400" />;
        if (module.includes('text')) return <FileText className="w-4 h-4 text-blue-400" />;
        return <Cpu className="w-4 h-4 text-gray-400" />;
    };

    /** Render a metadata object as a key/value list; nested values are shown as JSON. */
    const formatMetadata = (data: any) => {
        if (!data) return <span className="text-gray-500 italic">No data</span>;

        return (
            <div className="grid grid-cols-1 gap-2 text-sm">
                {Object.entries(data).map(([key, value]) => (
                    <div key={key} className="flex flex-col">
                        <span className="text-gray-500 uppercase text-xs font-bold">{key.replace(/_/g, ' ')}</span>
                        <span className="text-gray-300 break-words font-mono bg-black/20 p-1 rounded mt-1">
                            {typeof value === 'object' ? JSON.stringify(value, null, 2) : String(value)}
                        </span>
                    </div>
                ))}
            </div>
        );
    };

    return (
        <AdminGuard>
            <div className="space-y-6">
                <div className="flex items-center justify-between">
                    <div>
                        <h1 className="text-2xl font-bold text-white">AI Usage Explorer</h1>
                        <p className="text-gray-500">Search prompts, files, and costs</p>
                    </div>
                </div>

                {/* Search Bar */}
                <div className="bg-forge-dark p-4 rounded-xl border border-gray-800 flex flex-wrap gap-4 items-end">
                    <div className="flex-1 min-w-[300px]">
                        <label className="block text-sm text-gray-500 mb-1">Search Keywords</label>
                        <div className="relative">
                            <Search className="absolute left-3 top-3 w-5 h-5 text-gray-500" />
                            <input
                                type="text"
                                placeholder="Search prompts, filenames, user emails..."
                                className="input-field pl-10 w-full"
                                value={searchQuery}
                                onChange={(e) => setSearchQuery(e.target.value)}
                                onKeyDown={(e) => e.key === 'Enter' && searchLogs()}
                            />
                        </div>
                    </div>

                    <div className="w-48">
                        <label className="block text-sm text-gray-500 mb-1">Provider</label>
                        <select
                            className="select-field w-full"
                            value={providerFilter}
                            onChange={(e) => setProviderFilter(e.target.value)}
                        >
                            <option value="">All Providers</option>
                            <option value="openai">OpenAI</option>
                            <option value="google">Google</option>
                            {/* Logged as their own provider ids by the image/video modules */}
                            <option value="nano-banana">Nano Banana</option>
                            <option value="veo">Veo</option>
                            <option value="elevenlabs">ElevenLabs</option>
                            <option value="topaz">Topaz</option>
                            <option value="runway">Runway</option>
                            <option value="stability">Stability AI</option>
                        </select>
                    </div>

                    <div className="w-48">
                        <label className="block text-sm text-gray-500 mb-1">Start Date</label>
                        <input
                            type="date"
                            className="input-field w-full"
                            value={startDate}
                            onChange={(e) => setStartDate(e.target.value)}
                        />
                    </div>

                    <button
                        onClick={searchLogs}
                        disabled={loading}
                        className="btn-primary h-[42px] px-6"
                    >
                        {loading ? 'Searching...' : 'Search'}
                    </button>
                </div>

                {/* Results Table */}
                <div className="bg-forge-dark rounded-xl border border-gray-800 overflow-hidden">
                    <table className="w-full">
                        <thead className="bg-black/20">
                            <tr>
                                <th className="px-6 py-3 text-left text-sm font-medium text-gray-500">Time</th>
                                <th className="px-6 py-3 text-left text-sm font-medium text-gray-500">User</th>
                                <th className="px-6 py-3 text-left text-sm font-medium text-gray-500">Service / Model</th>
                                <th className="px-6 py-3 text-right text-sm font-medium text-gray-500">Cost</th>
                                <th className="px-6 py-3 text-right text-sm font-medium text-gray-500">Latency</th>
                                <th className="px-6 py-3 w-10"></th>
                            </tr>
                        </thead>
                        <tbody className="divide-y divide-gray-800">
                            {logs.length === 0 && !loading && (
                                <tr>
                                    <td colSpan={6} className="px-6 py-12 text-center text-gray-500">
                                        No logs found. Try adjusting your search query.
                                    </td>
                                </tr>
                            )}

                            {logs.map((log) => (
                                // The key must sit on the outermost element returned
                                // from map(); the shorthand <> cannot carry one.
                                <Fragment key={log.id}>
                                    <tr
                                        onClick={() => toggleRow(log.id)}
                                        className="hover:bg-white/5 cursor-pointer transition-colors"
                                    >
                                        <td className="px-6 py-4 text-sm text-gray-300">
                                            {new Date(log.timestamp).toLocaleString()}
                                        </td>
                                        <td className="px-6 py-4">
                                            <div className="text-sm text-white">{log.user.name || log.user.email}</div>
                                            <div className="text-xs text-gray-500">{log.user.email}</div>
                                        </td>
                                        <td className="px-6 py-4">
                                            <div className="flex items-center gap-2">
                                                {getModuleIcon(log.service.module)}
                                                <span className="text-sm text-white font-medium">{log.service.provider}</span>
                                            </div>
                                            <div className="text-xs text-forge-yellow mt-1 pl-6">{log.service.model}</div>
                                        </td>
                                        <td className="px-6 py-4 text-right">
                                            <div className="flex items-center justify-end gap-1 text-sm text-green-400 font-mono">
                                                <DollarSign className="w-3 h-3" />
                                                {/* A missing cost is shown as 0 instead of crashing on .toFixed */}
                                                {Number(log.metrics.cost_usd ?? 0).toFixed(4)}
                                            </div>
                                        </td>
                                        <td className="px-6 py-4 text-right">
                                            <div className="flex items-center justify-end gap-1 text-sm text-gray-400">
                                                <Clock className="w-3 h-3" />
                                                {/* Avoid rendering a bare "ms" when no latency was recorded */}
                                                {log.metrics.latency_ms != null ? `${log.metrics.latency_ms}ms` : '—'}
                                            </div>
                                        </td>
                                        <td className="px-6 py-4">
                                            {expandedRow === log.id ? <ChevronUp className="w-4 h-4 text-gray-500" /> : <ChevronDown className="w-4 h-4 text-gray-500" />}
                                        </td>
                                    </tr>
                                    {expandedRow === log.id && (
                                        <tr className="bg-black/20">
                                            <td colSpan={6} className="px-6 py-4 border-b border-gray-800">
                                                <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
                                                    {/* Request Details */}
                                                    <div className="bg-forge-darker rounded-lg p-4 border border-gray-700/50">
                                                        <h4 className="text-gray-400 text-xs uppercase font-semibold mb-3 border-b border-gray-700 pb-2">Request Parameters</h4>
                                                        {formatMetadata(log.request_details)}

                                                        {/* Show Prompt Prominently if exists */}
                                                        {log.request_details?.prompt && (
                                                            <div className="mt-4 pt-4 border-t border-gray-700/50">
                                                                <div className="text-gray-500 uppercase text-xs font-bold mb-1">Full Prompt</div>
                                                                <div className="text-white text-sm bg-black/30 p-3 rounded border border-gray-700/50 leading-relaxed max-h-32 overflow-y-auto">
                                                                    {log.request_details.prompt}
                                                                </div>
                                                            </div>
                                                        )}
                                                    </div>

                                                    {/* Response Details */}
                                                    <div className="bg-forge-darker rounded-lg p-4 border border-gray-700/50">
                                                        <h4 className="text-gray-400 text-xs uppercase font-semibold mb-3 border-b border-gray-700 pb-2">Response & Output</h4>
                                                        {formatMetadata(log.response_details)}
                                                    </div>
                                                </div>
                                            </td>
                                        </tr>
                                    )}
                                </Fragment>
                            ))}
                        </tbody>
                    </table>
                </div>
            </div>
        </AdminGuard>
    );
}

View file

@ -180,17 +180,20 @@ export default function VoiceToTextPage() {
{/* File Upload */}
<div>
<label className="block text-sm font-medium text-gray-300 mb-2">
Upload Audio
Upload Audio or Video
</label>
<FileUpload
onUpload={handleFileUpload}
accept={{ 'audio/*': ['.mp3', '.wav', '.m4a', '.flac', '.ogg'] }}
accept={{
'audio/*': ['.mp3', '.wav', '.m4a', '.flac', '.ogg'],
'video/*': ['.mp4', '.mov', '.avi', '.mkv', '.webm']
}}
currentFile={file}
onClear={() => {
setFile(null);
setAssetId(null);
}}
label="Upload an audio file to transcribe"
label="Upload an audio or video file to transcribe"
/>
{uploading && (
<p className="mt-2 text-sm text-forge-yellow">Uploading...</p>
@ -252,7 +255,7 @@ export default function VoiceToTextPage() {
className="btn-primary w-full flex items-center justify-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed"
>
<Sparkles className="w-5 h-5" />
{loading ? 'Transcribing...' : 'Transcribe Audio'}
{loading ? 'Transcribing...' : 'Transcribe'}
</button>
{/* Job Progress */}

View file

@ -462,6 +462,7 @@ export default function MyFilesPage() {
{getCommonFileType() === 'image' && (
<>
<button onClick={() => { handleBatchAction('upscale_image'); setShowActionMenu(false); }} className="w-full text-left px-3 py-2 text-sm text-gray-300 hover:bg-white/10 hover:text-white transition-colors">Upscale Image</button>
<button onClick={() => { router.push(`/image/generate?referenceAssetId=${Array.from(selectedAssetsMap.keys())[0]}`); setShowActionMenu(false); }} className="w-full text-left px-3 py-2 text-sm text-gray-300 hover:bg-white/10 hover:text-white transition-colors">Edit with Nano Banana</button>
<button onClick={() => { handleBatchAction('remove_bg'); setShowActionMenu(false); }} className="w-full text-left px-3 py-2 text-sm text-gray-300 hover:bg-white/10 hover:text-white transition-colors">Remove Background</button>
<div className="h-px bg-gray-700 my-1" />
<button onClick={() => { handleBatchAction('img_to_video'); setShowActionMenu(false); }} className="w-full text-left px-3 py-2 text-sm text-gray-300 hover:bg-white/10 hover:text-white transition-colors">Image to Video</button>
@ -754,6 +755,17 @@ export default function MyFilesPage() {
</p>
</div>
<div className="flex items-center gap-2">
{selectedAsset.file_type === 'image' && (
<button
onClick={() => {
router.push(`/image/generate?referenceAssetId=${selectedAsset.id}`);
}}
className="px-4 py-2 bg-purple-600 text-white rounded-lg hover:bg-purple-700 flex items-center gap-2"
>
<Edit2 className="w-4 h-4" />
Edit
</button>
)}
<button
onClick={() => handleDownload(selectedAsset)}
className="btn-primary flex items-center gap-2"

View file

@ -0,0 +1,422 @@
'use client';
import { useState, useEffect, useRef } from 'react';
import { useSearchParams, useRouter } from 'next/navigation';
import { toast } from 'react-hot-toast';
import {
Wand2, Download, History, RotateCcw, Image as ImageIcon,
Sparkles, Zap, ArrowRight, Loader2, Save, FolderOpen
} from 'lucide-react';
import api, { modulesApi, assetsApi } from '@/lib/api';
import { useStore } from '@/lib/store';
import FileUpload from '@/components/FileUpload';
import AssetPickerModal from '@/components/AssetPickerModal';
import JobProgress from '@/components/JobProgress';
export default function NanoBananaProPage() {
const router = useRouter();
const searchParams = useSearchParams();
const initialAssetId = searchParams.get('assetId');
// State
const [currentAsset, setCurrentAsset] = useState<any>(null);
const [prompt, setPrompt] = useState('');
const [loading, setLoading] = useState(false);
const [history, setHistory] = useState<any[]>([]);
const [imageSize, setImageSize] = useState('2K');
const [aspectRatio, setAspectRatio] = useState('16:9');
// Picker State
const [showPicker, setShowPicker] = useState(false);
const handleAssetSelect = (assets: any[]) => {
if (assets.length > 0) {
const selected = assets[0];
setCurrentAsset(selected);
setHistory((prev: any[]) => [...prev, selected]);
toast.success('Image selected from library');
}
};
// Quick Actions (from PHP reference)
const quickActions = [
{ label: 'Add Lighting', prompt: 'Add dramatic lighting' },
{ label: 'Add Sunset', prompt: 'Add sunset in background' },
{ label: 'More Vibrant', prompt: 'Make colors more vibrant' },
{ label: 'Motion Blur', prompt: 'Add motion blur' },
{ label: 'Photorealistic', prompt: 'Make it photorealistic' },
{ label: 'Depth of Field', prompt: 'Add depth of field effect' },
];
// Load initial asset if provided
useEffect(() => {
if (initialAssetId) {
loadAsset(initialAssetId);
}
}, [initialAssetId]);
// Fetch an asset by id, show it as the current image, and record it in the
// session history unless an entry with the same id is already there.
const loadAsset = async (id: string) => {
  try {
    const fetched = await assetsApi.get(id);
    setCurrentAsset(fetched.data);
    setHistory(entries => {
      const alreadyListed = entries.find(entry => entry.id === id);
      return alreadyListed ? entries : [...entries, fetched.data];
    });
  } catch (err) {
    toast.error('Failed to load initial image');
  }
};
// Kick off a Nano Banana generation (no current image) or edit (current image
// used as reference). On success only the job id is stored in `activeJobId`;
// `loading` stays true until the job callbacks clear it.
//
// @param e  Optional form/click event; its default action is suppressed.
const handleGenerate = async (e?: React.FormEvent) => {
  if (e) e.preventDefault();

  // Re-entrancy guard: the textarea's Enter shortcut calls this handler
  // directly and is not covered by the submit button's `disabled` state, so
  // without this check a second job could be submitted while one is running.
  if (loading) return;

  if (!prompt.trim()) {
    toast.error('Please enter a prompt');
    return;
  }

  setLoading(true);
  try {
    // Logic for Edit Loop:
    // The backend endpoint /modules/image/generate expects a JSON payload (ImageGenerateRequest).
    // It supports 'reference_asset_id' to load the image from the server side.
    // We do NOT upload the file here; currentAsset is already on the server.
    const payload: any = {
      prompt: prompt,
      provider: 'nano-banana',
      model: 'gemini-3-pro-image-preview', // Enforce Nano Banana
      // Standard params
      aspect_ratio: aspectRatio,
      provider_options: {
        image_size: imageSize,
      },
    };
    if (currentAsset) {
      payload.reference_asset_id = currentAsset.id;
    }

    // Use direct API call with JSON
    const response = await api.post('/modules/image/generate', payload);

    // The response is expected to carry the job id. Without one there is
    // nothing to track and `loading` would never be cleared, so treat it as
    // a failure and let the catch block reset the UI.
    const jobId = response.data?.id;
    if (!jobId) {
      throw new Error('Generation request did not return a job id');
    }

    // Set active job ID to show progress bar
    setActiveJobId(jobId);
  } catch (err: any) {
    console.error(err);
    toast.error(err.response?.data?.detail || 'Generation failed');
    setLoading(false);
  }
};
// Job tracking: `handleGenerate` only starts the job and stores its id in
// `activeJobId`. The `JobProgress` component is rendered with that id and
// reports back through its `onComplete` / `onError` callbacks defined below.
// (The earlier `pollJob` helper was removed in favor of `JobProgress`.)
const [activeJobId, setActiveJobId] = useState<string | null>(null);
// JobProgress onComplete callback: clear the busy state, then load the first
// output asset of a completed job and make it the new working image.
const handleJobComplete = async (job: any) => {
  setLoading(false);
  setActiveJobId(null);

  const producedOutput = job.status === 'completed' && job.output_asset_ids?.length > 0;
  if (!producedOutput) {
    toast.error('Generation failed');
    return;
  }

  // Only the first output asset is used.
  const newAssetId = job.output_asset_ids[0];
  try {
    const fetched = await assetsApi.get(newAssetId);
    const latest = fetched.data;
    setCurrentAsset(latest);
    setHistory(entries => [...entries, latest]);
    setPrompt(''); // empty the prompt so the next edit starts clean
    toast.success('Image updated!');
  } catch (e) {
    toast.error('Failed to load new image');
  }
};
// JobProgress onError callback: drop the busy state and the tracked job id,
// then show the reported message to the user.
const handleJobError = (msg: string): void => {
  setLoading(false);
  setActiveJobId(null);
  toast.error(msg);
};
// Start tracking the given job id.
// NOTE(review): nothing in this file calls this helper; handleGenerate sets
// the active job id itself.
const startJob = async (jobId: string): Promise<void> => {
  setActiveJobId(jobId);
};
// Quick Action Handler
// Quick-edit button handler: copy the preset text into the prompt field.
// The PHP reference auto-submits at this point; here the prompt is only
// filled in, so the user still confirms by submitting it themselves.
const handleQuickAction = (actionPrompt: string): void => {
  setPrompt(actionPrompt);
};
// Reset button handler: after confirmation, drop the current image and the
// prompt, then navigate to the bare editor route.
const handleReset = () => {
  const confirmed = confirm('Start fresh? This will clear the current image context.');
  if (!confirmed) return;

  setCurrentAsset(null);
  setPrompt('');
  router.push('/image/edit-pro'); // navigating without a query string clears the URL param
};
// FileUpload callback: upload the dropped file, then use the returned asset
// as the working image and append it to the session history.
const handleFileUpload = async (file: File) => {
  setLoading(true);
  try {
    const { data: freshAsset } = await assetsApi.upload(file, undefined, true);
    setCurrentAsset(freshAsset);
    setHistory(entries => [...entries, freshAsset]);
    toast.success('Image uploaded ready for editing');
  } catch (err) {
    toast.error('Failed to upload image');
  } finally {
    // The busy flag is cleared whether the upload succeeded or failed.
    setLoading(false);
  }
};
// Download the current image: fetch its data, expose it through a temporary
// object URL, and trigger the save via a programmatic anchor click.
const handleDownload = async () => {
  // No image on screen, nothing to download.
  if (!currentAsset) return;

  try {
    const fetched = await assetsApi.download(currentAsset.id);
    const objectUrl = window.URL.createObjectURL(fetched.data);

    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = currentAsset.original_filename;
    link.click();

    // Release the temporary URL once the click has been dispatched.
    window.URL.revokeObjectURL(objectUrl);
  } catch (e) {
    toast.error('Download failed');
  }
};
return (
<div className="min-h-screen bg-black text-white p-6 md:p-12 font-sans">
<div className="max-w-[1600px] mx-auto grid grid-cols-1 lg:grid-cols-12 gap-8">
{/* Header */}
<div className="lg:col-span-12 flex items-center justify-between border-b-2 border-forge-yellow pb-6 mb-4">
<div>
<h1 className="text-4xl font-bold text-forge-yellow tracking-tight mb-2">Nano Banana Pro</h1>
<p className="text-gray-400">Advanced AI Image Generation & Iterative Editing Loop</p>
</div>
<div className="flex gap-2">
<button onClick={handleReset} className="px-4 py-2 rounded-lg bg-gray-800 hover:bg-gray-700 text-gray-300 transition-colors flex items-center gap-2">
<RotateCcw className="w-4 h-4" /> Reset
</button>
</div>
</div>
{/* Left Panel: Controls */}
<div className="lg:col-span-4 space-y-6">
<div className="bg-[#1a1a1a] border border-[#333] rounded-xl p-6">
<h2 className="text-xl font-semibold text-forge-yellow mb-6 flex items-center gap-2">
<Wand2 className="w-5 h-5" />
{currentAsset ? 'Edit Instruction' : 'Create New Image'}
</h2>
<div className="space-y-4">
<div>
<div className="flex items-center justify-between mb-2">
<label className="block text-forge-yellow font-medium text-sm">Upload Initial Image (Optional)</label>
<button
onClick={() => setShowPicker(true)}
className="text-xs flex items-center gap-1 text-gray-400 hover:text-white transition-colors"
>
<FolderOpen className="w-3 h-3" /> Select from Library
</button>
</div>
<FileUpload
onUpload={handleFileUpload}
accept={{ 'image/*': ['.png', '.jpg', '.jpeg', '.webp'] }}
currentFile={null}
label="Drop an image to start editing"
className="bg-black border-2 border-[#333] hover:border-forge-yellow transition-colors"
/>
<AssetPickerModal
isOpen={showPicker}
onClose={() => setShowPicker(false)}
onConfirm={handleAssetSelect}
allowedTypes={['image/']}
maxSelect={1}
title="Select Image to Edit"
/>
</div>
<div>
<label className="block text-forge-yellow font-medium mb-2 text-sm">Prompt</label>
<textarea
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
placeholder={currentAsset ? 'e.g., "Add a sunset tone", "remove the person", "make it oil painting"' : 'Describe the image you want to create...'}
className="w-full h-32 bg-black border-2 border-[#333] focus:border-forge-yellow rounded-lg p-4 text-white placeholder-gray-600 outline-none resize-none transition-colors"
onKeyDown={(e) => {
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
handleGenerate();
}
}}
/>
</div>
{!currentAsset && (
<div className="grid grid-cols-2 gap-4">
<div>
<label className="block text-forge-yellow font-medium mb-2 text-sm">Aspect Ratio</label>
<select
value={aspectRatio}
onChange={(e) => setAspectRatio(e.target.value)}
className="w-full bg-black border border-[#333] rounded-lg p-2 outline-none focus:border-forge-yellow"
>
<option value="16:9">16:9 (Landscape)</option>
<option value="1:1">1:1 (Square)</option>
<option value="9:16">9:16 (Portrait)</option>
<option value="4:3">4:3</option>
<option value="3:4">3:4</option>
</select>
</div>
<div>
<label className="block text-forge-yellow font-medium mb-2 text-sm">Size</label>
<select
value={imageSize}
onChange={(e) => setImageSize(e.target.value)}
className="w-full bg-black border border-[#333] rounded-lg p-2 outline-none focus:border-forge-yellow"
>
<option value="1K">1K (Fast)</option>
<option value="2K">2K (Balanced)</option>
</select>
</div>
</div>
)}
<button
onClick={(e) => handleGenerate(e)}
disabled={loading || !prompt.trim()}
className={`w-full py-4 rounded-lg font-bold text-lg transition-all transform active:scale-95 ${loading || !prompt.trim()
? 'bg-gray-700 text-gray-400 cursor-not-allowed'
: 'bg-forge-yellow text-black hover:bg-[#ffcd1f] hover:-translate-y-1 shadow-[0_4px_12px_rgba(255,196,7,0.2)]'
}`}
>
{loading ? (
<div className="flex items-center justify-center gap-2">
<Loader2 className="w-5 h-5 animate-spin" />
{currentAsset ? 'Editing...' : 'Generating...'}
</div>
) : (
<div className="flex items-center justify-center gap-2">
{currentAsset ? <Sparkles className="w-5 h-5" /> : <Zap className="w-5 h-5" />}
{currentAsset ? 'Update Image' : 'Generate'}
</div>
)}
</button>
</div>
{/* Quick Actions */}
{currentAsset && (
<div className="mt-8 pt-6 border-t border-[#333]">
<h3 className="text-gray-400 text-xs font-bold uppercase tracking-wider mb-4">Quick Edits</h3>
<div className="grid grid-cols-2 gap-2">
{quickActions.map((action, idx) => (
<button
key={idx}
onClick={() => handleQuickAction(action.prompt)}
className="p-2 text-xs bg-[#222] text-forge-yellow border border-forge-yellow/30 hover:bg-forge-yellow hover:text-black rounded transition-all font-medium text-center"
>
{action.label}
</button>
))}
</div>
</div>
)}
{/* Active Job Tracker */}
{activeJobId && (
<div className="mt-6">
<JobProgress jobId={activeJobId} onComplete={handleJobComplete} onError={handleJobError} />
</div>
)}
</div>
{/* History List (Simple) */}
{history.length > 1 && (
<div className="bg-[#1a1a1a] border border-[#333] rounded-xl p-4">
<h3 className="text-gray-400 text-xs font-bold uppercase tracking-wider mb-4 flex items-center gap-2">
<History className="w-4 h-4" /> Session History
</h3>
<div className="space-y-2 max-h-48 overflow-y-auto custom-scrollbar">
{history.slice(0, -1).reverse().map((histAsset, index) => (
<div
key={`${histAsset.id}-${index}`}
onClick={() => setCurrentAsset(histAsset)}
className="flex items-center gap-3 p-2 rounded hover:bg-[#2a2a2a] cursor-pointer group"
>
<div className="w-12 h-12 rounded bg-black overflow-hidden shrink-0 border border-gray-700">
<img
src={`/api/v1/assets/${histAsset.id}/download`}
className="w-full h-full object-cover opacity-70 group-hover:opacity-100 transition-opacity"
alt="History"
/>
</div>
<div className="min-w-0">
<p className="text-xs text-gray-300 truncate">{histAsset.created_at}</p>
<p className="text-[10px] text-gray-500 truncate">ID: {histAsset.id.slice(0, 8)}</p>
</div>
</div>
))}
</div>
</div>
)}
</div>
{/* Right Panel: Display */}
<div className="lg:col-span-8 bg-[#0a0a0a] rounded-2xl border-2 border-[#1a1a1a] p-2 flex flex-col h-[800px]">
<div className="flex-1 flex items-center justify-center bg-black/50 rounded-xl relative overflow-hidden group">
{currentAsset ? (
<>
<img
src={`/api/v1/assets/${currentAsset.id}/download`}
className="max-w-full max-h-full object-contain shadow-2xl"
alt="Current"
/>
<div className="absolute top-4 right-4 flex gap-2 opacity-0 group-hover:opacity-100 transition-opacity">
<button
onClick={handleDownload}
className="bg-black/80 hover:bg-forge-yellow hover:text-black text-white p-3 rounded-full backdrop-blur-md transition-all border border-white/10"
>
<Download className="w-5 h-5" />
</button>
</div>
</>
) : (
<div className="text-center text-[#333] space-y-4">
<div className="w-24 h-24 rounded-full border-4 border-[#1a1a1a] flex items-center justify-center mx-auto">
<ImageIcon className="w-10 h-10" />
</div>
<p className="font-medium text-lg">Ready to Create</p>
<p className="text-sm max-w-xs mx-auto">Enter a prompt to generate your first image, then iterate with precise edits.</p>
</div>
)}
</div>
</div>
</div>
</div>
);
}
// End of file

View file

@ -6,6 +6,7 @@ import { toast } from 'react-hot-toast';
import { ImagePlus, Download, Sparkles, Pencil, X, Loader2, Maximize, Film } from 'lucide-react';
import JobProgress from '@/components/JobProgress';
import ProviderControls from '@/components/ProviderControls';
import AssetPreviewModal from '@/components/AssetPreviewModal';
import { modulesApi, assetsApi, capabilitiesApi } from '@/lib/api';
import { useStore } from '@/lib/store';
import { ProviderConfig } from '@/types/providers';
@ -32,6 +33,9 @@ export default function ImageGeneratePage() {
const [editingImage, setEditingImage] = useState<any | null>(null);
const [editInstructions, setEditInstructions] = useState('');
// Fullscreen Preview State
const [previewAsset, setPreviewAsset] = useState<any | null>(null);
// Load provider capabilities on mount
useEffect(() => {
const loadCapabilities = async () => {
@ -39,13 +43,15 @@ export default function ImageGeneratePage() {
const response = await capabilitiesApi.getImageProviders();
setCapabilities(response.data);
// Set default provider and model
const firstProvider = Object.keys(response.data)[0];
setProvider(firstProvider);
setModel(response.data[firstProvider].defaultModel);
// Set default provider and model (Favor "nano-banana" if available)
const providers = Object.keys(response.data);
const defaultProvider = providers.includes('nano-banana') ? 'nano-banana' : providers[0];
setProvider(defaultProvider);
setModel(response.data[defaultProvider].defaultModel);
// Initialize with default values
initializeDefaults(response.data[firstProvider]);
initializeDefaults(response.data[defaultProvider]);
} catch (err) {
console.error('Failed to load provider configurations:', err);
toast.error('Failed to load provider configurations');
@ -64,9 +70,58 @@ export default function ImageGeneratePage() {
setPrompt(urlPrompt);
}
// Check for reference asset for editing/variations if we support it via URL
// (Optional: handle assetId if needed)
}, [searchParams]);
const urlProvider = searchParams.get('provider');
const urlModel = searchParams.get('model');
if (capabilities && urlProvider && capabilities[urlProvider]) {
// Only apply if we haven't already (or if we want to force it)
// Since this runs on mount/updates, we check if current state matches
// BUT: prompt/params usually mean "set this up".
const config = capabilities[urlProvider];
setProvider(urlProvider);
// Determine model
const targetModel = urlModel && config.models.some(m => m.id === urlModel)
? urlModel
: config.defaultModel;
setModel(targetModel);
// Calculate Defaults for this Provider + Model
const defaults: Record<string, any> = {};
// 1. Common Controls
if (config.commonControls?.length) {
config.commonControls.forEach(c => defaults[c.name] = c.default);
}
// 2. Model Controls
const modelConfig = config.models.find(m => m.id === targetModel);
if (modelConfig?.controls?.length) {
modelConfig.controls.forEach(c => defaults[c.name] = c.default);
}
setProviderOptions(defaults);
}
const refAssetId = searchParams.get('referenceAssetId');
if (refAssetId) {
const fetchRefAsset = async () => {
try {
const response = await assetsApi.get(refAssetId);
if (response.data) {
handleStartEdit(response.data);
toast.success("Loaded image for editing");
}
} catch (err) {
console.error("Failed to load reference asset:", err);
toast.error("Failed to load image for editing");
}
};
fetchRefAsset();
}
}, [searchParams, capabilities]);
// Initialize default values for provider
const initializeDefaults = (config: ProviderConfig) => {
@ -148,7 +203,7 @@ export default function ImageGeneratePage() {
const payload = {
prompt: effectivePrompt,
provider: editingImage ? 'nano-banana' : provider,
model: editingImage ? 'gemini-2.5-flash-image' : model,
model: editingImage ? 'gemini-3-pro-image-preview' : model,
provider_options: editingImage ? undefined : providerOptions,
reference_asset_id: editingImage?.id || undefined,
};
@ -213,7 +268,7 @@ export default function ImageGeneratePage() {
// Auto-switch to Nano Banana for editing
if (capabilities && capabilities['nano-banana']) {
setProvider('nano-banana');
setModel('gemini-2.5-flash-image');
setModel('gemini-3-pro-image-preview');
initializeDefaults(capabilities['nano-banana']);
}
};
@ -420,38 +475,50 @@ export default function ImageGeneratePage() {
alt="Generated"
className="w-full h-auto object-contain bg-black/20"
/>
{/* Hover Overlay */}
<div className="absolute inset-0 bg-black/60 opacity-0 group-hover:opacity-100 transition-opacity flex flex-col items-center justify-center gap-2 p-4">
{supportsEditing && (
{/* Fullscreen Button (Prominent) */}
<button
onClick={() => setPreviewAsset(image)}
className="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-[calc(50%+2rem)] bg-white/10 hover:bg-white/20 backdrop-blur-md border border-white/20 text-white rounded-full p-4 transition-all hover:scale-110 mb-4"
title="Fullscreen Preview"
>
<Maximize className="w-8 h-8 text-forge-yellow" />
</button>
<div className="absolute bottom-4 left-4 right-4 flex gap-2">
<button
onClick={() => handleStartEdit(image)}
className="bg-purple-600 hover:bg-purple-700 text-white py-1.5 px-3 rounded-lg font-medium text-sm transition-colors flex items-center gap-2 w-full justify-center"
title="Edit with Nano Banana"
onClick={() => router.push(`/image/edit-pro?assetId=${image.id}`)}
className="bg-purple-600 hover:bg-purple-700 text-white py-1.5 px-3 rounded-lg font-medium text-sm transition-colors flex items-center gap-2 flex-1 justify-center"
title="Edit with Nano Banana Pro"
>
<Pencil className="w-4 h-4" />
Edit
Nano Edit
</button>
)}
<button
onClick={() => router.push(`/image/upscale?assetId=${image.id}`)}
className="bg-blue-600 hover:bg-blue-700 text-white py-1.5 px-3 rounded-lg font-medium text-sm transition-colors flex items-center gap-2 w-full justify-center"
>
<Maximize className="w-4 h-4" />
Upscale
</button>
<button
onClick={() => router.push(`/video/generate?assetId=${image.id}`)}
className="bg-green-600 hover:bg-green-700 text-white py-1.5 px-3 rounded-lg font-medium text-sm transition-colors flex items-center gap-2 w-full justify-center"
>
<Film className="w-4 h-4" />
Video
</button>
<button
onClick={() => handleDownload(image.id, image.original_filename)}
className="btn-primary py-1.5 px-3 text-sm flex items-center gap-2 w-full justify-center"
>
<Download className="w-4 h-4" />
Download
</button>
<button
onClick={() => router.push(`/image/upscale?assetId=${image.id}`)}
className="bg-blue-600 hover:bg-blue-700 text-white py-1.5 px-3 rounded-lg font-medium text-sm transition-colors flex items-center gap-2 flex-1 justify-center"
>
<Maximize className="w-4 h-4" />
Upscale
</button>
<button
onClick={() => router.push(`/video/generate?assetId=${image.id}`)}
className="bg-green-600 hover:bg-green-700 text-white py-1.5 px-3 rounded-lg font-medium text-sm transition-colors flex items-center gap-2 flex-1 justify-center"
>
<Film className="w-4 h-4" />
Video
</button>
<button
onClick={() => handleDownload(image.id, image.original_filename)}
className="btn-primary py-1.5 px-3 text-sm flex items-center gap-2 flex-1 justify-center"
>
<Download className="w-4 h-4" />
Save
</button>
</div>
</div>
</div>
</div>
@ -464,6 +531,17 @@ export default function ImageGeneratePage() {
)}
</div>
</div>
{/* Fullscreen Preview Modal */}
{previewAsset && (
<AssetPreviewModal
isOpen={!!previewAsset}
onClose={() => setPreviewAsset(null)}
assetUrl={`/api/v1/assets/${previewAsset.id}/download`}
assetType="image"
assetName={previewAsset.original_filename}
/>
)}
</div>
);
}

View file

@ -3,8 +3,9 @@
import { useState, useEffect } from 'react';
import { useRouter, useSearchParams } from 'next/navigation';
import { toast } from 'react-hot-toast';
import { FileText, Copy, Check, Sparkles, Download, Trash2, RefreshCw } from 'lucide-react';
import { FileText, Copy, Check, Sparkles, Download, Trash2, RefreshCw, FolderPlus } from 'lucide-react';
import FileUpload from '@/components/FileUpload';
import AssetPickerModal from '@/components/AssetPickerModal';
import { modulesApi, assetsApi, jobsApi } from '@/lib/api';
import { useStore } from '@/lib/store';
import { useDragFromCarousel } from '@/hooks/useDragFromCarousel';
@ -29,6 +30,7 @@ export default function AltTextPage() {
const { addJob, updateJob } = useStore();
const [queue, setQueue] = useState<QueueItem[]>([]);
const [processing, setProcessing] = useState(false);
const [showPicker, setShowPicker] = useState(false);
// Handle URL params on mount
useEffect(() => {
@ -113,6 +115,30 @@ export default function AltTextPage() {
toast.success(`${files.length} images added to queue`);
};
// AssetPickerModal callback: queue the selected library assets for alt-text
// generation, skipping any asset that is already queued.
//
// Toasts are raised here rather than inside the `setQueue` updater: updater
// functions must stay pure (React may invoke them more than once), and the
// success message should count only the items that were actually added.
//
// @param assets  Assets returned by the picker; `id` and `original_filename`
//                are read from each one.
const handleAssetSelection = (assets: any[]) => {
  // Asset ids already present in the queue as of this render. Ids accepted
  // below are added too, so duplicates within the selection are dropped.
  const seenAssetIds = new Set(queue.map(item => item.assetId).filter(Boolean));

  const freshItems: QueueItem[] = [];
  for (const asset of assets) {
    if (seenAssetIds.has(asset.id)) continue;
    seenAssetIds.add(asset.id);
    freshItems.push({
      id: Math.random().toString(36).substring(7),
      assetId: asset.id,
      filename: asset.original_filename,
      status: 'pending',
    });
  }

  if (freshItems.length < assets.length) {
    toast('Some items were already in queue', { icon: '' });
  }
  if (freshItems.length === 0) return;

  setQueue(prev => {
    // Re-check against the latest queue in case it changed since this render.
    const queuedIds = new Set(prev.map(p => p.assetId).filter(Boolean));
    return [...prev, ...freshItems.filter(item => !queuedIds.has(item.assetId))];
  });

  toast.success(`${freshItems.length} images added from library`);
};
const processItem = async (item: QueueItem) => {
if (item.status === 'completed' || item.status === 'processing') return item;
@ -283,6 +309,16 @@ export default function AltTextPage() {
>
{/* Upload Section */}
<div className="space-y-4">
<div className="flex justify-between items-center">
<h2 className="text-lg font-semibold text-white">Add Images</h2>
<button
onClick={() => setShowPicker(true)}
className="btn-secondary text-sm flex items-center gap-2"
>
<FolderPlus className="w-4 h-4" /> Select from Library
</button>
</div>
<FileUpload
onUploadMultiple={handleFileUpload}
accept={{ 'image/*': ['.png', '.jpg', '.jpeg', '.webp', '.gif'] }}
@ -291,6 +327,14 @@ export default function AltTextPage() {
/>
</div>
<AssetPickerModal
isOpen={showPicker}
onClose={() => setShowPicker(false)}
onConfirm={handleAssetSelection}
allowedTypes={['image/']}
title="Select Images for Alt Text"
/>
{/* Queue Actions */}
{queue.length > 0 && (
<div className="flex flex-wrap gap-4 items-center justify-between bg-forge-dark p-4 rounded-xl border border-gray-800">

View file

@ -1,201 +1,431 @@
'use client';
import { useState } from 'react';
import { useState, useEffect } from 'react';
import { useRouter } from 'next/navigation';
import { toast } from 'react-hot-toast';
import { Wand2, Copy, Check, Sparkles, RefreshCw } from 'lucide-react';
import {
Sparkles, Copy, Camera, Check, Loader2, Info, Sliders,
RefreshCw, Lock
} from 'lucide-react';
import { modulesApi } from '@/lib/api';
const styles = [
{ value: 'cinematic', label: 'Cinematic', description: 'Movie-like scenes with dramatic lighting' },
{ value: 'photographic', label: 'Photographic', description: 'Professional photography style' },
{ value: 'artistic', label: 'Artistic', description: 'Painterly with rich colors' },
{ value: 'product', label: 'Product', description: 'Commercial product photography' },
{ value: 'fantasy', label: 'Fantasy', description: 'Magical and otherworldly' },
{ value: 'minimal', label: 'Minimal', description: 'Clean and simple' },
{ value: 'vintage', label: 'Vintage', description: 'Nostalgic retro aesthetics' },
{ value: 'futuristic', label: 'Futuristic', description: 'Sci-fi and modern tech' },
];
interface CineOption {
value: string;
display?: string;
name?: string;
tooltip?: string;
tags?: string;
keywords?: string;
compatibleLenses?: string[];
compatibleFormats?: string[];
defaultCamera?: string;
defaultLens?: string;
focusType?: string;
[key: string]: any;
}
export default function PromptStudioPage() {
interface CineOptions {
cameras: CineOption[];
lenses: CineOption[];
applications: CineOption[];
aspect_ratios: string[];
}
export default function CinePromptStudioPage() {
const router = useRouter();
// Data State
const [options, setOptions] = useState<CineOptions | null>(null);
const [loadingOptions, setLoadingOptions] = useState(true);
// Selection State
const [application, setApplication] = useState('');
const [camera, setCamera] = useState('');
const [lens, setLens] = useState('');
const [aspectRatio, setAspectRatio] = useState('16:9');
const [prompt, setPrompt] = useState('');
const [style, setStyle] = useState('cinematic');
const [creativeFreedom, setCreativeFreedom] = useState(0.3);
// UI State
const [compatibleLenses, setCompatibleLenses] = useState<CineOption[]>([]);
const [generating, setGenerating] = useState(false);
const [enhancedPrompt, setEnhancedPrompt] = useState('');
const [negativePrompt, setNegativePrompt] = useState('');
const [loading, setLoading] = useState(false);
const [copied, setCopied] = useState<string | null>(null);
const [showTooltip, setShowTooltip] = useState<string | null>(null);
// Fetch Options on Mount
useEffect(() => {
const fetchOptions = async () => {
try {
const response = await modulesApi.getCineOptions();
setOptions(response.data);
// Set Defaults
const apps = response.data.applications;
if (apps && apps.length > 0) {
const defaultApp = apps.find((a: any) => a.value === 'Golden Hour (Outdoor)') || apps[0];
setApplication(defaultApp.value);
}
} catch (error) {
console.error('Failed to load CineOptions:', error);
toast.error('Failed to load studio options');
} finally {
setLoadingOptions(false);
}
};
fetchOptions();
}, []);
// Update Camera/Lens when Application Changes
useEffect(() => {
if (!options || !application) return;
const selectedApp = options.applications.find(a => a.value === application);
if (selectedApp) {
if (selectedApp.defaultCamera) setCamera(selectedApp.defaultCamera);
if (selectedApp.defaultLens) setLens(selectedApp.defaultLens);
}
}, [application, options]);
// Filter Compatible Lenses
useEffect(() => {
if (!options || !camera) return;
const selectedCamera = options.cameras.find(c => c.value === camera);
if (selectedCamera && selectedCamera.compatibleLenses) {
const filtered = options.lenses.filter(l =>
selectedCamera.compatibleLenses?.includes(l.value)
);
setCompatibleLenses(filtered);
// Auto-select valid lens if current is invalid
if (lens && !filtered.find(l => l.value === lens) && filtered.length > 0) {
setLens(filtered[0].value);
}
} else {
setCompatibleLenses(options.lenses);
}
}, [camera, options]);
// Actions
const handleEnhance = async () => {
if (!prompt.trim()) {
toast.error('Please enter a prompt');
toast.error('Please enter a scene description');
return;
}
setLoading(true);
setGenerating(true);
setEnhancedPrompt('');
try {
const response = await modulesApi.enhancePrompt({
prompt,
style,
application,
camera,
lens,
aspect_ratio: aspectRatio,
creative_freedom: creativeFreedom
});
setEnhancedPrompt(response.data.enhanced_prompt);
setNegativePrompt(response.data.negative_prompt);
toast.success('Prompt enhanced!');
} catch (err: any) {
toast.error(err.response?.data?.detail || 'Failed to enhance prompt');
toast.success('Prompt optimized successfully!');
} catch (error: any) {
toast.error(error.response?.data?.detail || 'Optimization failed');
} finally {
setLoading(false);
setGenerating(false);
}
};
const copyToClipboard = (text: string, field: string) => {
const copyToClipboard = (text: string, type: string) => {
navigator.clipboard.writeText(text);
setCopied(field);
toast.success('Copied to clipboard!');
setCopied(type);
setTimeout(() => setCopied(null), 2000);
toast.success('Copied to clipboard');
};
const useEnhancedPrompt = () => {
setPrompt(enhancedPrompt);
toast.success('Enhanced prompt moved to input');
};
if (loadingOptions) {
return (
<div className="flex h-96 items-center justify-center">
<Loader2 className="w-8 h-8 animate-spin text-forge-yellow" />
</div>
);
}
const selectedApp = options?.applications.find(a => a.value === application);
const selectedCamera = options?.cameras.find(c => c.value === camera);
const selectedLens = options?.lenses.find(l => l.value === lens);
return (
<div className="max-w-4xl mx-auto space-y-8">
<div className="flex items-center gap-4">
<div className="w-12 h-12 bg-forge-yellow/10 rounded-lg flex items-center justify-center">
<Wand2 className="w-6 h-6 text-forge-yellow" />
</div>
<div>
<h1 className="text-2xl font-bold text-white">Prompt Studio</h1>
<p className="text-gray-500">Enhance your prompts with AI assistance</p>
</div>
</div>
{/* Input Section */}
<div className="bg-forge-dark rounded-xl border border-gray-800 p-6 space-y-6">
<div>
<label className="block text-sm font-medium text-gray-300 mb-2">
Your Prompt
</label>
<textarea
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
placeholder="Enter a basic prompt to enhance..."
className="input-field min-h-[120px] resize-none"
/>
</div>
{/* Style Selection */}
<div>
<label className="block text-sm font-medium text-gray-300 mb-3">
Enhancement Style
</label>
<div className="grid grid-cols-2 md:grid-cols-4 gap-3">
{styles.map((s) => (
<button
key={s.value}
onClick={() => setStyle(s.value)}
className={`p-3 rounded-lg text-left transition-all ${style === s.value
? 'bg-forge-yellow/10 border-forge-yellow text-white border'
: 'bg-forge-gray border border-gray-700 text-gray-400 hover:border-gray-600'
}`}
>
<p className="font-medium text-sm">{s.label}</p>
<p className="text-xs mt-1 opacity-70">{s.description}</p>
</button>
))}
<div className="max-w-7xl mx-auto space-y-8 pb-12">
{/* Header */}
<div className="flex items-center justify-between">
<div className="flex items-center gap-4">
<div className="w-12 h-12 bg-forge-yellow/10 rounded-lg flex items-center justify-center">
<Camera className="w-6 h-6 text-forge-yellow" />
</div>
<div>
<h1 className="text-2xl font-bold text-white">Prompt Studio Pro</h1>
<p className="text-gray-500">Physics-based prompt engineering engine</p>
</div>
</div>
<button
onClick={handleEnhance}
disabled={loading || !prompt.trim()}
className="btn-primary w-full flex items-center justify-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed"
>
{loading ? (
<>
<RefreshCw className="w-5 h-5 animate-spin" />
Enhancing...
</>
) : (
<>
<Sparkles className="w-5 h-5" />
Enhance Prompt
</>
)}
</button>
<div className="text-xs text-gray-500 bg-gray-900 px-3 py-1 rounded-full border border-gray-800">
v3.3 - Auteur Logic Enabled
</div>
</div>
{/* Results Section */}
{(enhancedPrompt || negativePrompt) && (
<div className="space-y-6">
{/* Enhanced Prompt */}
<div className="bg-forge-dark rounded-xl border border-gray-800 p-6">
<div className="flex items-center justify-between mb-3">
<h3 className="text-white font-medium">Enhanced Prompt</h3>
<div className="flex items-center gap-2">
<div className="grid grid-cols-1 lg:grid-cols-12 gap-8">
{/* LEFT COLUMN: CONTROLS */}
<div className="lg:col-span-4 space-y-6">
<div className="bg-forge-dark rounded-xl border border-gray-800 p-6 space-y-6">
<h2 className="text-sm font-bold text-gray-400 uppercase tracking-wider flex items-center gap-2 border-b border-gray-800 pb-2">
<Sliders className="w-4 h-4" />
Technical Specs
</h2>
{/* Application */}
<div className="space-y-2">
<label className="text-sm text-gray-300 font-medium">Application Context</label>
<select
value={application}
onChange={(e) => setApplication(e.target.value)}
className="w-full bg-black/50 border border-gray-700 rounded-lg px-3 py-2 text-white focus:border-forge-yellow focus:ring-1 focus:ring-forge-yellow outline-none"
>
{options?.applications.map((app) => (
<option key={app.value} value={app.value}>{app.value}</option>
))}
</select>
</div>
{/* Camera */}
<div className="space-y-2 relative">
<div className="flex items-center justify-between">
<label className="text-sm text-gray-300 font-medium">Camera Body</label>
<button
onClick={useEnhancedPrompt}
className="text-sm text-forge-yellow hover:text-yellow-400 flex items-center gap-1"
onMouseEnter={() => setShowTooltip('camera')}
onMouseLeave={() => setShowTooltip(null)}
className="text-gray-500 hover:text-white"
>
<RefreshCw className="w-3 h-3" />
Use as input
</button>
<button
onClick={() => router.push(`/image/generate?prompt=${encodeURIComponent(enhancedPrompt)}`)}
className="text-sm text-blue-400 hover:text-blue-300 flex items-center gap-1"
>
<Sparkles className="w-3 h-3" />
Generate Image
</button>
<button
onClick={() => copyToClipboard(enhancedPrompt, 'enhanced')}
className="p-2 text-gray-400 hover:text-forge-yellow transition-colors"
>
{copied === 'enhanced' ? (
<Check className="w-4 h-4" />
) : (
<Copy className="w-4 h-4" />
)}
<Info className="w-4 h-4" />
</button>
</div>
<select
value={camera}
onChange={(e) => setCamera(e.target.value)}
className="w-full bg-black/50 border border-gray-700 rounded-lg px-3 py-2 text-white focus:border-forge-yellow focus:ring-1 focus:ring-forge-yellow outline-none"
>
{options?.cameras.map((c) => (
<option key={c.value} value={c.value}>{c.display}</option>
))}
</select>
<div className="text-xs text-gray-500 mt-1 truncate">
{selectedCamera?.tags}
</div>
{/* Tooltip Overlay */}
{showTooltip === 'camera' && (
<div className="absolute z-10 bottom-full mb-2 left-0 right-0 bg-gray-900 border border-gray-700 p-3 rounded-lg shadow-xl text-xs text-gray-300">
{selectedCamera?.tooltip}
</div>
)}
</div>
{/* Lens */}
<div className="space-y-2 relative">
<div className="flex items-center justify-between">
<label className="text-sm text-gray-300 font-medium">Lens Kit</label>
<button
onMouseEnter={() => setShowTooltip('lens')}
onMouseLeave={() => setShowTooltip(null)}
className="text-gray-500 hover:text-white"
>
<Info className="w-4 h-4" />
</button>
</div>
<select
value={lens}
onChange={(e) => setLens(e.target.value)}
className="w-full bg-black/50 border border-gray-700 rounded-lg px-3 py-2 text-white focus:border-forge-yellow focus:ring-1 focus:ring-forge-yellow outline-none"
>
{compatibleLenses.map((l) => (
<option key={l.value} value={l.value}>{l.display}</option>
))}
</select>
<div className="text-xs text-gray-500 mt-1 truncate">
{selectedLens?.keywords}
</div>
{showTooltip === 'lens' && (
<div className="absolute z-10 bottom-full mb-2 left-0 right-0 bg-gray-900 border border-gray-700 p-3 rounded-lg shadow-xl text-xs text-gray-300">
{selectedLens?.tooltip}
</div>
)}
</div>
{/* Aspect Ratio */}
<div className="space-y-2">
<label className="text-sm text-gray-300 font-medium">Aspect Ratio</label>
<div className="grid grid-cols-3 gap-2">
{options?.aspect_ratios.map((ratio) => (
<button
key={ratio}
onClick={() => setAspectRatio(ratio)}
className={`text-xs py-2 rounded-md transition-colors ${aspectRatio === ratio
? 'bg-forge-yellow text-black font-bold'
: 'bg-gray-800 text-gray-400 hover:bg-gray-700'
}`}
>
{ratio}
</button>
))}
</div>
</div>
{/* Pro Tip */}
<div className="bg-blue-900/20 border border-blue-900/50 rounded-lg p-3 flex items-start gap-2">
<Info className="w-4 h-4 text-blue-400 mt-0.5 shrink-0" />
<p className="text-xs text-blue-300">
Selecting an Application auto-configures optimal camera/lens physics. Override manually if needed.
</p>
</div>
</div>
</div>
{/* RIGHT COLUMN: EDITOR */}
<div className="lg:col-span-8 space-y-6">
{/* Creative Freedom */}
<div className="bg-forge-dark rounded-xl border border-gray-800 p-6">
<div className="flex items-center justify-between mb-4">
<label className="text-sm font-bold text-gray-300 flex items-center gap-2">
<Sparkles className="w-4 h-4 text-forge-yellow" />
Creative Freedom
</label>
<span className="text-xs text-forge-yellow font-mono px-2 py-1 bg-forge-yellow/10 rounded">
{(creativeFreedom * 100).toFixed(0)}%
</span>
</div>
<input
type="range"
min="0"
max="100"
value={creativeFreedom * 100}
onChange={(e) => setCreativeFreedom(Number(e.target.value) / 100)}
className="w-full h-2 bg-gray-700 rounded-lg appearance-none cursor-pointer accent-forge-yellow"
/>
<div className="flex justify-between text-xs text-gray-500 mt-2">
<span>Strict (Literal)</span>
<span>Balanced</span>
<span>Creative (Smart Fill)</span>
</div>
<p className="text-gray-300 whitespace-pre-wrap">{enhancedPrompt}</p>
</div>
{/* Negative Prompt */}
<div className="bg-forge-dark rounded-xl border border-gray-800 p-6">
<div className="flex items-center justify-between mb-3">
<h3 className="text-white font-medium">Negative Prompt</h3>
{/* Input Area */}
<div className="space-y-4">
<label className="text-sm font-bold text-gray-300 block">Scene Description</label>
<textarea
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
placeholder="Describe your scene... e.g., 'A vintage car parked in front of a neon-lit diner at dusk'"
className="w-full h-32 bg-forge-dark border border-gray-800 rounded-xl p-4 text-white placeholder-gray-600 focus:border-forge-yellow outline-none resize-none transition-all"
/>
<div className="flex gap-4">
<button
onClick={() => copyToClipboard(negativePrompt, 'negative')}
className="p-2 text-gray-400 hover:text-forge-yellow transition-colors"
onClick={handleEnhance}
disabled={generating || !prompt.trim()}
className="flex-1 btn-primary py-3 flex items-center justify-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed"
>
{copied === 'negative' ? (
<Check className="w-4 h-4" />
{generating ? (
<>
<Loader2 className="w-5 h-5 animate-spin" />
Optimizing Physics...
</>
) : (
<Copy className="w-4 h-4" />
<>
<Sparkles className="w-5 h-5" />
Optimize My Prompt
</>
)}
</button>
</div>
<p className="text-gray-300 whitespace-pre-wrap">{negativePrompt}</p>
</div>
</div>
)}
{/* Tips */}
<div className="bg-forge-gray rounded-xl p-6">
<h3 className="text-white font-medium mb-3">Tips for Better Prompts</h3>
<ul className="space-y-2 text-gray-400 text-sm">
<li> Be specific about subjects, actions, and settings</li>
<li> Include mood, lighting, and atmosphere details</li>
<li> Mention art styles or artist references if desired</li>
<li> Use the negative prompt to exclude unwanted elements</li>
<li> Iterate by using enhanced prompts as new input</li>
</ul>
{/* Output Area */}
{enhancedPrompt && (
<div className="space-y-4">
{/* Enhanced Prompt */}
<div className="relative group">
<div className="absolute -inset-0.5 bg-gradient-to-r from-forge-yellow to-purple-600 rounded-xl opacity-30 group-hover:opacity-50 transition duration-1000"></div>
<div className="relative bg-forge-dark rounded-xl border border-gray-700 p-6">
<div className="flex items-center justify-between mb-4 border-b border-gray-800 pb-4">
<h3 className="text-sm font-bold text-gray-400 uppercase tracking-wider">Physics-Optimized Prompt</h3>
<div className="flex gap-2">
<button
onClick={() => router.push(`/image/generate?prompt=${encodeURIComponent(enhancedPrompt)}&provider=nano-banana&model=gemini-3-pro-image-preview`)}
className="text-xs bg-blue-600 hover:bg-blue-500 text-white px-3 py-1.5 rounded-lg flex items-center gap-1 transition-colors"
>
<Sparkles className="w-3 h-3" />
Generate
</button>
<button
onClick={() => copyToClipboard(enhancedPrompt, 'enhanced')}
className="text-xs bg-gray-700 hover:bg-gray-600 text-white px-3 py-1.5 rounded-lg flex items-center gap-1 transition-colors"
>
{copied === 'enhanced' ? <Check className="w-3 h-3" /> : <Copy className="w-3 h-3" />}
Copy
</button>
</div>
</div>
<p className="text-gray-200 leading-relaxed font-mono text-sm whitespace-pre-wrap">
{enhancedPrompt}
</p>
{/* Stats */}
<div className="mt-4 pt-4 border-t border-gray-800 flex gap-4 text-xs text-gray-500">
<span>{enhancedPrompt.split(' ').length} words</span>
<span>Focus: {selectedApp?.focusType === 'realism' ? 'Deep Realism' : 'Stylistic'}</span>
<span>Sensor: {selectedCamera?.sensorFormat}</span>
</div>
</div>
</div>
{/* Negative Prompt */}
<div className="bg-forge-dark rounded-xl border border-gray-800 p-6">
<div className="flex items-center justify-between mb-2">
<h3 className="text-sm font-bold text-gray-500 uppercase">Negative Constraints</h3>
<button
onClick={() => copyToClipboard(negativePrompt, 'negative')}
className="text-gray-500 hover:text-white transition-colors"
>
{copied === 'negative' ? <Check className="w-3 h-3" /> : <Copy className="w-3 h-3" />}
</button>
</div>
<p className="text-gray-400 text-sm font-mono">{negativePrompt}</p>
</div>
</div>
)}
{/* Current Settings Summary */}
{!enhancedPrompt && (
<div className="bg-gray-900/50 rounded-xl p-6 border border-gray-800">
<div className="flex items-center justify-between mb-4">
<span className="text-xs text-gray-500 font-bold uppercase">Current Physics Config</span>
<span className={`text-xs px-2 py-0.5 rounded-full border ${selectedApp?.focusType === 'realism' ? 'bg-green-900/20 text-green-400 border-green-900' : 'bg-purple-900/20 text-purple-400 border-purple-900'}`}>
{selectedApp?.focusType === 'realism' ? 'Realism Mode' : 'Stylistic Mode'}
</span>
</div>
<div className="grid grid-cols-2 gap-4 text-xs text-gray-400">
<div><span className="text-gray-600 block mb-1">Sensor Physics</span> {selectedCamera?.physics}</div>
<div><span className="text-gray-600 block mb-1">Optical Characteristics</span> {selectedLens?.physics}</div>
<div><span className="text-gray-600 block mb-1">Lighting Model</span> {selectedApp?.lighting}</div>
</div>
</div>
)}
</div>
</div>
</div>
);

View file

@ -57,7 +57,11 @@ const colors = [
export default function SubtitlesPage() {
const { addJob, updateJob } = useStore();
const [file, setFile] = useState<File | null>(null);
const [subtitleFile, setSubtitleFile] = useState<File | null>(null);
const [subtitleAssetId, setSubtitleAssetId] = useState<string | null>(null);
const [assetId, setAssetId] = useState<string | null>(null);
const [mode, setMode] = useState<'transcribe' | 'burn'>('transcribe');
const [sourceLanguage, setSourceLanguage] = useState('');
const [targetLanguage, setTargetLanguage] = useState('');
const [burnSubtitles, setBurnSubtitles] = useState(false);
@ -73,33 +77,31 @@ export default function SubtitlesPage() {
const [showAdvanced, setShowAdvanced] = useState(false);
const [jobId, setJobId] = useState<string | null>(null);
const [uploadProgress, setUploadProgress] = useState(0);
const [results, setResults] = useState<any>(null);
const [loading, setLoading] = useState(false);
const [uploading, setUploading] = useState(false);
// Stage the chosen video in component state and confirm it to the user.
// Declared async, so it returns a promise even though nothing is awaited here.
const handleFileUpload = async (video: File) => {
    setFile(video);
    toast.success('Video ready for processing!');
};
// Stage the chosen subtitle file in component state and confirm it to the user.
// Declared async, so it returns a promise even though nothing is awaited here.
const handleSubtitleUpload = async (subtitles: File) => {
    setSubtitleFile(subtitles);
    toast.success('Subtitle file ready!');
};
const handleProcess = async () => {
if (!file) {
toast.error('Please upload a video first');
return;
}
// Debug: Check file state
console.log('🎬 Starting subtitle processing...');
console.log(' File object:', file);
console.log(' File name:', file.name);
console.log(' File size:', file.size, 'bytes');
console.log(' File type:', file.type);
setLoading(true);
setUploadProgress(0);
setResults(null);
try {
// Create minimal FormData with only required fields for testing
const formData = new FormData();
formData.append('file', file);
@ -110,21 +112,27 @@ export default function SubtitlesPage() {
if (targetLanguage) {
formData.append('target_language', targetLanguage);
}
if (burnSubtitles) {
// Handle Burn Mode specific inputs
if (mode === 'burn' || burnSubtitles) {
formData.append('burn_subtitles', 'true');
}
// Debug: Log FormData contents
console.log('📤 Sending MINIMAL FormData to subtitle API:');
for (const [key, value] of formData.entries()) {
if (value instanceof File) {
console.log(` ${key}:`, value.name, `(${value.size} bytes, ${value.type})`);
} else {
console.log(` ${key}:`, value);
if (subtitleFile) {
formData.append('subtitle_file', subtitleFile);
}
// Add styling options
formData.append('font', font);
formData.append('font_size', fontSize.toString());
formData.append('text_color', textColor);
formData.append('outline_color', outlineColor);
formData.append('outline_width', outlineWidth.toString());
formData.append('position', position);
}
const response = await modulesApi.processSubtitles(formData);
const response = await modulesApi.processSubtitles(formData, (progressEvent) => {
const percentCompleted = Math.round((progressEvent.loaded * 100) / progressEvent.total);
setUploadProgress(percentCompleted);
});
const job = response.data;
setJobId(job.id);
@ -139,30 +147,35 @@ export default function SubtitlesPage() {
toast.success('Subtitle processing started!');
} catch (err: any) {
console.error('Subtitle processing error:', err);
console.error('Error response:', err.response);
console.error('Error data:', err.response?.data);
// Handle validation errors (array of error objects)
if (err.response?.data?.detail && Array.isArray(err.response.data.detail)) {
const errorMessages = err.response.data.detail.map((e: any) => {
// ... error handling ...
const errorData = err.response?.data;
if (errorData?.detail && Array.isArray(errorData.detail)) {
const errorMessages = errorData.detail.map((e: any) => {
return `${e.loc?.join('.')} - ${e.msg}`;
}).join('; ');
toast.error(`Validation error: ${errorMessages}`);
} else if (typeof err.response?.data?.detail === 'string') {
toast.error(err.response.data.detail);
} else if (typeof errorData?.detail === 'string') {
toast.error(errorData.detail);
} else if (errorData?.message) {
toast.error(errorData.message);
} else {
toast.error('Failed to start processing');
toast.error('Failed to start processing. Check console for details.');
}
setLoading(false);
setUploadProgress(0);
}
};
// ... (keep handleJobComplete, handleJobError, etc.) ...
const handleJobComplete = async (job: any) => {
setLoading(false);
setUploadProgress(0);
updateJob(job.id, { status: 'completed', progress: 100 });
if (job.output_asset_ids?.length > 0) {
// ... (keep existing logic) ...
const assets = await Promise.all(
job.output_asset_ids.map(async (id: string) => {
const asset = await assetsApi.get(id);
@ -180,10 +193,12 @@ export default function SubtitlesPage() {
// Job failure callback: leave the loading state, clear the upload progress
// indicator, and surface the failure message as an error toast.
const handleJobError = (message: string) => {
    setLoading(false);
    setUploadProgress(0);
    toast.error(message);
};
const handleDownload = async (asset: any) => {
// ... (keep existing logic) ...
try {
const response = await assetsApi.download(asset.id);
const url = window.URL.createObjectURL(response.data);
@ -199,6 +214,7 @@ export default function SubtitlesPage() {
return (
<div className="max-w-6xl mx-auto space-y-8">
{/* ... (Keep header) ... */}
<div className="flex items-center gap-4">
<div className="w-12 h-12 bg-forge-yellow/10 rounded-lg flex items-center justify-center">
<Captions className="w-6 h-6 text-forge-yellow" />
@ -212,44 +228,92 @@ export default function SubtitlesPage() {
<div className="grid grid-cols-1 lg:grid-cols-2 gap-8">
{/* Controls */}
<div className="space-y-6">
{/* File Upload */}
<div>
<label className="block text-sm font-medium text-gray-300 mb-2">
Upload Video
</label>
<FileUpload
onUpload={handleFileUpload}
accept={{ 'video/*': ['.mp4', '.mov', '.avi', '.webm'] }}
currentFile={file}
onClear={() => {
setFile(null);
setAssetId(null);
}}
label="Upload a video for transcription"
/>
{/* ... (Keep Mode Selection, File Uploads, Languages, Styling, Checkbox) ... */}
{/* Mode Selection */}
<div className="flex bg-forge-dark rounded-lg p-1 border border-gray-800 mb-6">
<button
onClick={() => { setMode('transcribe'); setBurnSubtitles(false); }}
className={`flex-1 py-2 px-4 rounded-md text-sm font-medium transition-colors ${mode === 'transcribe'
? 'bg-forge-yellow text-black shadow-sm'
: 'text-gray-400 hover:text-white'
}`}
>
Auto-Transcribe
</button>
<button
onClick={() => { setMode('burn'); setBurnSubtitles(true); }}
className={`flex-1 py-2 px-4 rounded-md text-sm font-medium transition-colors ${mode === 'burn'
? 'bg-forge-yellow text-black shadow-sm'
: 'text-gray-400 hover:text-white'
}`}
>
Burn from SRT
</button>
</div>
{/* File Uploads */}
<div className="space-y-4">
<div>
<label className="block text-sm font-medium text-gray-300 mb-2">
Upload Video
</label>
<FileUpload
onUpload={handleFileUpload}
accept={{ 'video/*': ['.mp4', '.mov', '.avi', '.webm'] }}
currentFile={file}
onClear={() => {
setFile(null);
setAssetId(null);
}}
label="Upload a video"
/>
</div>
{mode === 'burn' && (
<div>
<label className="block text-sm font-medium text-gray-300 mb-2">
Upload Subtitles (SRT/VTT)
</label>
<FileUpload
onUpload={handleSubtitleUpload}
accept={{
'text/plain': ['.srt', '.vtt', '.ass', '.txt'],
'application/x-subrip': ['.srt']
}}
currentFile={subtitleFile}
onClear={() => {
setSubtitleFile(null);
setSubtitleAssetId(null);
}}
label="Upload subtitle file"
/>
</div>
)}
</div>
{/* Languages */}
<div className="grid grid-cols-2 gap-4">
{mode === 'transcribe' && (
<div>
<label className="block text-sm font-medium text-gray-300 mb-2">
Source Language
</label>
<select
value={sourceLanguage}
onChange={(e) => setSourceLanguage(e.target.value)}
className="select-field"
>
{languages.map((lang) => (
<option key={lang.value} value={lang.value}>
{lang.label}
</option>
))}
</select>
</div>
)}
<div>
<label className="block text-sm font-medium text-gray-300 mb-2">
Source Language
</label>
<select
value={sourceLanguage}
onChange={(e) => setSourceLanguage(e.target.value)}
className="select-field"
>
{languages.map((lang) => (
<option key={lang.value} value={lang.value}>
{lang.label}
</option>
))}
</select>
</div>
<div>
<label className="block text-sm font-medium text-gray-300 mb-2">
Translate To
Translate To {mode === 'burn' && '(Optional)'}
</label>
<select
value={targetLanguage}
@ -361,29 +425,43 @@ export default function SubtitlesPage() {
</div>
</div>
{/* Burn Subtitles */}
<div className="flex items-center gap-3">
<input
type="checkbox"
id="burnSubtitles"
checked={burnSubtitles}
onChange={(e) => setBurnSubtitles(e.target.checked)}
className="w-4 h-4 rounded border-gray-600 bg-forge-dark text-forge-yellow focus:ring-forge-yellow"
/>
<label htmlFor="burnSubtitles" className="text-gray-300">
Burn subtitles into video (hardcoded)
</label>
</div>
{/* Burn Subtitles Checkbox - Only show in Transcribe mode */}
{mode === 'transcribe' && (
<div className="flex items-center gap-3">
<input
type="checkbox"
id="burnSubtitles"
checked={burnSubtitles}
onChange={(e) => setBurnSubtitles(e.target.checked)}
className="w-4 h-4 rounded border-gray-600 bg-forge-dark text-forge-yellow focus:ring-forge-yellow"
/>
<label htmlFor="burnSubtitles" className="text-gray-300">
Burn subtitles into video (hardcoded)
</label>
</div>
)}
{/* Process Button */}
<button
onClick={handleProcess}
disabled={loading || !file}
className="btn-primary w-full flex items-center justify-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed"
>
<Sparkles className="w-5 h-5" />
{loading ? 'Processing...' : 'Generate Subtitles'}
</button>
<div className="space-y-4">
<button
onClick={handleProcess}
disabled={loading || !file}
className="btn-primary w-full flex items-center justify-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed"
>
<Sparkles className="w-5 h-5" />
{loading ? (uploadProgress < 100 ? `Uploading Video (${uploadProgress}%)` : 'Processing...') : 'Generate Subtitles'}
</button>
{/* Upload Progress Bar (Visible during upload only) */}
{loading && uploadProgress < 100 && uploadProgress > 0 && (
<div className="w-full bg-gray-700 rounded-full h-2.5">
<div
className="bg-forge-yellow h-2.5 rounded-full transition-all duration-300"
style={{ width: `${uploadProgress}%` }}
></div>
</div>
)}
</div>
{/* Job Progress */}
{jobId && loading && (
@ -393,6 +471,15 @@ export default function SubtitlesPage() {
onError={handleJobError}
/>
)}
{/* Processing State with no Job ID yet (Waiting for upload result) */}
{loading && !jobId && uploadProgress === 100 && (
<div className="bg-forge-dark border border-forge-yellow/30 rounded-lg p-4 flex items-center gap-3 animate-pulse">
<div className="w-5 h-5 border-2 border-forge-yellow border-t-transparent rounded-full animate-spin"></div>
<div className="text-sm text-forge-yellow">Initializing Job...</div>
</div>
)}
</div>
{/* Results */}
@ -403,22 +490,37 @@ export default function SubtitlesPage() {
{/* Generated Files */}
<div className="bg-forge-dark rounded-xl border border-gray-800 p-4">
<h3 className="text-white font-medium mb-3">Generated Files</h3>
{/* Result Video Player */}
{results.assets.find((a: any) => a.file_type === 'video') && (
<div className="mb-6 rounded-lg overflow-hidden bg-black aspect-video relative">
<video
controls
className="w-full h-full object-contain"
title="Generated Video"
>
<source src={`/api/v1/assets/${results.assets.find((a: any) => a.file_type === 'video').id}/download`} type="video/mp4" />
Your browser does not support the video tag.
</video>
</div>
)}
<div className="space-y-2">
{results.assets.map((asset: any) => (
<div
key={asset.id}
className="flex items-center justify-between p-3 bg-forge-gray rounded-lg"
>
<div>
<p className="text-white text-sm">{asset.original_filename}</p>
<div className="overflow-hidden">
<p className="text-white text-sm truncate" title={asset.original_filename}>{asset.original_filename}</p>
<p className="text-xs text-gray-500">
{asset.metadata?.type === 'translated' ? 'Translated' : 'Original'} {' '}
{asset.file_type}
{asset.metadata?.type ? (asset.metadata.type === 'translated' ? 'Translated SRT' : 'Original SRT') : (asset.file_type === 'video' ? 'Burned Video' : asset.file_type)}
</p>
</div>
<button
onClick={() => handleDownload(asset)}
className="p-2 text-forge-yellow hover:bg-forge-yellow/10 rounded transition-colors"
className="p-2 text-forge-yellow hover:bg-forge-yellow/10 rounded transition-colors shrink-0"
title="Download"
>
<Download className="w-4 h-4" />
</button>

View file

@ -0,0 +1,201 @@
'use client';
import { useState, useEffect } from 'react';
import { assetsApi } from '@/lib/api';
import { X, Search, Check, FileImage, FileVideo, FileAudio, FileText, Loader2 } from 'lucide-react';
import { toast } from 'react-hot-toast';
// Shape of a library asset as consumed by the picker.
// NOTE(review): this mirrors whatever `assetsApi.list` returns; only `id`,
// `original_filename` and `mime_type` are read by the picker itself.
interface Asset {
// Identifier; used as the React key, the selection key, and in the
// `/api/v1/assets/{id}/download` thumbnail URL.
id: string;
// Filename shown under the thumbnail and matched by the search box.
original_filename: string;
// MIME type; its prefix ('image/', 'video/', 'audio/') selects the thumbnail
// or icon and is what `allowedTypes` is matched against.
mime_type: string;
// Presumably the file size in bytes (not read by the picker) — confirm against the API.
file_size_bytes: number;
// Presumably a creation timestamp string; the list request sorts on `created_at`.
created_at: string;
}
// Props accepted by AssetPickerModal.
interface AssetPickerModalProps {
// When false the modal renders nothing; each change to true reloads the
// asset list and clears the current selection.
isOpen: boolean;
// Invoked by the header close button, the Cancel button, and right after
// `onConfirm` when the user confirms.
onClose: () => void;
// Receives the currently selected assets when the user confirms.
onConfirm: (assets: Asset[]) => void;
// MIME-type prefixes to show, e.g. ['image/', 'video/']; an asset is listed
// when its `mime_type` starts with any of them. Empty or omitted shows all.
allowedTypes?: string[]; // e.g. ['image/', 'video/'] prefixes
// Upper bound on the number of selected assets; omitted (or 0) means no limit.
maxSelect?: number;
// Heading shown in the modal header; defaults to "Select from Library".
title?: string;
}
/**
 * Modal dialog for choosing one or more assets from the library.
 *
 * Each time the modal opens it clears the selection and requests up to 100
 * assets from `assetsApi.list` (sorted by `created_at`, descending). The list
 * is filtered client-side by `allowedTypes` (MIME-type prefixes) and by the
 * search box (case-insensitive filename substring). Clicking a tile toggles
 * it, subject to `maxSelect`. Confirming calls `onConfirm` with the selected
 * assets and then `onClose`.
 *
 * Renders nothing while `isOpen` is false.
 */
export default function AssetPickerModal({
    isOpen,
    onClose,
    onConfirm,
    allowedTypes = [],
    maxSelect,
    title = "Select from Library"
}: AssetPickerModalProps) {
    const [assets, setAssets] = useState<Asset[]>([]);
    const [loading, setLoading] = useState(false);
    const [search, setSearch] = useState('');
    const [selected, setSelected] = useState<Set<string>>(new Set());

    // Load the library whenever the modal opens. The `cancelled` flag makes a
    // response that arrives after the modal was closed (or unmounted) a no-op,
    // so a stale request can neither overwrite newer data nor pop a toast for
    // a dialog the user has already dismissed.
    useEffect(() => {
        if (!isOpen) return;

        let cancelled = false;
        setSelected(new Set()); // Reset selection on open
        setLoading(true);

        (async () => {
            try {
                // Fetch reasonably large number of recent assets
                // In a real app, implement pagination or infinite scroll
                const response = await assetsApi.list({ limit: 100, sort: 'created_at', order: 'desc' });
                if (!cancelled) setAssets(response.data);
            } catch (error) {
                console.error('Failed to load assets', error);
                if (!cancelled) toast.error('Failed to load library');
            } finally {
                if (!cancelled) setLoading(false);
            }
        })();

        return () => {
            cancelled = true;
        };
    }, [isOpen]);

    // Standard modal behaviour: Escape dismisses the dialog while it is open.
    useEffect(() => {
        if (!isOpen) return;

        const handleKeyDown = (event: KeyboardEvent) => {
            if (event.key === 'Escape') onClose();
        };
        window.addEventListener('keydown', handleKeyDown);
        return () => window.removeEventListener('keydown', handleKeyDown);
    }, [isOpen, onClose]);

    // Add or remove an asset from the selection, enforcing `maxSelect` on add.
    const toggleSelection = (asset: Asset) => {
        const next = new Set(selected);
        if (next.has(asset.id)) {
            next.delete(asset.id);
        } else {
            if (maxSelect && next.size >= maxSelect) {
                toast.error(`Maximum ${maxSelect} items allowed`);
                return;
            }
            next.add(asset.id);
        }
        setSelected(next);
    };

    // Hand the selected assets (in list order) to the caller, then close.
    const handleConfirm = () => {
        onConfirm(assets.filter(asset => selected.has(asset.id)));
        onClose();
    };

    // Client-side filtering. The string fields are read null-safely: a record
    // with a missing mime type or filename must not crash the whole render.
    const needle = search.toLowerCase();
    const filteredAssets = assets.filter(asset => {
        if (allowedTypes.length > 0) {
            const mime = asset.mime_type ?? '';
            if (!allowedTypes.some(type => mime.startsWith(type))) return false;
        }
        if (search) {
            return (asset.original_filename ?? '').toLowerCase().includes(needle);
        }
        return true;
    });

    if (!isOpen) return null;

    return (
        <div
            className="fixed inset-0 z-50 flex items-center justify-center p-4 bg-black/80 backdrop-blur-sm"
            role="dialog"
            aria-modal="true"
            aria-label={title}
        >
            <div className="bg-forge-dark border border-gray-800 rounded-2xl w-full max-w-4xl flex flex-col max-h-[85vh] shadow-2xl">
                {/* Header */}
                <div className="p-4 border-b border-gray-800 flex items-center justify-between">
                    <h2 className="text-xl font-bold text-white">{title}</h2>
                    {/* type="button" on every button: never submit an enclosing form */}
                    <button
                        type="button"
                        onClick={onClose}
                        aria-label="Close"
                        className="p-2 hover:bg-gray-800 rounded-lg text-gray-400 hover:text-white transition-colors"
                    >
                        <X className="w-5 h-5" />
                    </button>
                </div>

                {/* Toolbar */}
                <div className="p-4 border-b border-gray-800 flex gap-4">
                    <div className="relative flex-1">
                        <Search className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-gray-500" />
                        <input
                            type="text"
                            placeholder="Search files..."
                            value={search}
                            onChange={(e) => setSearch(e.target.value)}
                            className="w-full bg-black/40 border border-gray-700 rounded-lg pl-9 pr-4 py-2 text-sm text-white focus:ring-1 focus:ring-forge-yellow focus:border-forge-yellow outline-none"
                        />
                    </div>
                    <div className="text-sm text-gray-400 flex items-center">
                        {selected.size} selected
                    </div>
                </div>

                {/* Content */}
                <div className="flex-1 overflow-y-auto p-4 custom-scrollbar">
                    {loading ? (
                        <div className="flex items-center justify-center h-64">
                            <Loader2 className="w-8 h-8 animate-spin text-forge-yellow" />
                        </div>
                    ) : filteredAssets.length === 0 ? (
                        <div className="flex flex-col items-center justify-center h-64 text-gray-500">
                            <FileImage className="w-12 h-12 mb-4 opacity-50" />
                            <p>No matching files found</p>
                        </div>
                    ) : (
                        <div className="grid grid-cols-2 sm:grid-cols-3 md:grid-cols-4 lg:grid-cols-5 gap-4">
                            {filteredAssets.map(asset => {
                                const isSelected = selected.has(asset.id);
                                const mime = asset.mime_type ?? '';
                                return (
                                    <div
                                        key={asset.id}
                                        onClick={() => toggleSelection(asset)}
                                        className={`group relative aspect-square rounded-xl overflow-hidden cursor-pointer border-2 transition-all ${isSelected ? 'border-forge-yellow ring-2 ring-forge-yellow/20' : 'border-transparent hover:border-gray-600'
                                            }`}
                                    >
                                        {/* Thumbnail: real preview for images, type icon otherwise */}
                                        <div className="absolute inset-0 bg-gray-900">
                                            {mime.startsWith('image/') ? (
                                                <img
                                                    src={`/api/v1/assets/${asset.id}/download`}
                                                    alt={asset.original_filename}
                                                    className="w-full h-full object-cover"
                                                    loading="lazy"
                                                />
                                            ) : (
                                                <div className="w-full h-full flex items-center justify-center text-gray-600">
                                                    {mime.startsWith('video/') ? <FileVideo className="w-8 h-8" /> :
                                                        mime.startsWith('audio/') ? <FileAudio className="w-8 h-8" /> :
                                                            <FileText className="w-8 h-8" />}
                                                </div>
                                            )}
                                        </div>

                                        {/* Overlay: always visible when selected, on hover otherwise */}
                                        <div className={`absolute inset-0 bg-black/40 transition-opacity ${isSelected ? 'opacity-100' : 'opacity-0 group-hover:opacity-100'}`}>
                                            <div className="absolute top-2 right-2">
                                                <div className={`w-6 h-6 rounded-full flex items-center justify-center border ${isSelected ? 'bg-forge-yellow border-forge-yellow text-black' : 'bg-black/50 border-white/50 text-transparent'
                                                    }`}>
                                                    <Check className="w-4 h-4" />
                                                </div>
                                            </div>
                                            <div className="absolute bottom-0 left-0 right-0 p-2 bg-gradient-to-t from-black/90 to-transparent">
                                                <p className="text-xs text-white truncate">{asset.original_filename}</p>
                                            </div>
                                        </div>
                                    </div>
                                );
                            })}
                        </div>
                    )}
                </div>

                {/* Footer */}
                <div className="p-4 border-t border-gray-800 flex justify-end gap-3 bg-gray-900/50 rounded-b-2xl">
                    <button
                        type="button"
                        onClick={onClose}
                        className="px-4 py-2 rounded-lg text-sm font-medium text-gray-300 hover:text-white hover:bg-gray-800 transition-colors"
                    >
                        Cancel
                    </button>
                    <button
                        type="button"
                        onClick={handleConfirm}
                        disabled={selected.size === 0}
                        className="btn-primary px-6 py-2 rounded-lg text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
                    >
                        Select {selected.size > 0 ? `(${selected.size})` : ''}
                    </button>
                </div>
            </div>
        </div>
    );
}

View file

@ -0,0 +1,174 @@
'use client';
import { X, ZoomIn, ZoomOut, Download, Film, Image as ImageIcon } from 'lucide-react';
import { useState, useRef, useEffect } from 'react';
// Props accepted by AssetPreviewModal.
interface AssetPreviewModalProps {
// When false the modal renders nothing; opening resets zoom and pan.
isOpen: boolean;
// Invoked when the user clicks the dimmed background.
onClose: () => void;
// URL of the asset to preview; changing it while open also resets zoom and pan.
assetUrl: string;
// Selects the toolbar icon; zoom controls are rendered for 'image' only.
assetType: 'image' | 'video';
// Label shown in the toolbar; defaults to 'Preview'.
assetName?: string;
}
// Zoom limits shared by the toolbar buttons and the wheel handler so both
// input methods agree with the buttons' disabled states.
const MIN_SCALE = 1;
const MAX_SCALE = 4;
const BUTTON_ZOOM_STEP = 0.5;
const WHEEL_ZOOM_STEP = 0.1;

/**
 * Clamp a zoom factor to [MIN_SCALE, MAX_SCALE] and round it to two decimals
 * so repeated 0.1 wheel steps cannot drift away from exact values such as 1.
 */
const clampScale = (value: number) =>
  Math.min(MAX_SCALE, Math.max(MIN_SCALE, Math.round(value * 100) / 100));

/**
 * Full-screen preview for a single image or video asset.
 *
 * Images can be zoomed (toolbar buttons, or Ctrl/Cmd + wheel) and panned by
 * dragging while zoomed in. Videos are shown with native controls. The modal
 * closes via the close button or by clicking the empty area around the asset.
 *
 * @param isOpen    Whether the modal is visible; renders nothing when false.
 * @param onClose   Called when the user dismisses the modal.
 * @param assetUrl  URL used as the media source and the download link.
 * @param assetType 'image' or 'video'; selects the renderer.
 * @param assetName Toolbar label, alt text and download filename (default 'Preview').
 */
export default function AssetPreviewModal({
  isOpen,
  onClose,
  assetUrl,
  assetType,
  assetName = 'Preview'
}: AssetPreviewModalProps) {
  const [scale, setScale] = useState(1);
  const [isDragging, setIsDragging] = useState(false);
  const [position, setPosition] = useState({ x: 0, y: 0 });
  const [dragStart, setDragStart] = useState({ x: 0, y: 0 });
  const containerRef = useRef<HTMLDivElement>(null);
  // True only while a mouse press that began on the empty backdrop is in
  // progress; prevents a pan-drag that ends outside the image from closing.
  const pressStartedOnBackdrop = useRef(false);

  // Reset view state when the modal opens or a different asset is shown.
  useEffect(() => {
    if (isOpen) {
      setScale(1);
      setPosition({ x: 0, y: 0 });
      setIsDragging(false);
    }
  }, [isOpen, assetUrl]);

  // Panning is only possible while zoomed in, so once the zoom is back at
  // 100% re-centre the image; otherwise it would stay stuck off-centre.
  useEffect(() => {
    if (scale <= MIN_SCALE) {
      setPosition(p => (p.x === 0 && p.y === 0 ? p : { x: 0, y: 0 }));
      setIsDragging(false);
    }
  }, [scale]);

  // React registers `onWheel` as a passive listener, where preventDefault()
  // is ignored (the browser would zoom the whole page on Ctrl/Cmd + wheel).
  // Attach a native non-passive listener instead.
  useEffect(() => {
    const container = containerRef.current;
    if (!isOpen || assetType !== 'image' || !container) return undefined;

    const onWheel = (e: WheelEvent) => {
      if (!(e.ctrlKey || e.metaKey)) return;
      e.preventDefault();
      const delta = e.deltaY > 0 ? -WHEEL_ZOOM_STEP : WHEEL_ZOOM_STEP;
      setScale(s => clampScale(s + delta));
    };

    container.addEventListener('wheel', onWheel, { passive: false });
    return () => container.removeEventListener('wheel', onWheel);
  }, [isOpen, assetType]);

  if (!isOpen) return null;

  const handleZoomIn = () => setScale(s => clampScale(s + BUTTON_ZOOM_STEP));
  const handleZoomOut = () => setScale(s => clampScale(s - BUTTON_ZOOM_STEP));

  const handleMouseDown = (e: React.MouseEvent) => {
    if (scale > 1) {
      setIsDragging(true);
      // Store the pointer offset relative to the current pan position.
      setDragStart({ x: e.clientX - position.x, y: e.clientY - position.y });
    }
  };

  const handleMouseMove = (e: React.MouseEvent) => {
    if (isDragging && scale > 1) {
      setPosition({
        x: e.clientX - dragStart.x,
        y: e.clientY - dragStart.y
      });
    }
  };

  const handleMouseUp = () => setIsDragging(false);

  // The content container fills the whole overlay, so it (not the outer
  // wrapper) is what receives clicks on the "background".
  const handleBackdropMouseDown = (e: React.MouseEvent) => {
    pressStartedOnBackdrop.current = e.target === e.currentTarget;
  };

  const handleBackdropClick = (e: React.MouseEvent) => {
    const shouldClose = pressStartedOnBackdrop.current && e.target === e.currentTarget;
    pressStartedOnBackdrop.current = false;
    if (shouldClose) onClose();
  };

  return (
    <div
      className="fixed inset-0 z-50 flex items-center justify-center bg-black/95 backdrop-blur-md"
      role="dialog"
      aria-modal="true"
      aria-label={assetName}
      onClick={(e) => {
        // Close if clicking the background
        if (e.target === e.currentTarget) onClose();
      }}
    >
      {/* Toolbar */}
      <div className="absolute top-0 left-0 right-0 p-4 flex justify-between items-center z-50 bg-gradient-to-b from-black/80 to-transparent pointer-events-none">
        <div className="pointer-events-auto flex items-center gap-3">
          <div className="bg-white/10 backdrop-blur-md px-4 py-2 rounded-full border border-white/20 text-white font-medium text-sm flex items-center gap-2">
            {assetType === 'video' ? <Film className="w-4 h-4 text-forge-yellow" /> : <ImageIcon className="w-4 h-4 text-forge-yellow" />}
            {assetName}
          </div>
        </div>
        <div className="pointer-events-auto flex items-center gap-2">
          {/* Zoom Controls (only for images) */}
          {assetType === 'image' && (
            <div className="bg-white/10 backdrop-blur-md rounded-full border border-white/20 flex overflow-hidden mr-2">
              <button
                type="button"
                onClick={handleZoomOut}
                className="p-2 hover:bg-white/10 text-white transition-colors border-r border-white/10 disabled:opacity-40 disabled:cursor-not-allowed"
                disabled={scale <= MIN_SCALE}
                aria-label="Zoom out"
              >
                <ZoomOut className="w-5 h-5" />
              </button>
              <div className="px-3 py-2 text-xs font-mono text-white/70 min-w-[3rem] text-center flex items-center justify-center">
                {(scale * 100).toFixed(0)}%
              </div>
              <button
                type="button"
                onClick={handleZoomIn}
                className="p-2 hover:bg-white/10 text-white transition-colors border-l border-white/10 disabled:opacity-40 disabled:cursor-not-allowed"
                disabled={scale >= MAX_SCALE}
                aria-label="Zoom in"
              >
                <ZoomIn className="w-5 h-5" />
              </button>
            </div>
          )}
          <a
            href={assetUrl}
            download={assetName}
            className="p-3 rounded-full bg-white/10 border border-white/20 text-white hover:bg-white/20 transition-all hover:scale-105"
            title="Download Original"
            aria-label="Download Original"
          >
            <Download className="w-5 h-5" />
          </a>
          <button
            type="button"
            onClick={onClose}
            className="p-3 rounded-full bg-white/10 border border-white/20 text-white hover:bg-red-500/20 hover:border-red-500/50 hover:text-red-400 transition-all hover:scale-105 ml-2"
            title="Close Preview"
            aria-label="Close Preview"
          >
            <X className="w-5 h-5" />
          </button>
        </div>
      </div>
      {/* Content Area (wheel zoom is attached natively via containerRef) */}
      <div
        ref={containerRef}
        className="w-full h-full flex items-center justify-center overflow-hidden p-6"
        onMouseDown={handleBackdropMouseDown}
        onClick={handleBackdropClick}
      >
        {assetType === 'video' ? (
          <video
            src={assetUrl}
            controls
            autoPlay
            className="max-w-full max-h-full rounded-lg shadow-2xl"
            style={{
              boxShadow: '0 0 50px rgba(0,0,0,0.5)'
            }}
          />
        ) : (
          <div
            className={`relative transition-transform duration-100 ease-out ${isDragging ? 'cursor-grabbing' : scale > 1 ? 'cursor-grab' : ''}`}
            style={{
              // Translate is divided by scale so the pan distance matches the
              // pointer movement in screen pixels regardless of zoom level.
              transform: `scale(${scale}) translate(${position.x / scale}px, ${position.y / scale}px)`,
            }}
            onMouseDown={handleMouseDown}
            onMouseMove={handleMouseMove}
            onMouseUp={handleMouseUp}
            onMouseLeave={handleMouseUp}
          >
            <img
              src={assetUrl}
              alt={assetName}
              className="max-w-full max-h-[90vh] object-contain rounded-lg shadow-2xl"
              draggable={false}
              style={{
                boxShadow: '0 0 50px rgba(0,0,0,0.5)'
              }}
            />
          </div>
        )}
      </div>
      {/* Hint overlay */}
      {assetType === 'image' && scale === 1 && (
        <div className="absolute bottom-8 left-1/2 -translate-x-1/2 pointer-events-none opacity-50 text-white/50 text-xs px-4 py-2 rounded-full bg-black/40 backdrop-blur-sm border border-white/10">
          Ctrl/Cmd + scroll to zoom • Drag to pan when zoomed
        </div>
      )}
    </div>
  );
}

View file

@ -14,6 +14,7 @@ interface FileUploadProps {
currentFile?: File | null;
onClear?: () => void;
multiple?: boolean;
className?: string;
}
const fileIcons: Record<string, any> = {
@ -31,6 +32,7 @@ export default function FileUpload({
currentFile,
onClear,
multiple = false,
className,
}: FileUploadProps) {
const [error, setError] = useState<string | null>(null);
@ -170,7 +172,8 @@ export default function FileUpload({
data-file-drop-zone="true"
className={clsx(
'upload-zone',
isDragActive && 'active'
isDragActive && 'active',
className
)}
>
<input {...getInputProps()} />

View file

@ -43,6 +43,7 @@ const modules = [
icon: Image,
items: [
{ name: 'Generate', href: '/image/generate', icon: ImagePlus },
{ name: 'Nano Edit', href: '/image/edit-pro', icon: Sparkles },
{ name: 'Upscale', href: '/image/upscale', icon: Maximize },
{ name: 'Remove Background', href: '/image/remove-bg', icon: Eraser },
],
@ -244,6 +245,18 @@ export default function Sidebar() {
<TrendingUp className="w-5 h-5 flex-shrink-0" />
{!sidebarCollapsed && <span>Reports</span>}
</Link>
<Link
href="/admin/usage"
className={clsx(
'flex items-center gap-3 px-4 py-2.5 mx-2 rounded-lg transition-colors',
pathname === '/admin/usage'
? 'bg-red-900/20 text-red-400'
: 'text-gray-400 hover:text-red-400 hover:bg-red-900/10'
)}
>
<History className="w-5 h-5 flex-shrink-0" />
{!sidebarCollapsed && <span>Usage Search</span>}
</Link>
<Link
href="/admin/logs"
className={clsx(

View file

@ -65,7 +65,11 @@ export const modulesApi = {
generateVideo: (data: any) => api.post('/modules/video/generate', data),
upscaleVideo: (data: any) => api.post('/modules/video/upscale', data),
extractFrame: (data: { asset_id: string; timestamp: number }) => api.post('/modules/video/extract-frame', data),
processSubtitles: (data: any) => api.post('/modules/video/subtitles', data),
processSubtitles: (data: any, onUploadProgress?: (progressEvent: any) => void) =>
api.post('/modules/video/subtitles', data, {
onUploadProgress,
headers: { 'Content-Type': 'multipart/form-data' }
}),
// Audio
voiceToText: (data: any) => api.post('/modules/audio/voice-to-text', data),
@ -78,6 +82,7 @@ export const modulesApi = {
// Text
generateAltText: (data: any) => api.post('/modules/text/alt-text', data),
enhancePrompt: (data: any) => api.post('/modules/text/enhance-prompt', data),
getCineOptions: () => api.get('/modules/text/cine-options'),
// Mermaid
generateMermaid: (data: any) => api.post('/modules/text/mermaid/generate', data),