forge/backend/app/api/v1/modules.py
DJP 4b096f45cb Fix auth, database issues, add provider API endpoint
- Fixed bcrypt password hashing compatibility
- Fixed cookie-based authentication persistence
- Added withCredentials for cookie support
- Fixed Nano Banana logger error
- Fixed Imagen to use REST API instead of Vertex AI
- Fixed Topaz hydration errors
- Added Flux API key
- Fixed database corruption from disk space
- Added thumbnail_path column
- Added provider capabilities API endpoint
- Added voice library save/load functionality
- Enhanced text-to-speech with more voice settings

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2025-12-09 20:50:12 -05:00

834 lines
25 KiB
Python

"""Module API Routes - All AI processing endpoints"""
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, BackgroundTasks, Body
from sqlalchemy.orm import Session
from typing import Optional, List
from uuid import UUID
from pydantic import BaseModel
import json
from app.database import get_db
from app.models.job import Job
from app.models.user import User
from app.services import (
image_generator,
image_upscaler,
background_remover,
video_generator,
video_upscaler,
subtitle_processor,
voice_to_text,
text_to_speech,
alt_text_generator,
prompt_studio,
markdown_tools,
sound_effects
)
router = APIRouter()
# ============== REQUEST MODELS ==============
class ImageGenerateRequest(BaseModel):
    """Request body for the image generation endpoint (POST /image/generate)."""

    # Required prompt text.
    prompt: str
    # Provider key; "openai" is used when the client omits it.
    provider: str = "openai"
    # Optional provider-specific model name.
    model: Optional[str] = None
    # Requested output dimensions (presumably pixels — confirm against the generator service).
    width: int = 1024
    height: int = 1024
    # Optional generation hints; they are forwarded in the job's input_data only
    # when set. How each provider interprets them is not visible in this module.
    style: Optional[str] = None
    quality: Optional[str] = None
    negative_prompt: Optional[str] = None
    aspect_ratio: Optional[str] = None
    style_preset: Optional[str] = None
    # For iterative editing (Nano Banana/Gemini)
    reference_asset_id: Optional[str] = None
class VideoGenerateRequest(BaseModel):
    """Request body for the video generation endpoint (POST /video/generate)."""

    # Required prompt text.
    prompt: str
    # Provider key; "runway" is used when the client omits it.
    provider: str = "runway"
    # Optional provider-specific model name.
    model: Optional[str] = None
    # Clip length (presumably seconds — confirm against the generator service).
    duration: int = 5
    aspect_ratio: str = "16:9"
    # Resolution passed through as a "WIDTHxHEIGHT"-style string.
    resolution: str = "1280x768"
    # Runway specific
    camera_control: Optional[dict] = None
    frame_position: str = "first"
    # Veo specific
    first_frame_asset_id: Optional[str] = None
    last_frame_asset_id: Optional[str] = None
    reference_asset_ids: Optional[List[str]] = None
    # Input image
    input_asset_id: Optional[str] = None
class TextToSpeechRequest(BaseModel):
    """Request body for the text-to-speech endpoint (POST /audio/text-to-speech)."""

    # Required text to synthesize.
    text: str
    # Voice and model identifiers used when the client does not choose any.
    voice_id: str = "21m00Tcm4TlvDq8ikWAM"
    model_id: str = "eleven_multilingual_v2"
    # Voice settings. No range validation is applied here; valid ranges are
    # whatever the downstream text_to_speech service accepts (not shown here).
    stability: float = 0.5
    similarity_boost: float = 0.5
    style: float = 0.0
    use_speaker_boost: bool = True
    speed: float = 1.0
    # Output format identifier passed through unchanged.
    output_format: str = "mp3_44100_128"
class SoundEffectRequest(BaseModel):
    """Request body for the sound effect endpoint (POST /audio/sound-effects)."""

    # Required description of the sound to generate.
    text: str
    # Optional duration; None leaves the choice to the downstream service.
    # NOTE(review): the endpoint docstring mentions a 22 second maximum, but no
    # limit is enforced on this field.
    duration_seconds: Optional[float] = None
    prompt_influence: float = 0.3
    loop: bool = False
    # Output format identifier passed through unchanged.
    output_format: str = "mp3_44100_128"
class PromptEnhanceRequest(BaseModel):
    """Request body for the prompt enhancement endpoint (POST /text/enhance-prompt)."""

    # Required prompt to enhance.
    prompt: str
    # Enhancement style key; defaults to "cinematic".
    style: str = "cinematic"
    # Provider key forwarded to prompt_studio.enhance.
    provider: str = "openai"
    # Flags forwarded to prompt_studio.enhance; their exact effect is defined there.
    include_negative: bool = True
    include_technical: bool = True
    # Language code forwarded to prompt_studio.enhance.
    language: str = "en"
class MermaidRenderRequest(BaseModel):
    """Request body for the Mermaid render endpoint (POST /text/mermaid/render)."""

    # Required Mermaid diagram source.
    code: str
    # Rendering options forwarded unchanged to markdown_tools.render_mermaid.
    output_format: str = "svg"
    theme: str = "default"
    background: str = "transparent"
class MermaidGenerateRequest(BaseModel):
    """Request body for the Mermaid generation endpoint (POST /text/mermaid/generate)."""

    # Required natural-language description of the diagram.
    description: str
    # Diagram type and style forwarded to markdown_tools.generate_mermaid_with_ai.
    diagram_type: str = "flowchart"
    style: str = "detailed"
    # When True, the endpoint also renders the generated code and attaches the
    # render result to the response.
    render: bool = True
class MarkdownConvertRequest(BaseModel):
    """Request body for the Markdown conversion endpoint (POST /text/markdown/convert)."""

    # Required Markdown source text.
    content: str
    # Conversion options forwarded unchanged to markdown_tools.convert_markdown.
    output_format: str = "html"
    theme: str = "github"
class MarkdownGenerateRequest(BaseModel):
    """Request body for the Markdown generation endpoint (POST /text/markdown/generate)."""

    # Required topic to write about.
    topic: str
    # Generation options forwarded unchanged to markdown_tools.generate_markdown_with_ai.
    content_type: str = "article"
    length: str = "medium"
    include_toc: bool = True
# ============== SHARED HELPERS / IMAGE MODULES ==============
def job_response(job: Job) -> dict:
    """Serialize a job record into the plain dict returned by the module endpoints.

    Identifiers are stringified, timestamps are rendered with ``isoformat()``,
    and a missing/zero ``progress`` is reported as ``0``.
    """

    def _stringify_ids(ids):
        # Missing or empty id collections are reported as None rather than [].
        if not ids:
            return None
        return [str(item) for item in ids]

    def _iso_or_none(moment):
        # Timestamps that are not set stay None in the payload.
        if not moment:
            return None
        return moment.isoformat()

    payload = {"id": str(job.id)}
    # Fields copied through unchanged.
    for field in ("module", "action", "status"):
        payload[field] = getattr(job, field)
    payload["progress"] = job.progress or 0
    payload["input_data"] = job.input_data
    payload["output_data"] = job.output_data
    payload["input_asset_ids"] = _stringify_ids(job.input_asset_ids)
    payload["output_asset_ids"] = _stringify_ids(job.output_asset_ids)
    payload["error_message"] = job.error_message
    payload["api_provider"] = job.api_provider
    payload["api_model"] = job.api_model
    payload["created_at"] = _iso_or_none(job.created_at)
    payload["completed_at"] = _iso_or_none(job.completed_at)
    return payload
@router.post("/image/generate")
async def generate_image(
    request: ImageGenerateRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Generate an image using various AI providers

    Providers: openai, dalle3, stable-diffusion, leonardo, ideogram, flux, gemini, nano-banana
    Supports iterative editing with reference_asset_id for nano-banana/gemini providers

    Creates a queued Job, schedules ``image_generator.generate`` as a background
    task and returns the serialized job.
    """
    import base64
    import logging

    from app.models.asset import Asset

    # NOTE(review): jobs are attributed to a hard-coded test account rather
    # than the authenticated caller; user_id is None if that row is missing.
    user = db.query(User).filter(User.email == "test@forge.ai").first()
    input_data = request.model_dump(exclude_none=True)

    # If reference_asset_id is provided, load the image and hand it to the
    # generator inline as base64.
    if request.reference_asset_id:
        asset = db.query(Asset).filter(Asset.id == request.reference_asset_id).first()
        if asset and asset.file_path:
            try:
                # Open directly instead of exists()-then-open(): the file can
                # disappear (or be unreadable) between the check and the read.
                with open(asset.file_path, "rb") as f:
                    image_data = f.read()
            except OSError as exc:
                # Best-effort, as before: an unreadable reference does not fail
                # the request, the job is simply queued without it.
                logging.getLogger(__name__).warning(
                    "Could not read reference asset %s: %s",
                    request.reference_asset_id,
                    exc,
                )
            else:
                input_data["reference_image"] = base64.b64encode(image_data).decode("utf-8")
                # The id has been converted to image data, so drop it.
                # NOTE(review): assumes the original removed the id only on the
                # success path — confirm against the upstream file.
                input_data.pop("reference_asset_id", None)

    job = Job(
        user_id=user.id if user else None,
        module="image_generator",
        action="generate",
        input_data=input_data,
        status="queued",
        progress=0
    )
    db.add(job)
    db.commit()
    db.refresh(job)
    background_tasks.add_task(image_generator.generate, str(job.id))
    return job_response(job)
@router.post("/image/upscale")
async def upscale_image(
    file: UploadFile = File(...),
    scale: int = Form(2),
    model: str = Form("auto"),
    face_enhancement: bool = Form(False),
    noise_reduction: Optional[int] = Form(None),
    sharpening: Optional[int] = Form(None),
    compression_recovery: Optional[int] = Form(None),
    detail_enhancement: Optional[int] = Form(None),
    preserve_grain: bool = Form(False),
    output_format: str = Form("png"),
    background_tasks: BackgroundTasks = None,
    db: Session = Depends(get_db)
):
    """Upscale an image using Topaz Labs

    Models: proteus, artemis, gaia, iris, nyx, rhea, theia, auto
    """
    from app.api.v1.assets import upload_asset

    # NOTE(review): jobs are attributed to a hard-coded test account.
    owner = db.query(User).filter(User.email == "test@forge.ai").first()

    # Persist the upload first so the job can reference it by asset id.
    source = await upload_asset(file=file, source_module="image_upscaler", db=db)

    options = dict(
        scale=scale,
        model=model,
        face_enhancement=face_enhancement,
        noise_reduction=noise_reduction,
        sharpening=sharpening,
        compression_recovery=compression_recovery,
        detail_enhancement=detail_enhancement,
        preserve_grain=preserve_grain,
        output_format=output_format,
    )
    owner_id = owner.id if owner else None
    job = Job(
        user_id=owner_id,
        module="image_upscaler",
        action="upscale",
        input_data=options,
        input_asset_ids=[source.id],
        status="queued",
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    # Without a BackgroundTasks instance the job stays queued and is only returned.
    if background_tasks:
        background_tasks.add_task(image_upscaler.upscale, str(job.id))
    return job_response(job)
@router.post("/image/remove-background")
async def remove_background(
    file: UploadFile = File(...),
    output_format: str = Form("png"),
    background_tasks: BackgroundTasks = None,
    db: Session = Depends(get_db)
):
    """Remove background from image"""
    from app.api.v1.assets import upload_asset

    # NOTE(review): jobs are attributed to a hard-coded test account.
    owner = db.query(User).filter(User.email == "test@forge.ai").first()

    # Store the uploaded image as an asset; the job refers to it by id.
    source = await upload_asset(file=file, source_module="background_remover", db=db)

    owner_id = owner.id if owner else None
    job = Job(
        user_id=owner_id,
        module="background_remover",
        action="remove",
        input_data=dict(output_format=output_format),
        input_asset_ids=[source.id],
        status="queued",
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    # Without a BackgroundTasks instance the job stays queued and is only returned.
    if background_tasks:
        background_tasks.add_task(background_remover.remove_background, str(job.id))
    return job_response(job)
# ============== VIDEO MODULES ==============
@router.post("/video/generate")
async def generate_video(
    request: VideoGenerateRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Generate video using Runway or Google Veo

    Runway: gen3_alpha, gen3_alpha_turbo, gen4
    Veo: veo-3.1-generate-preview, veo-3.1-fast

    Creates a queued Job, schedules ``video_generator.generate`` as a background
    task and returns the serialized job.

    Raises:
        HTTPException: 422 when ``input_asset_id`` is not a valid UUID string.
    """
    # Validate the asset id before touching the database: a malformed value
    # makes UUID() raise ValueError, which previously surfaced as a 500.
    input_asset_ids = []
    if request.input_asset_id:
        try:
            input_asset_ids.append(UUID(request.input_asset_id))
        except ValueError as exc:
            raise HTTPException(
                status_code=422,
                detail=f"Invalid input_asset_id: {request.input_asset_id}",
            ) from exc

    # NOTE(review): jobs are attributed to a hard-coded test account rather
    # than the authenticated caller; user_id is None if that row is missing.
    user = db.query(User).filter(User.email == "test@forge.ai").first()

    job = Job(
        user_id=user.id if user else None,
        module="video_generator",
        action="generate",
        input_data=request.model_dump(exclude_none=True),
        # An empty list is stored as None, matching jobs without input assets.
        input_asset_ids=input_asset_ids if input_asset_ids else None,
        status="queued"
    )
    db.add(job)
    db.commit()
    db.refresh(job)
    background_tasks.add_task(video_generator.generate, str(job.id))
    return job_response(job)
@router.post("/video/upscale")
async def upscale_video(
    file: UploadFile = File(...),
    scale: int = Form(2),
    model: str = Form("auto"),
    frame_interpolation: int = Form(1),
    background_tasks: BackgroundTasks = None,
    db: Session = Depends(get_db)
):
    """Upscale video using Topaz Labs"""
    from app.api.v1.assets import upload_asset

    # NOTE(review): jobs are attributed to a hard-coded test account.
    owner = db.query(User).filter(User.email == "test@forge.ai").first()

    # Persist the uploaded video so the job can reference it by asset id.
    source = await upload_asset(file=file, source_module="video_upscaler", db=db)

    options = dict(
        scale=scale,
        model=model,
        frame_interpolation=frame_interpolation,
    )
    owner_id = owner.id if owner else None
    job = Job(
        user_id=owner_id,
        module="video_upscaler",
        action="upscale",
        input_data=options,
        input_asset_ids=[source.id],
        status="queued",
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    # Without a BackgroundTasks instance the job stays queued and is only returned.
    if background_tasks:
        background_tasks.add_task(video_upscaler.upscale, str(job.id))
    return job_response(job)
@router.get("/video/subtitles/config")
async def get_subtitle_config():
    """Get available subtitle configuration options"""
    # Thin pass-through: the option catalogue lives in the subtitle service.
    config = subtitle_processor.get_subtitle_config()
    return config
@router.post("/video/subtitles")
async def generate_subtitles(
    file: UploadFile = File(...),
    source_language: str = Form("auto"),
    target_language: Optional[str] = Form(None),
    burn_subtitles: bool = Form(False),
    whisper_model: str = Form("base"),
    output_format: str = Form("srt"),
    # Styling options
    font: str = Form("Arial"),
    font_size: int = Form(24),
    text_color: str = Form("white"),
    outline_color: str = Form("black"),
    outline_width: float = Form(2.0),
    background_color: Optional[str] = Form(None),
    background_opacity: float = Form(0.0),
    position: str = Form("bottom"),
    alignment: str = Form("center"),
    margin_v: int = Form(30),
    margin_h: int = Form(20),
    shadow: int = Form(0),
    bold: bool = Form(False),
    italic: bool = Form(False),
    font_preset: Optional[str] = Form(None),
    word_timestamps: bool = Form(False),
    background_tasks: BackgroundTasks = None,
    db: Session = Depends(get_db)
):
    """
    Generate subtitles for video using Whisper + DeepL

    Parameters:
    - source_language: Source language code or "auto" for detection
    - target_language: Target language code for translation (optional)
    - burn_subtitles: Whether to burn subtitles into video
    - whisper_model: Whisper model (tiny/base/small/medium/large/large-v2/large-v3)
    - output_format: Output format (srt/vtt/ass)

    Styling (for burning):
    - font: Font family name
    - font_size: Font size in points
    - text_color: Primary text color
    - outline_color: Text outline color
    - outline_width: Outline thickness (0-5)
    - background_color: Background box color
    - background_opacity: Background opacity (0-1)
    - position: Vertical position (bottom/top/center)
    - alignment: Horizontal alignment (left/center/right)
    - margin_v: Vertical margin from edge
    - margin_h: Horizontal margin
    - shadow: Shadow depth (0-4)
    - bold: Use bold text
    - italic: Use italic text
    - font_preset: Predefined style preset (default/cinematic/documentary/news/social_media/minimal/bold)
    - word_timestamps: Include word-level timestamps
    """
    from app.api.v1.assets import upload_asset

    # NOTE(review): jobs are attributed to a hard-coded test account.
    owner = db.query(User).filter(User.email == "test@forge.ai").first()

    # Persist the uploaded video so the job can reference it by asset id.
    source = await upload_asset(file=file, source_module="subtitle_processor", db=db)

    # Transcription/translation settings followed by the styling settings;
    # every form field is recorded on the job as received (no range checks here).
    options = dict(
        source_language=source_language,
        target_language=target_language,
        burn_subtitles=burn_subtitles,
        whisper_model=whisper_model,
        output_format=output_format,
        font=font,
        font_size=font_size,
        text_color=text_color,
        outline_color=outline_color,
        outline_width=outline_width,
        background_color=background_color,
        background_opacity=background_opacity,
        position=position,
        alignment=alignment,
        margin_v=margin_v,
        margin_h=margin_h,
        shadow=shadow,
        bold=bold,
        italic=italic,
        font_preset=font_preset,
        word_timestamps=word_timestamps,
    )
    owner_id = owner.id if owner else None
    job = Job(
        user_id=owner_id,
        module="subtitle_processor",
        action="generate",
        input_data=options,
        input_asset_ids=[source.id],
        status="queued",
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    # Without a BackgroundTasks instance the job stays queued and is only returned.
    if background_tasks:
        background_tasks.add_task(subtitle_processor.process, str(job.id))
    return job_response(job)
# ============== AUDIO MODULES ==============
@router.post("/audio/voice-to-text")
async def transcribe_audio(
    file: UploadFile = File(...),
    output_format: str = Form("txt"),
    translate: bool = Form(False),
    target_language: str = Form("EN-US"),
    background_tasks: BackgroundTasks = None,
    db: Session = Depends(get_db)
):
    """Transcribe audio to text using Whisper"""
    from app.api.v1.assets import upload_asset

    # NOTE(review): jobs are attributed to a hard-coded test account.
    owner = db.query(User).filter(User.email == "test@forge.ai").first()

    # Persist the uploaded audio so the job can reference it by asset id.
    source = await upload_asset(file=file, source_module="voice_to_text", db=db)

    settings = dict(
        output_format=output_format,
        translate=translate,
        target_language=target_language,
    )
    owner_id = owner.id if owner else None
    job = Job(
        user_id=owner_id,
        module="voice_to_text",
        action="transcribe",
        input_data=settings,
        input_asset_ids=[source.id],
        status="queued",
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    # Without a BackgroundTasks instance the job stays queued and is only returned.
    if background_tasks:
        background_tasks.add_task(voice_to_text.transcribe, str(job.id))
    return job_response(job)
@router.post("/audio/text-to-speech")
async def synthesize_speech(
    request: TextToSpeechRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Convert text to speech using ElevenLabs

    Models: eleven_multilingual_v2, eleven_flash_v2_5, eleven_turbo_v2_5, eleven_v3
    """
    # NOTE(review): jobs are attributed to a hard-coded test account.
    owner = db.query(User).filter(User.email == "test@forge.ai").first()
    owner_id = owner.id if owner else None

    # The full request (defaults included) is stored as the job input.
    settings = request.model_dump()
    job = Job(
        user_id=owner_id,
        module="text_to_speech",
        action="synthesize",
        input_data=settings,
        status="queued",
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    job_id = str(job.id)
    background_tasks.add_task(text_to_speech.synthesize, job_id)
    return job_response(job)
@router.post("/audio/speech-to-speech")
async def convert_voice(
    file: UploadFile = File(...),
    voice_id: str = Form(...),
    background_tasks: BackgroundTasks = None,
    db: Session = Depends(get_db)
):
    """Convert voice to another voice using ElevenLabs"""
    from app.api.v1.assets import upload_asset

    # NOTE(review): jobs are attributed to a hard-coded test account.
    owner = db.query(User).filter(User.email == "test@forge.ai").first()

    # Persist the uploaded audio so the job can reference it by asset id.
    source = await upload_asset(file=file, source_module="speech_to_speech", db=db)

    owner_id = owner.id if owner else None
    job = Job(
        user_id=owner_id,
        module="speech_to_speech",
        action="convert",
        input_data=dict(voice_id=voice_id),
        input_asset_ids=[source.id],
        status="queued",
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    # Speech-to-speech is handled by the text_to_speech service module.
    if background_tasks:
        background_tasks.add_task(text_to_speech.speech_to_speech, str(job.id))
    return job_response(job)
@router.post("/audio/sound-effects")
async def generate_sound_effect(
    request: SoundEffectRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Generate sound effects from text description using ElevenLabs

    Describe the sound you want - explosions, footsteps, ambient sounds, etc.
    Max duration: 22 seconds
    """
    # NOTE(review): jobs are attributed to a hard-coded test account.
    owner = db.query(User).filter(User.email == "test@forge.ai").first()
    owner_id = owner.id if owner else None

    # The full request (defaults included) is stored as the job input.
    settings = request.model_dump()
    job = Job(
        user_id=owner_id,
        module="sound_effects",
        action="generate",
        input_data=settings,
        status="queued",
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    job_id = str(job.id)
    background_tasks.add_task(sound_effects.generate_sound_effect_job, job_id)
    return job_response(job)
@router.get("/audio/sound-effects/formats")
async def get_sound_effect_formats():
    """Get available output formats for sound effects"""
    # Ask the sound-effects generator which formats it offers and return that as-is.
    return await sound_effects.get_sound_effects_generator().get_available_formats()
# ============== TEXT MODULES ==============
@router.post("/text/alt-text")
async def generate_alt_text(
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = None,
    db: Session = Depends(get_db)
):
    """Generate alt text for image using GPT-4 Vision"""
    from app.api.v1.assets import upload_asset

    # NOTE(review): jobs are attributed to a hard-coded test account.
    owner = db.query(User).filter(User.email == "test@forge.ai").first()

    # Persist the uploaded image so the job can reference it by asset id.
    source = await upload_asset(file=file, source_module="alt_text_generator", db=db)

    owner_id = owner.id if owner else None
    job = Job(
        user_id=owner_id,
        module="alt_text_generator",
        action="generate",
        input_data=dict(),  # this module takes no options
        input_asset_ids=[source.id],
        status="queued",
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    # Without a BackgroundTasks instance the job stays queued and is only returned.
    if background_tasks:
        background_tasks.add_task(alt_text_generator.generate, str(job.id))
    return job_response(job)
@router.get("/image/providers")
def get_image_providers():
    """Get all image providers with their capabilities

    Returns a copy of ``IMAGE_PROVIDERS`` in which the "stable-diffusion"
    entry (when present) additionally carries ``style_presets``.
    """
    from app.services.image_generator import IMAGE_PROVIDERS, STABILITY_STYLE_PRESETS

    providers = IMAGE_PROVIDERS.copy()
    if "stable-diffusion" in providers:
        # .copy() is shallow, so the nested provider dict is still the one held
        # by the module-level constant. Build a new entry instead of assigning
        # into it, so the shared IMAGE_PROVIDERS config is never mutated.
        providers["stable-diffusion"] = {
            **providers["stable-diffusion"],
            "style_presets": STABILITY_STYLE_PRESETS,
        }
    return providers
@router.post("/text/enhance-prompt")
async def enhance_prompt(
    request: PromptEnhanceRequest,
    db: Session = Depends(get_db)
):
    """Enhance a prompt using AI (Gemini/OpenAI)

    Styles: cinematic, photographic, artistic, product, fantasy, minimal,
    vintage, futuristic, anime, portrait, landscape, abstract,
    fashion, architecture, food
    Providers: openai, gpt-image-1, stable-diffusion, midjourney, flux, leonardo
    """
    # Runs inline (no Job row, no background task); `db` is not used here.
    options = dict(
        prompt=request.prompt,
        style=request.style,
        provider=request.provider,
        include_negative=request.include_negative,
        include_technical=request.include_technical,
        language=request.language,
    )
    return await prompt_studio.enhance(**options)
@router.get("/text/prompt-styles")
async def get_prompt_styles():
    """Get available prompt enhancement styles"""
    # Thin pass-through to the prompt studio service.
    styles = prompt_studio.get_available_styles()
    return styles
# ============== MARKDOWN & MERMAID MODULES ==============
@router.post("/text/mermaid/render")
async def render_mermaid_diagram(request: MermaidRenderRequest):
    """Render Mermaid diagram code to SVG/PNG

    Themes: default, dark, forest, neutral
    Formats: svg, png
    """
    # All request fields are forwarded unchanged; the service result is returned as-is.
    return await markdown_tools.render_mermaid(
        code=request.code,
        output_format=request.output_format,
        theme=request.theme,
        background=request.background,
    )
@router.post("/text/mermaid/generate")
async def generate_mermaid_diagram(request: MermaidGenerateRequest):
    """Generate Mermaid diagram from natural language description

    Diagram types: flowchart, sequence, class, state, er, journey,
    gantt, pie, mindmap, timeline, gitgraph
    Styles: simple, detailed, complex
    """
    generated = await markdown_tools.generate_mermaid_with_ai(
        description=request.description,
        diagram_type=request.diagram_type,
        style=request.style,
    )

    # Rendering is optional and only attempted for a successful generation
    # that actually produced diagram code.
    wants_render = request.render and generated.get("success") and generated.get("code")
    if not wants_render:
        return generated

    # Rendered with the service defaults (no format/theme overrides).
    generated["rendered"] = await markdown_tools.render_mermaid(generated["code"])
    return generated
@router.get("/text/mermaid/templates")
async def get_mermaid_templates():
    """Get available Mermaid diagram templates"""
    # Thin pass-through to the markdown tools service.
    templates = markdown_tools.get_mermaid_templates()
    return templates
@router.get("/text/mermaid/templates/{diagram_type}")
async def get_mermaid_template(diagram_type: str):
    """Get a specific Mermaid template"""
    found = markdown_tools.get_mermaid_template(diagram_type)
    if found:
        return found
    # A falsy lookup result is reported to the client as 404.
    raise HTTPException(status_code=404, detail=f"Template not found: {diagram_type}")
@router.post("/text/markdown/convert")
async def convert_markdown(request: MarkdownConvertRequest):
    """Convert Markdown to HTML or plain text

    Output formats: html, plain
    Themes: github (for HTML)
    """
    # All request fields are forwarded unchanged; the service result is returned as-is.
    return await markdown_tools.convert_markdown(
        content=request.content,
        output_format=request.output_format,
        theme=request.theme,
    )
@router.post("/text/markdown/generate")
async def generate_markdown_content(request: MarkdownGenerateRequest):
    """Generate Markdown content using AI

    Content types: article, documentation, readme, tutorial, report
    Length: short, medium, long
    """
    # All request fields are forwarded unchanged; the service result is returned as-is.
    return await markdown_tools.generate_markdown_with_ai(
        topic=request.topic,
        content_type=request.content_type,
        length=request.length,
        include_toc=request.include_toc,
    )
# ============== UTILITY ENDPOINTS ==============
@router.get("/voices")
async def get_elevenlabs_voices():
    """Get available ElevenLabs voices"""
    # Thin pass-through to the text_to_speech service.
    return await text_to_speech.get_voices()
@router.get("/models/{provider}")
async def get_provider_models(provider: str):
    """Get available models for a provider"""
    image_models = {
        "openai": ["gpt-image-1", "dall-e-3", "dall-e-2"],
        "stable-diffusion": ["sd3-large", "sd3-medium", "sdxl-1.0", "stable-cascade"],
        "leonardo": ["phoenix-1", "kino-xl", "anime-xl"],
        "ideogram": ["V_2", "V_2_TURBO"],
        "flux": ["flux-pro-1.1", "flux-dev", "flux-schnell"],
        "gemini": ["gemini-2.0-flash-exp"],
    }
    video_models = {
        "runway": ["gen3_alpha", "gen3_alpha_turbo", "gen4"],
        "veo": [
            "veo-3.1-generate-preview",
            "veo-3.1-fast-generate-preview",
            "veo-3.0-generate-001",
            "veo-3.0-fast-generate-001",
            "veo-2.0-generate-001",
        ],
    }
    upscaling_models = {
        "topaz-image": ["proteus", "artemis", "gaia", "iris", "nyx", "rhea", "theia", "auto"],
        "topaz-video": ["auto", "proteus", "artemis"],
    }
    audio_models = {
        "elevenlabs": [
            "eleven_multilingual_v2",
            "eleven_flash_v2_5",
            "eleven_turbo_v2_5",
            "eleven_v3",
            "eleven_monolingual_v1",
        ],
    }

    # Provider keys are unique across categories, so a flat merge is lossless.
    catalog = {**image_models, **video_models, **upscaling_models, **audio_models}

    # Unknown providers yield an empty list rather than a 404.
    if provider in catalog:
        return catalog[provider]
    return []
@router.get("/models")
async def get_all_models():
    """Get all available models organized by category"""

    def _entry(models, default, features):
        # Every provider entry has the same three keys, in this order.
        return {"models": models, "default": default, "features": features}

    image = {
        "openai": _entry(
            ["gpt-image-1", "dall-e-3"],
            "gpt-image-1",
            ["quality", "background", "transparent"],
        ),
        "stable-diffusion": _entry(
            ["sd3-large", "sd3-medium", "sdxl-1.0"],
            "sd3-large",
            ["negative_prompt", "style_preset", "img2img"],
        ),
        "flux": _entry(
            ["flux-pro-1.1", "flux-dev", "flux-schnell"],
            "flux-pro-1.1",
            ["img2img"],
        ),
    }
    video = {
        "runway": _entry(
            ["gen3_alpha", "gen3_alpha_turbo", "gen4"],
            "gen3_alpha_turbo",
            ["camera_control", "image_to_video"],
        ),
        "veo": _entry(
            ["veo-3.1-generate-preview", "veo-3.1-fast-generate-preview", "veo-3.0-generate-001"],
            "veo-3.1-generate-preview",
            ["audio", "reference_images", "video_extension", "frame_interpolation"],
        ),
    }
    audio = {
        "elevenlabs": _entry(
            ["eleven_multilingual_v2", "eleven_flash_v2_5", "eleven_turbo_v2_5", "eleven_v3"],
            "eleven_multilingual_v2",
            ["32_languages", "voice_cloning", "voice_settings"],
        ),
    }
    # NOTE(review): this catalogue lists fewer providers/models than
    # GET /models/{provider}; the two are maintained separately.
    return {"image": image, "video": video, "audio": audio}