1013 lines
32 KiB
Python
1013 lines
32 KiB
Python
"""Module API Routes - All AI processing endpoints"""
|
|
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, BackgroundTasks, Body
|
|
from sqlalchemy.orm import Session
|
|
from typing import Optional, List, Union, Any
|
|
from uuid import UUID
|
|
from pydantic import BaseModel
|
|
import json
|
|
|
|
from app.database import get_db
|
|
from app.models.job import Job
|
|
from app.models.user import User
|
|
from app.services import (
|
|
image_generator,
|
|
image_upscaler,
|
|
background_remover,
|
|
video_generator,
|
|
video_upscaler,
|
|
subtitle_processor,
|
|
voice_to_text,
|
|
text_to_speech,
|
|
alt_text_generator,
|
|
prompt_studio,
|
|
markdown_tools,
|
|
sound_effects
|
|
)
|
|
from app.workers.tasks import process_video_generation, process_image_upscaling
|
|
|
|
router = APIRouter()
|
|
|
|
|
|
# ============== REQUEST MODELS ==============
|
|
|
|
class ImageGenerateRequest(BaseModel):
    """Request body for the image generation endpoint.

    Provider-specific settings can be supplied either through the generic
    ``provider_options`` mapping or through the individual legacy fields.
    """

    prompt: str
    provider: str = "openai"
    model: Optional[str] = None

    # Generic provider_options accepts any key-value pairs
    provider_options: Optional[dict] = None

    # Keep backward compatibility fields
    width: Optional[int] = None
    height: Optional[int] = None
    style: Optional[str] = None
    quality: Optional[str] = None
    negative_prompt: Optional[str] = None
    aspect_ratio: Optional[str] = None
    style_preset: Optional[str] = None
    reference_asset_id: Optional[str] = None

    def get_merged_options(self) -> dict:
        """Return ``provider_options`` combined with the legacy fields.

        The result starts as a shallow copy of ``provider_options`` (or an
        empty dict). A legacy field is added only when its value is truthy
        and its key is not already present, so explicit ``provider_options``
        entries always take precedence.
        """
        merged = {}
        if self.provider_options:
            merged = self.provider_options.copy()

        # Order matters only for the insertion order of the resulting dict.
        legacy_fields = (
            'width',
            'height',
            'style',
            'quality',
            'negative_prompt',
            'aspect_ratio',
            'style_preset',
            'reference_asset_id',
        )
        for field_name in legacy_fields:
            value = getattr(self, field_name)
            if value and field_name not in merged:
                merged[field_name] = value

        return merged
|
|
|
|
|
|
class VideoGenerateRequest(BaseModel):
    """Request body for the video generation endpoint.

    Provider-specific settings can be supplied either through the generic
    ``provider_options`` mapping or through the individual legacy fields.
    """

    prompt: Optional[str] = None
    provider: str = "runway"
    model: Optional[str] = None

    # Generic provider_options
    provider_options: Optional[dict] = None

    # Backward compatibility fields
    duration: Optional[Union[int, str]] = None
    aspect_ratio: Optional[str] = None
    resolution: Optional[str] = None
    camera_control: Optional[dict] = None
    frame_position: Optional[str] = None
    first_frame_asset_id: Optional[str] = None
    last_frame_asset_id: Optional[str] = None
    reference_asset_ids: Optional[List[str]] = None
    input_asset_id: Optional[str] = None

    def get_merged_options(self) -> dict:
        """Return ``provider_options`` combined with the legacy fields.

        The result starts as a shallow copy of ``provider_options`` (or an
        empty dict). A legacy field is added only when its value is truthy
        and its key is not already present, so explicit ``provider_options``
        entries always take precedence.
        """
        merged = {}
        if self.provider_options:
            merged = self.provider_options.copy()

        # Order matters only for the insertion order of the resulting dict.
        legacy_fields = (
            'duration',
            'aspect_ratio',
            'resolution',
            'camera_control',
            'frame_position',
            'first_frame_asset_id',
            'last_frame_asset_id',
            'reference_asset_ids',
            'input_asset_id',
        )
        for field_name in legacy_fields:
            value = getattr(self, field_name)
            if value and field_name not in merged:
                merged[field_name] = value

        return merged
|
|
|
|
|
|
class TextToSpeechRequest(BaseModel):
    """Request body for the text-to-speech endpoint.

    The endpoint stores the full dump of this model as the job's input data.
    """

    # Text to synthesize.
    text: str

    # Voice and model selection.
    voice_id: str = "21m00Tcm4TlvDq8ikWAM"
    model_id: str = "eleven_multilingual_v2"

    # Voice settings -- valid ranges are not enforced here; TODO confirm
    # against the speech provider's documentation.
    stability: float = 0.5
    similarity_boost: float = 0.5
    style: float = 0.0
    use_speaker_boost: bool = True
    speed: float = 1.0

    # Output encoding identifier.
    output_format: str = "mp3_44100_128"
|
|
|
|
|
|
class SoundEffectRequest(BaseModel):
    """Request body for the sound-effect generation endpoint."""

    # Description of the sound to generate.
    text: str

    # Optional length in seconds; None leaves the choice to the service.
    duration_seconds: Optional[float] = None

    # Weight given to the prompt -- range not enforced here; TODO confirm.
    prompt_influence: float = 0.3
|
|
|
|
|
|
class ImageUpscaleRequest(BaseModel):
    """Request body for the image upscaling endpoint."""

    # ID of the asset to upscale (parsed as a UUID by the endpoint).
    asset_id: str

    scale: int = 2
    model: str = "Standard V2"
    output_format: str = "png"
    crop_to_fill: bool = False

    # Face enhancement
    face_enhancement: bool = False
    face_enhancement_creativity: Optional[float] = None
    face_enhancement_strength: Optional[float] = None

    # Frontend matches
    denoise_strength: Optional[float] = None
    sharpen: Optional[float] = None

    # Legacy / Other params
    detail: Optional[float] = None
    focus_boost: Optional[float] = None
    strength: Optional[float] = None
    subject_detection: Optional[str] = None
|
|
|
|
|
|
class VideoUpscaleRequest(BaseModel):
    """Request body for the video upscaling endpoint."""

    # ID of the asset to upscale (parsed as a UUID by the endpoint).
    asset_id: str

    scale: int = 2
    model: str = "auto"
    frame_interpolation: int = 1

    # New Topaz parameters
    fps: Optional[float] = None
    sharpening: Optional[int] = None  # 0-100
    recover_detail: Optional[int] = None  # 0-100
    add_noise: Optional[int] = None  # 0-100
    # Declared once; the original repeated this field with the same default.
    video_type: Optional[str] = "Progressive"  # Progressive, Interlaced, Interlaced Progressive
    face_enhancement: bool = False
|
|
|
|
|
|
class FrameExtractionRequest(BaseModel):
    """Request body for extracting a single frame from a video asset."""

    # ID of the video asset to read from.
    asset_id: str

    # Position of the frame -- presumably seconds from the start; TODO confirm
    # against frame_extractor.extract_frame.
    timestamp: float
|
|
|
|
|
|
class RemoveBackgroundRequest(BaseModel):
    """Request body for the background removal endpoint."""

    # ID of the image asset (parsed as a UUID by the endpoint).
    asset_id: str

    # Both values are stored in the job's input data.
    output_format: str = "png"
    refine_mask: bool = True
|
|
|
|
|
|
class VoiceToTextRequest(BaseModel):
    """Request body for the audio transcription endpoint."""

    # ID of the audio asset (parsed as a UUID by the endpoint).
    asset_id: str

    output_format: str = "txt"

    # Translation switch and the language code used with it.
    translate: bool = False
    target_language: str = "EN-US"
|
|
|
|
|
|
class AltTextRequest(BaseModel):
    """Request body for the alt-text generation endpoint."""

    # ID of the image asset (parsed as a UUID by the endpoint).
    asset_id: str
|
|
|
|
|
|
class PromptEnhanceRequest(BaseModel):
    """Request body for the prompt enhancement endpoint.

    Every field is forwarded as a keyword argument to ``prompt_studio.enhance``.
    """

    # Prompt text to enhance.
    prompt: str

    style: str = "cinematic"
    provider: str = "openai"

    # Switches for optional sections of the enhanced output.
    include_negative: bool = True
    include_technical: bool = True

    language: str = "en"
|
|
|
|
|
|
class MermaidRenderRequest(BaseModel):
    """Request body for rendering Mermaid diagram code."""

    # Mermaid source to render.
    code: str

    # Rendering options, passed through to markdown_tools.render_mermaid.
    output_format: str = "svg"
    theme: str = "default"
    background: str = "transparent"
|
|
|
|
|
|
class MermaidGenerateRequest(BaseModel):
    """Request body for generating a Mermaid diagram from a description."""

    # Natural-language description of the diagram.
    description: str

    diagram_type: str = "flowchart"
    style: str = "detailed"

    # When true, the endpoint also renders the generated code.
    render: bool = True
|
|
|
|
|
|
class MarkdownConvertRequest(BaseModel):
    """Request body for converting Markdown content."""

    # Markdown source text.
    content: str

    # Conversion options, passed through to markdown_tools.convert_markdown.
    output_format: str = "html"
    theme: str = "github"
|
|
|
|
|
|
class MarkdownGenerateRequest(BaseModel):
    """Request body for AI-generated Markdown content."""

    # Subject to write about.
    topic: str

    # Generation options, passed through to
    # markdown_tools.generate_markdown_with_ai.
    content_type: str = "article"
    length: str = "medium"
    include_toc: bool = True
|
|
|
|
|
|
# ============== IMAGE MODULES ==============
|
|
|
|
def job_response(job: Job) -> dict:
    """Serialize a job into the plain dict returned by the job endpoints.

    The job ID and asset IDs are converted with ``str``, timestamps with
    ``isoformat()``. A falsy ``progress`` is reported as 0; empty or missing
    asset ID lists and missing timestamps are reported as ``None``.
    """

    def _id_strings(ids):
        # Empty and missing lists both collapse to None.
        return [str(item) for item in ids] if ids else None

    def _iso(moment):
        return moment.isoformat() if moment else None

    payload = {
        "id": str(job.id),
        "module": job.module,
        "action": job.action,
        "status": job.status,
        "progress": job.progress or 0,
        "input_data": job.input_data,
        "output_data": job.output_data,
        "input_asset_ids": _id_strings(job.input_asset_ids),
        "output_asset_ids": _id_strings(job.output_asset_ids),
        "error_message": job.error_message,
        "api_provider": job.api_provider,
        "api_model": job.api_model,
        "created_at": _iso(job.created_at),
        "completed_at": _iso(job.completed_at),
    }
    return payload
|
|
|
|
|
|
@router.post("/image/generate")
async def generate_image(
    request: ImageGenerateRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Generate an image using various AI providers

    Providers: openai, dalle3, stable-diffusion, leonardo, ideogram, flux, gemini, nano-banana
    Supports iterative editing with reference_asset_id for nano-banana/gemini providers

    The request (without None fields) is stored as the input data of a queued
    job, and ``image_generator.generate`` is scheduled as a background task.
    When ``reference_asset_id`` points at an asset whose file exists, the
    file is embedded as base64 under ``reference_image`` and the ID is
    dropped from the input data; otherwise the input data is left unchanged.

    Returns:
        The serialized job.

    Raises:
        HTTPException: 400 when ``reference_asset_id`` is not a valid UUID.
    """
    import base64
    import os

    from app.models.asset import Asset

    user = db.query(User).filter(User.email == "test@forge.ai").first()

    input_data = request.model_dump(exclude_none=True)

    # If reference_asset_id is provided, load the image and convert to base64
    if request.reference_asset_id:
        # Parse the ID like the other asset endpoints do, and reject a
        # malformed value up front.
        try:
            reference_id = UUID(request.reference_asset_id)
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid reference_asset_id") from None

        asset = db.query(Asset).filter(Asset.id == reference_id).first()
        # A missing asset or file is tolerated: the job is still queued.
        if asset and asset.file_path and os.path.exists(asset.file_path):
            with open(asset.file_path, "rb") as f:
                image_data = f.read()
            # Convert to base64 for the generator
            input_data["reference_image"] = base64.b64encode(image_data).decode("utf-8")
            # Remove reference_asset_id from input_data (we've converted it)
            del input_data["reference_asset_id"]

    job = Job(
        user_id=user.id if user else None,
        module="image_generator",
        action="generate",
        input_data=input_data,
        status="queued",
        progress=0
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    background_tasks.add_task(image_generator.generate, str(job.id))

    return job_response(job)
|
|
|
|
|
|
@router.post("/image/upscale")
async def upscale_image(
    request: ImageUpscaleRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Upscale an image using Topaz Labs

    Models: proteus, artemis, gaia, iris, nyx, rhea, theia, auto

    Creates a queued job for the asset and dispatches
    ``process_image_upscaling`` with the job ID.

    Returns:
        The serialized job.

    Raises:
        HTTPException: 400 when ``asset_id`` is not a valid UUID, 404 when
            no asset has that ID.
    """
    from app.models.asset import Asset

    user = db.query(User).filter(User.email == "test@forge.ai").first()

    # A malformed ID would otherwise surface as an unhandled ValueError.
    try:
        asset_uuid = UUID(request.asset_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid asset_id") from None

    # Validate asset exists
    asset = db.query(Asset).filter(Asset.id == asset_uuid).first()
    if not asset:
        raise HTTPException(status_code=404, detail="Asset not found")

    job = Job(
        user_id=user.id if user else None,
        module="image_upscaler",
        action="upscale",
        input_data={
            "scale": request.scale,
            "model": request.model,
            "face_enhancement": request.face_enhancement,
            # Use new fields mapped from frontend
            "denoise": request.denoise_strength,  # Map denoise_strength -> denoise for backend service
            "sharpen": request.sharpen,

            # Optional extra params
            "face_enhancement_creativity": request.face_enhancement_creativity,
            "face_enhancement_strength": request.face_enhancement_strength,

            "output_format": request.output_format
        },
        input_asset_ids=[asset.id],
        status="queued"
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    # Offload to Celery Worker (Redis) for scalability
    process_image_upscaling.delay(str(job.id))

    return job_response(job)
|
|
|
|
|
|
@router.post("/image/remove-background")
async def remove_background(
    request: RemoveBackgroundRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Remove background from image

    Creates a queued job for the asset and schedules
    ``background_remover.remove_background`` as a background task.

    Returns:
        The serialized job.

    Raises:
        HTTPException: 400 when ``asset_id`` is not a valid UUID, 404 when
            no asset has that ID.
    """
    from app.models.asset import Asset

    user = db.query(User).filter(User.email == "test@forge.ai").first()

    # A malformed ID would otherwise surface as an unhandled ValueError.
    try:
        asset_uuid = UUID(request.asset_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid asset_id") from None

    # Validate asset exists
    asset = db.query(Asset).filter(Asset.id == asset_uuid).first()
    if not asset:
        raise HTTPException(status_code=404, detail="Asset not found")

    job = Job(
        user_id=user.id if user else None,
        module="background_remover",
        action="remove",
        input_data={
            "output_format": request.output_format,
            "refine_mask": request.refine_mask
        },
        input_asset_ids=[asset.id],
        status="queued"
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    background_tasks.add_task(background_remover.remove_background, str(job.id))

    return job_response(job)
|
|
|
|
|
|
# ============== VIDEO MODULES ==============
|
|
|
|
@router.post("/video/generate")
async def generate_video(
    request: VideoGenerateRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Generate video using Runway or Google Veo

    Runway: gen3_alpha, gen3_alpha_turbo, gen4
    Veo: veo-3.1-generate-preview, veo-3.1-fast

    The request (without None fields) is stored as the input data of a queued
    job and ``process_video_generation`` is dispatched with the job ID.
    ``input_asset_id``, when given, is recorded as the job's input asset; its
    existence is not checked here.

    Returns:
        The serialized job.

    Raises:
        HTTPException: 400 when ``input_asset_id`` is not a valid UUID.
    """
    user = db.query(User).filter(User.email == "test@forge.ai").first()

    input_asset_ids = []
    if request.input_asset_id:
        # A malformed ID would otherwise surface as an unhandled ValueError.
        try:
            input_asset_ids.append(UUID(request.input_asset_id))
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid input_asset_id") from None

    job = Job(
        user_id=user.id if user else None,
        module="video_generator",
        action="generate",
        input_data=request.model_dump(exclude_none=True),
        # Store None rather than an empty list when no input asset was given.
        input_asset_ids=input_asset_ids or None,
        status="queued"
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    # Offload to Celery Worker (Redis) for scalability
    process_video_generation.delay(str(job.id))

    return job_response(job)
|
|
|
|
|
|
@router.post("/video/upscale")
async def upscale_video(
    request: VideoUpscaleRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Upscale video using Topaz Labs

    Creates a queued job for the asset and schedules
    ``video_upscaler.upscale`` as a background task.

    Returns:
        The serialized job.

    Raises:
        HTTPException: 400 when ``asset_id`` is not a valid UUID, 404 when
            no asset has that ID.
    """
    from app.models.asset import Asset

    user = db.query(User).filter(User.email == "test@forge.ai").first()

    # A malformed ID would otherwise surface as an unhandled ValueError.
    try:
        asset_uuid = UUID(request.asset_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid asset_id") from None

    # Validate asset exists
    asset = db.query(Asset).filter(Asset.id == asset_uuid).first()
    if not asset:
        raise HTTPException(status_code=404, detail="Asset not found")

    job = Job(
        user_id=user.id if user else None,
        module="video_upscaler",
        action="upscale",
        input_data={
            "scale": request.scale,
            "model": request.model,
            "frame_interpolation": request.frame_interpolation,
            "fps": request.fps,
            "sharpening": request.sharpening,
            "recover_detail": request.recover_detail,
            "add_noise": request.add_noise,
            "video_type": request.video_type,
            "face_enhancement": request.face_enhancement
        },
        input_asset_ids=[asset.id],
        status="queued"
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    background_tasks.add_task(video_upscaler.upscale, str(job.id))

    return job_response(job)
|
|
|
|
|
|
@router.post("/video/extract-frame")
async def extract_frame_endpoint(
    request: FrameExtractionRequest,
    db: Session = Depends(get_db)
):
    """Extract a single frame from a video

    Calls ``frame_extractor.extract_frame`` with the asset ID and timestamp
    and returns its result. Any exception is printed with its traceback and
    reported as HTTP 400 with the exception text as the detail.
    """
    import traceback

    from app.services import frame_extractor

    try:
        # extract_frame is synchronous (subprocess-based) and is called
        # directly rather than via a threadpool or background task: a
        # fast-seeking ffmpeg extract is usually under a second, unlike the
        # Topaz upscaler, which takes minutes.
        return frame_extractor.extract_frame(request.asset_id, request.timestamp)
    except Exception as e:
        traceback.print_exc()
        raise HTTPException(status_code=400, detail=str(e))
|
|
|
|
|
|
@router.get("/video/subtitles/config")
async def get_subtitle_config():
    """Return the subtitle configuration options reported by subtitle_processor."""
    config = subtitle_processor.get_subtitle_config()
    return config
|
|
|
|
|
|
@router.post("/video/subtitles")
async def generate_subtitles(
    file: UploadFile = File(...),
    source_language: str = Form("auto"),
    target_language: Optional[str] = Form(None),
    burn_subtitles: bool = Form(False),
    whisper_model: str = Form("base"),
    output_format: str = Form("srt"),
    # Styling options
    font: str = Form("Arial"),
    font_size: int = Form(24),
    text_color: str = Form("white"),
    outline_color: str = Form("black"),
    outline_width: float = Form(2.0),
    background_color: Optional[str] = Form(None),
    background_opacity: float = Form(0.0),
    position: str = Form("bottom"),
    alignment: str = Form("center"),
    margin_v: int = Form(30),
    margin_h: int = Form(20),
    shadow: int = Form(0),
    bold: bool = Form(False),
    italic: bool = Form(False),
    font_preset: Optional[str] = Form(None),
    word_timestamps: bool = Form(False),
    background_tasks: BackgroundTasks = None,
    db: Session = Depends(get_db)
):
    """
    Generate subtitles for video using Whisper + DeepL

    The uploaded file is stored as an asset, a queued job is created with all
    form values as its input data, and subtitle processing is scheduled when
    a background task queue is available.

    Parameters:
    - source_language: Source language code or "auto" for detection
    - target_language: Target language code for translation (optional)
    - burn_subtitles: Whether to burn subtitles into video
    - whisper_model: Whisper model (tiny/base/small/medium/large/large-v2/large-v3)
    - output_format: Output format (srt/vtt/ass)

    Styling (for burning):
    - font: Font family name
    - font_size: Font size in points
    - text_color: Primary text color
    - outline_color: Text outline color
    - outline_width: Outline thickness (0-5)
    - background_color: Background box color
    - background_opacity: Background opacity (0-1)
    - position: Vertical position (bottom/top/center)
    - alignment: Horizontal alignment (left/center/right)
    - margin_v: Vertical margin from edge
    - margin_h: Horizontal margin
    - shadow: Shadow depth (0-4)
    - bold: Use bold text
    - italic: Use italic text
    - font_preset: Predefined style preset (default/cinematic/documentary/news/social_media/minimal/bold)
    - word_timestamps: Include word-level timestamps
    """
    from app.api.v1.assets import upload_asset

    user = db.query(User).filter(User.email == "test@forge.ai").first()

    # Persist the upload first so the job can reference it as an input asset.
    asset = await upload_asset(file=file, source_module="subtitle_processor", db=db)

    settings = {
        "source_language": source_language,
        "target_language": target_language,
        "burn_subtitles": burn_subtitles,
        "whisper_model": whisper_model,
        "output_format": output_format,
        "font": font,
        "font_size": font_size,
        "text_color": text_color,
        "outline_color": outline_color,
        "outline_width": outline_width,
        "background_color": background_color,
        "background_opacity": background_opacity,
        "position": position,
        "alignment": alignment,
        "margin_v": margin_v,
        "margin_h": margin_h,
        "shadow": shadow,
        "bold": bold,
        "italic": italic,
        "font_preset": font_preset,
        "word_timestamps": word_timestamps
    }
    owner_id = user.id if user else None

    job = Job(
        user_id=owner_id,
        module="subtitle_processor",
        action="generate",
        input_data=settings,
        input_asset_ids=[asset.id],
        status="queued"
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    # background_tasks defaults to None, so scheduling is conditional.
    if background_tasks:
        background_tasks.add_task(subtitle_processor.process, str(job.id))

    return job_response(job)
|
|
|
|
|
|
# ============== AUDIO MODULES ==============
|
|
|
|
@router.post("/audio/voice-to-text")
async def transcribe_audio(
    request: VoiceToTextRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Transcribe audio to text using Whisper

    Creates a queued job for the asset and schedules
    ``voice_to_text.transcribe`` as a background task.

    Returns:
        The serialized job.

    Raises:
        HTTPException: 400 when ``asset_id`` is not a valid UUID, 404 when
            no asset has that ID.
    """
    from app.models.asset import Asset

    user = db.query(User).filter(User.email == "test@forge.ai").first()

    # A malformed ID would otherwise surface as an unhandled ValueError.
    try:
        asset_uuid = UUID(request.asset_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid asset_id") from None

    # Validate asset exists
    asset = db.query(Asset).filter(Asset.id == asset_uuid).first()
    if not asset:
        raise HTTPException(status_code=404, detail="Asset not found")

    job = Job(
        user_id=user.id if user else None,
        module="voice_to_text",
        action="transcribe",
        input_data={
            "output_format": request.output_format,
            "translate": request.translate,
            "target_language": request.target_language
        },
        input_asset_ids=[asset.id],
        status="queued"
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    background_tasks.add_task(voice_to_text.transcribe, str(job.id))

    return job_response(job)
|
|
|
|
|
|
@router.post("/audio/text-to-speech")
async def synthesize_speech(
    request: TextToSpeechRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Convert text to speech using ElevenLabs

    Models: eleven_multilingual_v2, eleven_flash_v2_5, eleven_turbo_v2_5, eleven_v3

    The full request is stored as the input data of a queued job and
    ``text_to_speech.synthesize`` is scheduled as a background task.
    """
    user = db.query(User).filter(User.email == "test@forge.ai").first()
    owner_id = user.id if user else None

    job = Job(
        user_id=owner_id,
        module="text_to_speech",
        action="synthesize",
        input_data=request.model_dump(),
        status="queued"
    )

    # Persist, then reload so generated columns are populated.
    db.add(job)
    db.commit()
    db.refresh(job)

    job_id = str(job.id)
    background_tasks.add_task(text_to_speech.synthesize, job_id)

    return job_response(job)
|
|
|
|
|
|
@router.post("/audio/speech-to-speech")
async def convert_voice(
    file: UploadFile = File(...),
    voice_id: str = Form(...),
    background_tasks: BackgroundTasks = None,
    db: Session = Depends(get_db)
):
    """Convert voice to another voice using ElevenLabs

    The uploaded file is stored as an asset, a queued job is created with the
    target ``voice_id``, and ``text_to_speech.speech_to_speech`` is scheduled
    when a background task queue is available.
    """
    from app.api.v1.assets import upload_asset

    user = db.query(User).filter(User.email == "test@forge.ai").first()

    # Persist the upload first so the job can reference it as an input asset.
    asset = await upload_asset(file=file, source_module="speech_to_speech", db=db)

    owner_id = user.id if user else None
    job = Job(
        user_id=owner_id,
        module="speech_to_speech",
        action="convert",
        input_data={"voice_id": voice_id},
        input_asset_ids=[asset.id],
        status="queued"
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    # background_tasks defaults to None, so scheduling is conditional.
    if background_tasks:
        background_tasks.add_task(text_to_speech.speech_to_speech, str(job.id))

    return job_response(job)
|
|
|
|
|
|
@router.post("/audio/sound-effects")
async def generate_sound_effect(
    request: SoundEffectRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Generate sound effects from text description using ElevenLabs

    Describe the sound you want - explosions, footsteps, ambient sounds, etc.
    Max duration: 22 seconds

    The full request is stored as the input data of a queued job and
    ``sound_effects.generate_sound_effect_job`` is scheduled as a background
    task.
    """
    user = db.query(User).filter(User.email == "test@forge.ai").first()
    owner_id = user.id if user else None

    job = Job(
        user_id=owner_id,
        module="sound_effects",
        action="generate",
        input_data=request.model_dump(),
        status="queued"
    )

    # Persist, then reload so generated columns are populated.
    db.add(job)
    db.commit()
    db.refresh(job)

    job_id = str(job.id)
    background_tasks.add_task(sound_effects.generate_sound_effect_job, job_id)

    return job_response(job)
|
|
|
|
|
|
@router.get("/audio/sound-effects/formats")
async def get_sound_effect_formats():
    """Return the output formats reported by the sound effects generator."""
    formats = await sound_effects.get_sound_effects_generator().get_available_formats()
    return formats
|
|
|
|
|
|
# ============== TEXT MODULES ==============
|
|
|
|
@router.post("/text/alt-text")
async def generate_alt_text(
    request: AltTextRequest,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db)
):
    """Generate alt text for image using GPT-4 Vision

    Creates a queued job for the asset (with empty input data) and schedules
    ``alt_text_generator.generate`` as a background task.

    Returns:
        The serialized job.

    Raises:
        HTTPException: 400 when ``asset_id`` is not a valid UUID, 404 when
            no asset has that ID.
    """
    from app.models.asset import Asset

    user = db.query(User).filter(User.email == "test@forge.ai").first()

    # A malformed ID would otherwise surface as an unhandled ValueError.
    try:
        asset_uuid = UUID(request.asset_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid asset_id") from None

    # Validate asset exists
    asset = db.query(Asset).filter(Asset.id == asset_uuid).first()
    if not asset:
        raise HTTPException(status_code=404, detail="Asset not found")

    job = Job(
        user_id=user.id if user else None,
        module="alt_text_generator",
        action="generate",
        input_data={},
        input_asset_ids=[asset.id],
        status="queued"
    )
    db.add(job)
    db.commit()
    db.refresh(job)

    background_tasks.add_task(alt_text_generator.generate, str(job.id))

    return job_response(job)
|
|
|
|
|
|
@router.get("/image/providers")
def get_image_providers():
    """Get all image providers with their capabilities (legacy format)

    Returns a copy of ``IMAGE_PROVIDERS`` in which the ``stable-diffusion``
    entry, when present, carries ``STABILITY_STYLE_PRESETS`` under
    ``style_presets``.
    """
    from app.services.image_generator import IMAGE_PROVIDERS, STABILITY_STYLE_PRESETS

    # Add Stability style presets to the config
    providers = IMAGE_PROVIDERS.copy()
    if "stable-diffusion" in providers:
        # The copy above is shallow, so assigning into the nested entry would
        # write through to the module-level IMAGE_PROVIDERS. Build a new
        # entry instead and leave the shared one untouched.
        providers["stable-diffusion"] = {
            **providers["stable-diffusion"],
            "style_presets": STABILITY_STYLE_PRESETS,
        }

    return providers
|
|
|
|
|
|
@router.get("/capabilities/image")
def get_image_provider_capabilities():
    """Return the configurations of all image providers, including their controls."""
    from app.providers.image_providers import get_image_provider_configs

    configs = get_image_provider_configs()
    return configs
|
|
|
|
|
|
@router.get("/capabilities/video")
def get_video_provider_capabilities():
    """Return the configurations of all video providers, including their controls."""
    from app.providers.video_providers import get_video_provider_configs

    configs = get_video_provider_configs()
    return configs
|
|
|
|
|
|
@router.get("/capabilities/image/{provider_id}")
def get_image_provider_config(provider_id: str):
    """Return the configuration of one image provider.

    Raises:
        HTTPException: 404 when ``provider_id`` is not a known provider.
    """
    from app.providers.image_providers import IMAGE_PROVIDER_CONFIGS

    if provider_id in IMAGE_PROVIDER_CONFIGS:
        config = IMAGE_PROVIDER_CONFIGS[provider_id]
        return config.model_dump(by_alias=True)

    raise HTTPException(status_code=404, detail="Provider not found")
|
|
|
|
|
|
@router.get("/capabilities/video/{provider_id}")
def get_video_provider_config(provider_id: str):
    """Return the configuration of one video provider.

    Raises:
        HTTPException: 404 when ``provider_id`` is not a known provider.
    """
    from app.providers.video_providers import VIDEO_PROVIDER_CONFIGS

    if provider_id in VIDEO_PROVIDER_CONFIGS:
        config = VIDEO_PROVIDER_CONFIGS[provider_id]
        return config.model_dump(by_alias=True)

    raise HTTPException(status_code=404, detail="Provider not found")
|
|
|
|
|
|
@router.post("/text/enhance-prompt")
async def enhance_prompt(
    request: PromptEnhanceRequest,
    db: Session = Depends(get_db)
):
    """Enhance a prompt using AI (Gemini/OpenAI)

    Styles: cinematic, photographic, artistic, product, fantasy, minimal,
            vintage, futuristic, anime, portrait, landscape, abstract,
            fashion, architecture, food

    Providers: openai, gpt-image-1, stable-diffusion, midjourney, flux, leonardo

    Forwards every request field to ``prompt_studio.enhance`` and returns its
    result unchanged.
    """
    return await prompt_studio.enhance(
        prompt=request.prompt,
        style=request.style,
        provider=request.provider,
        include_negative=request.include_negative,
        include_technical=request.include_technical,
        language=request.language
    )
|
|
|
|
|
|
@router.get("/text/prompt-styles")
async def get_prompt_styles():
    """Return the prompt enhancement styles reported by prompt_studio."""
    styles = prompt_studio.get_available_styles()
    return styles
|
|
|
|
|
|
# ============== MARKDOWN & MERMAID MODULES ==============
|
|
|
|
@router.post("/text/mermaid/render")
async def render_mermaid_diagram(request: MermaidRenderRequest):
    """Render Mermaid diagram code to SVG/PNG

    Themes: default, dark, forest, neutral
    Formats: svg, png

    Forwards every request field to ``markdown_tools.render_mermaid`` and
    returns its result unchanged.
    """
    return await markdown_tools.render_mermaid(
        code=request.code,
        output_format=request.output_format,
        theme=request.theme,
        background=request.background
    )
|
|
|
|
|
|
@router.post("/text/mermaid/generate")
async def generate_mermaid_diagram(request: MermaidGenerateRequest):
    """Generate Mermaid diagram from natural language description

    Diagram types: flowchart, sequence, class, state, er, journey,
                   gantt, pie, mindmap, timeline, gitgraph

    Styles: simple, detailed, complex

    When ``render`` is set and generation reports success with non-empty
    code, the code is also rendered and the render result is attached to the
    response under ``rendered``.
    """
    diagram = await markdown_tools.generate_mermaid_with_ai(
        description=request.description,
        diagram_type=request.diagram_type,
        style=request.style
    )

    # Optionally render the diagram
    should_render = request.render and diagram.get("success") and diagram.get("code")
    if should_render:
        diagram["rendered"] = await markdown_tools.render_mermaid(diagram["code"])

    return diagram
|
|
|
|
|
|
@router.get("/text/mermaid/templates")
async def get_mermaid_templates():
    """Return the Mermaid diagram templates reported by markdown_tools."""
    templates = markdown_tools.get_mermaid_templates()
    return templates
|
|
|
|
|
|
@router.get("/text/mermaid/templates/{diagram_type}")
async def get_mermaid_template(diagram_type: str):
    """Return the Mermaid template for one diagram type.

    Raises:
        HTTPException: 404 when the lookup yields a falsy result.
    """
    template = markdown_tools.get_mermaid_template(diagram_type)
    if template:
        return template

    raise HTTPException(status_code=404, detail=f"Template not found: {diagram_type}")
|
|
|
|
|
|
@router.post("/text/markdown/convert")
async def convert_markdown(request: MarkdownConvertRequest):
    """Convert Markdown to HTML or plain text

    Output formats: html, plain
    Themes: github (for HTML)

    Forwards every request field to ``markdown_tools.convert_markdown`` and
    returns its result unchanged.
    """
    return await markdown_tools.convert_markdown(
        content=request.content,
        output_format=request.output_format,
        theme=request.theme
    )
|
|
|
|
|
|
@router.post("/text/markdown/generate")
async def generate_markdown_content(request: MarkdownGenerateRequest):
    """Generate Markdown content using AI

    Content types: article, documentation, readme, tutorial, report
    Length: short, medium, long

    Forwards every request field to
    ``markdown_tools.generate_markdown_with_ai`` and returns its result
    unchanged.
    """
    return await markdown_tools.generate_markdown_with_ai(
        topic=request.topic,
        content_type=request.content_type,
        length=request.length,
        include_toc=request.include_toc
    )
|
|
|
|
|
|
# ============== UTILITY ENDPOINTS ==============
|
|
|
|
@router.get("/voices")
async def get_elevenlabs_voices():
    """Return the ElevenLabs voices reported by text_to_speech.get_voices."""
    return await text_to_speech.get_voices()
|
|
|
|
|
|
@router.get("/models/{provider}")
async def get_provider_models(provider: str):
    """Return the model names listed for a provider.

    The list comes from a table hard-coded in this function; an unknown
    provider yields an empty list.
    """
    catalog = {
        # Image providers
        "openai": ["gpt-image-1", "dall-e-3", "dall-e-2"],
        "stable-diffusion": ["sd3-large", "sd3-medium", "sdxl-1.0", "stable-cascade"],
        "leonardo": ["phoenix-1", "kino-xl", "anime-xl"],
        "ideogram": ["V_2", "V_2_TURBO"],
        "flux": ["flux-pro-1.1", "flux-dev", "flux-schnell"],
        "gemini": ["gemini-2.0-flash-exp"],
        # Video providers
        "runway": ["gen3_alpha", "gen3_alpha_turbo", "gen4"],
        "veo": [
            "veo-3.1-generate-preview",
            "veo-3.1-fast-generate-preview",
            "veo-3.0-generate-001",
            "veo-3.0-fast-generate-001",
            "veo-2.0-generate-001",
        ],
        # Upscaling
        "topaz-image": ["proteus", "artemis", "gaia", "iris", "nyx", "rhea", "theia", "auto"],
        "topaz-video": ["auto", "proteus", "artemis"],
        # Audio
        "elevenlabs": [
            "eleven_multilingual_v2",
            "eleven_flash_v2_5",
            "eleven_turbo_v2_5",
            "eleven_v3",
            "eleven_monolingual_v1",
        ],
    }

    if provider in catalog:
        return catalog[provider]
    return []
|
|
|
|
|
|
@router.get("/models")
async def get_all_models():
    """Return a hard-coded model catalog grouped by category and provider.

    Each provider entry lists its ``models``, its ``default`` model and a
    list of ``features``.
    """
    image_providers = {
        "openai": {
            "models": ["gpt-image-1", "dall-e-3"],
            "default": "gpt-image-1",
            "features": ["quality", "background", "transparent"],
        },
        "stable-diffusion": {
            "models": ["sd3-large", "sd3-medium", "sdxl-1.0"],
            "default": "sd3-large",
            "features": ["negative_prompt", "style_preset", "img2img"],
        },
        "flux": {
            "models": ["flux-pro-1.1", "flux-dev", "flux-schnell"],
            "default": "flux-pro-1.1",
            "features": ["img2img"],
        },
    }

    video_providers = {
        "runway": {
            "models": ["gen3_alpha", "gen3_alpha_turbo", "gen4"],
            "default": "gen3_alpha_turbo",
            "features": ["camera_control", "image_to_video"],
        },
        "veo": {
            "models": ["veo-3.1-generate-preview", "veo-3.1-fast-generate-preview", "veo-3.0-generate-001"],
            "default": "veo-3.1-generate-preview",
            "features": ["audio", "reference_images", "video_extension", "frame_interpolation"],
        },
    }

    audio_providers = {
        "elevenlabs": {
            "models": ["eleven_multilingual_v2", "eleven_flash_v2_5", "eleven_turbo_v2_5", "eleven_v3"],
            "default": "eleven_multilingual_v2",
            "features": ["32_languages", "voice_cloning", "voice_settings"],
        },
    }

    return {
        "image": image_providers,
        "video": video_providers,
        "audio": audio_providers,
    }
|