forge/backend/app/services/sound_effects.py
DJP 7a804e896d Initial commit - FORGE AI unified platform
Features:
- Image generation (OpenAI, Gemini, Leonardo, Bria, Stability, Flux)
- Nano Banana iterative editing
- Video generation and upscaling
- Audio TTS, STT, sound effects (ElevenLabs)
- Text prompt studio and alt text
- User authentication with JWT/cookies
- Admin panel with voice management
- Job queue with Celery
- PostgreSQL + Redis backend
- Next.js 15 + FastAPI architecture

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2025-12-09 20:39:00 -05:00

229 lines
6.9 KiB
Python

"""Sound Effects Generation Service using ElevenLabs API"""
import httpx
import structlog
from typing import Optional, Dict, Any
from pathlib import Path
import uuid
from app.config import settings
logger = structlog.get_logger()
# Endpoint that turns a text description into a sound effect
ELEVENLABS_SFX_URL = "https://api.elevenlabs.io/v1/sound-generation"

# Output format identifiers exposed to callers, mapped to display labels
OUTPUT_FORMATS = dict(
    mp3_44100_128="MP3 (44.1kHz, 128kbps)",
    mp3_44100_192="MP3 (44.1kHz, 192kbps)",
    pcm_48000="WAV (48kHz)",
    opus_48000_64="Opus (48kHz, 64kbps)",
)
class SoundEffectsGenerator:
    """Generate sound effects using the ElevenLabs sound-generation API.

    The API key is read from ``settings.elevenlabs_api_key`` when the
    instance is created. A missing key only logs a warning at that point;
    ``generate`` raises ``ValueError`` once the key is actually needed.
    """

    # Requested durations above this value are capped before being sent.
    MAX_DURATION_SECONDS = 22

    def __init__(self):
        self.api_key = settings.elevenlabs_api_key
        if not self.api_key:
            logger.warning("ElevenLabs API key not configured")

    @staticmethod
    def _extension_for_format(output_format: str) -> str:
        """Map an output format identifier to the file extension to save as."""
        # NOTE(review): "pcm_*" is saved as ".wav" unchanged; if the API
        # returns headerless PCM the file is not a valid WAV container -
        # confirm against the ElevenLabs output_format documentation.
        for prefix, extension in (
            ("mp3", ".mp3"),
            ("pcm", ".wav"),
            ("opus", ".opus"),
        ):
            if output_format.startswith(prefix):
                return extension
        # Unrecognised identifiers are stored as MP3.
        return ".mp3"

    async def generate(
        self,
        text: str,
        duration_seconds: Optional[float] = None,
        prompt_influence: float = 0.3,
        loop: bool = False,
        output_format: str = "mp3_44100_128",
        output_path: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Generate a sound effect from a text description and save it to disk.

        Args:
            text: Description of the sound effect to generate (must not be blank)
            duration_seconds: Desired duration; values above 22 are capped,
                None lets the API choose
            prompt_influence: How closely to follow the prompt (0.0-1.0)
            loop: Whether to generate a looping sound effect
            output_format: Audio format (mp3_44100_128, pcm_48000, etc.)
            output_path: Optional path to save the audio file; when omitted a
                random ``sfx_<hex>`` name under ``<storage_path>/audio`` is used

        Returns:
            Dict with ``file_path``, ``file_size`` (bytes written), ``format``,
            ``duration_seconds`` (the value actually sent to the API, or None)
            and ``loop``.

        Raises:
            ValueError: If the API key is missing, ``text`` is blank, or the
                API answers 422 (validation error).
            httpx.HTTPStatusError: For any other error status from the API.
        """
        if not self.api_key:
            raise ValueError("ElevenLabs API key not configured")
        # Fail fast instead of spending a network round trip on a request
        # that carries no description at all.
        if not text or not text.strip():
            raise ValueError("Sound effect text must not be empty")

        # Cap once so the request payload and the returned metadata agree.
        effective_duration = (
            None
            if duration_seconds is None
            else min(duration_seconds, self.MAX_DURATION_SECONDS)
        )

        logger.info(
            "Generating sound effect",
            text=text[:50] + "..." if len(text) > 50 else text,
            duration=duration_seconds,
            loop=loop,
        )

        headers = {
            "xi-api-key": self.api_key,
            "Content-Type": "application/json",
        }
        payload: Dict[str, Any] = {
            "text": text,
            "prompt_influence": prompt_influence,
        }
        if effective_duration is not None:
            payload["duration_seconds"] = effective_duration
        if loop:
            payload["loop"] = True
        params = {"output_format": output_format}

        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                ELEVENLABS_SFX_URL,
                headers=headers,
                json=payload,
                params=params,
            )

        if response.status_code == 422:
            # A non-JSON error body must not mask the validation failure.
            try:
                error_detail = response.json()
            except ValueError:
                error_detail = response.text
            raise ValueError(f"Validation error: {error_detail}")
        response.raise_for_status()

        # Generate output path if not provided
        if not output_path:
            extension = self._extension_for_format(output_format)
            output_path = str(
                Path(settings.storage_path)
                / "audio"
                / f"sfx_{uuid.uuid4().hex[:8]}{extension}"
            )

        # Ensure the directory exists, then write the audio bytes
        target = Path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        audio_bytes = response.content
        target.write_bytes(audio_bytes)
        file_size = len(audio_bytes)

        logger.info(
            "Sound effect generated",
            output_path=output_path,
            file_size=file_size,
            format=output_format,
        )
        return {
            "file_path": output_path,
            "file_size": file_size,
            "format": output_format,
            "duration_seconds": effective_duration,
            "loop": loop,
        }

    async def get_available_formats(self) -> Dict[str, str]:
        """Return available output formats (identifier -> display label)."""
        # Hand out a copy so callers cannot mutate the module-level table.
        return dict(OUTPUT_FORMATS)
# Module-wide generator, created on first request
_generator: Optional[SoundEffectsGenerator] = None


def get_sound_effects_generator() -> SoundEffectsGenerator:
    """Return the shared SoundEffectsGenerator, building it on first use."""
    global _generator
    if _generator is not None:
        return _generator
    _generator = SoundEffectsGenerator()
    return _generator
async def generate_sound_effect_job(job_id: str) -> None:
    """Process a sound effect generation job.

    Loads the Job row with the given id, runs the generator with the
    parameters stored in ``job.input_data``, records the resulting file as
    an Asset and marks the job "completed". Progress is committed at 10,
    80 and 100. Any exception raised along the way is logged and the job is
    marked "failed" with the error text; nothing is re-raised from the
    generation path itself.

    Args:
        job_id: Primary key of the Job to process. An unknown id is logged
            and the function returns without doing anything else.
    """
    from app.database import SessionLocal
    from app.models.job import Job
    from app.models.asset import Asset

    # Extension -> MIME type for the files the generator writes.
    # ".opus" files are Ogg-encapsulated Opus, registered as audio/ogg.
    mime_types = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".opus": "audio/ogg",
    }

    db = SessionLocal()
    try:
        job = db.query(Job).filter(Job.id == job_id).first()
        if not job:
            logger.error(f"Job {job_id} not found")
            return

        job.status = "processing"
        job.progress = 10
        db.commit()

        input_data = job.input_data
        generator = get_sound_effects_generator()

        # Generate the sound effect
        result = await generator.generate(
            text=input_data["text"],
            duration_seconds=input_data.get("duration_seconds"),
            prompt_influence=input_data.get("prompt_influence", 0.3),
            loop=input_data.get("loop", False),
            output_format=input_data.get("output_format", "mp3_44100_128"),
        )
        job.progress = 80
        db.commit()

        # Create asset for the output
        file_path = result["file_path"]
        filename = Path(file_path).name
        asset = Asset(
            user_id=job.user_id,
            original_filename=filename,
            stored_filename=filename,
            file_path=file_path,
            file_type="audio",
            # Unknown extensions keep the previous "audio/wav" fallback.
            mime_type=mime_types.get(Path(filename).suffix.lower(), "audio/wav"),
            file_size_bytes=result["file_size"],
            source_module="sound_effects",
            source_job_id=job.id,
        )
        db.add(asset)
        db.commit()
        # Refresh so the database-assigned asset id is available below.
        db.refresh(asset)

        job.output_asset_ids = [asset.id]
        job.output_data = {
            "duration_seconds": result.get("duration_seconds"),
            "format": result["format"],
            "loop": result["loop"],
        }
        job.status = "completed"
        job.progress = 100
        db.commit()
        logger.info(f"Sound effect job {job_id} completed successfully")
    except Exception as e:
        logger.error(f"Sound effect job {job_id} failed: {str(e)}")
        # Discard any half-finished transaction first: after a failed
        # flush/commit the session refuses further work until rolled back,
        # which would otherwise prevent recording the failure.
        db.rollback()
        job = db.query(Job).filter(Job.id == job_id).first()
        if job:
            job.status = "failed"
            job.error_message = str(e)
            db.commit()
    finally:
        db.close()