Features:
- Image generation (OpenAI, Gemini, Leonardo, Bria, Stability, Flux)
- Nano Banana iterative editing
- Video generation and upscaling
- Audio TTS, STT, sound effects (ElevenLabs)
- Text prompt studio and alt text
- User authentication with JWT/cookies
- Admin panel with voice management
- Job queue with Celery
- PostgreSQL + Redis backend
- Next.js 15 + FastAPI architecture

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
229 lines
6.9 KiB
Python
229 lines
6.9 KiB
Python
"""Sound Effects Generation Service using ElevenLabs API"""
|
|
import httpx
|
|
import structlog
|
|
from typing import Optional, Dict, Any
|
|
from pathlib import Path
|
|
import uuid
|
|
|
|
from app.config import settings
|
|
|
|
# Module-level structlog logger shared by the generator class and the job
# runner defined in this file.
logger = structlog.get_logger()
|
|
|
|
# Endpoint that sound-effect generation requests are POSTed to.
ELEVENLABS_SFX_URL = "https://api.elevenlabs.io/v1/sound-generation"

# Supported ``output_format`` identifiers mapped to a human-readable label.
OUTPUT_FORMATS: Dict[str, str] = dict(
    (
        ("mp3_44100_128", "MP3 (44.1kHz, 128kbps)"),
        ("mp3_44100_192", "MP3 (44.1kHz, 192kbps)"),
        ("pcm_48000", "WAV (48kHz)"),
        ("opus_48000_64", "Opus (48kHz, 64kbps)"),
    )
)
|
|
|
|
|
|
class SoundEffectsGenerator:
    """Generate sound effects using the ElevenLabs sound-generation API.

    The API key is read from ``settings.elevenlabs_api_key`` when the
    instance is created. A missing key only logs a warning at construction
    time; :meth:`generate` raises ``ValueError`` if it is still missing.
    """

    # Upper bound applied to ``duration_seconds`` before it is sent to the API.
    MAX_DURATION_SECONDS = 22

    # (output_format prefix, file extension) pairs, checked in order.
    _EXTENSIONS_BY_PREFIX = (
        ("mp3", ".mp3"),
        ("pcm", ".wav"),
        ("opus", ".opus"),
    )

    def __init__(self):
        self.api_key = settings.elevenlabs_api_key
        if not self.api_key:
            logger.warning("ElevenLabs API key not configured")

    @classmethod
    def _clamp_duration(cls, duration_seconds: Optional[float]) -> Optional[float]:
        """Return the duration capped at ``MAX_DURATION_SECONDS`` (``None`` passes through)."""
        if duration_seconds is None:
            return None
        return min(duration_seconds, cls.MAX_DURATION_SECONDS)

    @classmethod
    def _extension_for(cls, output_format: str) -> str:
        """Return the file extension for an output format, defaulting to ``.mp3``."""
        for prefix, extension in cls._EXTENSIONS_BY_PREFIX:
            if output_format.startswith(prefix):
                return extension
        return ".mp3"

    async def generate(
        self,
        text: str,
        duration_seconds: Optional[float] = None,
        prompt_influence: float = 0.3,
        loop: bool = False,
        output_format: str = "mp3_44100_128",
        output_path: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Generate a sound effect from a text description and save it to disk.

        Args:
            text: Description of the sound effect to generate
            duration_seconds: Desired duration; values above 22 are clamped
                to 22, and None omits the field from the request
            prompt_influence: How closely to follow the prompt (0.0-1.0)
            loop: Whether to generate a looping sound effect
            output_format: Audio format (mp3_44100_128, pcm_48000, etc.)
            output_path: Optional path to save the audio file; when omitted
                a name is generated under ``settings.storage_path / "audio"``

        Returns:
            Dict with ``file_path``, ``file_size``, ``format``,
            ``duration_seconds`` (the clamped value that was actually
            requested from the API, or None) and ``loop``.

        Raises:
            ValueError: If no API key is configured, or the API answers 422.
            httpx.HTTPStatusError: For any other non-success response.
        """
        if not self.api_key:
            raise ValueError("ElevenLabs API key not configured")

        # Clamp once so the value sent to the API and the value reported
        # back to the caller cannot disagree.
        effective_duration = self._clamp_duration(duration_seconds)

        preview = text[:50] + "..." if len(text) > 50 else text
        logger.info(
            "Generating sound effect",
            text=preview,
            duration=duration_seconds,
            loop=loop,
        )

        headers = {
            "xi-api-key": self.api_key,
            "Content-Type": "application/json",
        }

        payload: Dict[str, Any] = {
            "text": text,
            "prompt_influence": prompt_influence,
        }
        # Optional fields are only sent when the caller asked for them.
        if effective_duration is not None:
            payload["duration_seconds"] = effective_duration
        if loop:
            payload["loop"] = True

        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                ELEVENLABS_SFX_URL,
                headers=headers,
                json=payload,
                params={"output_format": output_format},
            )

        if response.status_code == 422:
            try:
                error_detail = response.json()
            except ValueError:
                # Body was not valid JSON; keep reporting this as a
                # validation failure instead of surfacing a decode error.
                error_detail = response.text
            raise ValueError(f"Validation error: {error_detail}")

        response.raise_for_status()

        # NOTE(review): the response bytes are written unmodified, so a
        # ``pcm_*`` format is stored under a ``.wav`` name without any header
        # being added here - confirm consumers can read the file as-is.
        extension = self._extension_for(output_format)

        # Generate output path if not provided
        if not output_path:
            output_path = str(
                Path(settings.storage_path)
                / "audio"
                / f"sfx_{uuid.uuid4().hex[:8]}{extension}"
            )

        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)

        audio_bytes = response.content
        with open(destination, "wb") as f:
            f.write(audio_bytes)

        file_size = len(audio_bytes)

        logger.info(
            "Sound effect generated",
            output_path=output_path,
            file_size=file_size,
            format=output_format,
        )

        return {
            "file_path": output_path,
            "file_size": file_size,
            "format": output_format,
            "duration_seconds": effective_duration,
            "loop": loop,
        }

    async def get_available_formats(self) -> Dict[str, str]:
        """Return available output formats (a copy, so callers cannot mutate the shared table)."""
        return dict(OUTPUT_FORMATS)
|
|
|
|
|
|
# Lazily created shared instance; stays None until the accessor is first called.
_generator: Optional[SoundEffectsGenerator] = None


def get_sound_effects_generator() -> SoundEffectsGenerator:
    """Return the shared SoundEffectsGenerator, creating it on first use."""
    global _generator
    if _generator is not None:
        return _generator
    _generator = SoundEffectsGenerator()
    return _generator
|
|
|
|
|
|
async def generate_sound_effect_job(job_id: str) -> None:
    """Process a sound effect generation job.

    Loads the job, marks it as processing, generates the audio from the
    job's ``input_data``, stores an ``Asset`` row for the resulting file and
    marks the job completed. Any exception is logged and recorded on the job
    as ``status = "failed"`` with ``error_message`` set; it is not re-raised.
    The database session is always closed.

    Args:
        job_id: Identifier of the ``Job`` row to process. If no such job
            exists an error is logged and the function returns.
    """
    # Imported inside the function, as before - presumably to avoid import
    # cycles with the app package; confirm before hoisting to module level.
    from app.database import SessionLocal
    from app.models.job import Job
    from app.models.asset import Asset

    # File extension -> MIME type stored on the asset. Anything not listed
    # keeps the previous "audio/wav" fallback.
    mime_types = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".opus": "audio/ogg",
    }

    db = SessionLocal()

    try:
        job = db.query(Job).filter(Job.id == job_id).first()
        if not job:
            logger.error(f"Job {job_id} not found")
            return

        job.status = "processing"
        job.progress = 10
        db.commit()

        input_data = job.input_data
        generator = get_sound_effects_generator()

        # Generate the sound effect
        result = await generator.generate(
            text=input_data["text"],
            duration_seconds=input_data.get("duration_seconds"),
            prompt_influence=input_data.get("prompt_influence", 0.3),
            loop=input_data.get("loop", False),
            output_format=input_data.get("output_format", "mp3_44100_128"),
        )

        job.progress = 80
        db.commit()

        # Create asset for the output
        file_path = result["file_path"]
        generated_file = Path(file_path)
        filename = generated_file.name

        asset = Asset(
            user_id=job.user_id,
            original_filename=filename,
            stored_filename=filename,
            file_path=file_path,
            file_type="audio",
            mime_type=mime_types.get(generated_file.suffix, "audio/wav"),
            file_size_bytes=result["file_size"],
            source_module="sound_effects",
            source_job_id=job.id,
        )
        db.add(asset)
        db.commit()
        # Refresh so the database-assigned id is available below.
        db.refresh(asset)

        job.output_asset_ids = [asset.id]
        job.output_data = {
            "duration_seconds": result.get("duration_seconds"),
            "format": result["format"],
            "loop": result["loop"],
        }
        job.status = "completed"
        job.progress = 100
        db.commit()

        logger.info(f"Sound effect job {job_id} completed successfully")

    except Exception as e:
        logger.error(f"Sound effect job {job_id} failed: {str(e)}")
        # If the failure came from a flush/commit, the session refuses
        # further work until it is rolled back; do that before reusing it
        # to record the failure.
        db.rollback()
        job = db.query(Job).filter(Job.id == job_id).first()
        if job:
            job.status = "failed"
            job.error_message = str(e)
            db.commit()
    finally:
        db.close()
|