"""Sound Effects Generation Service using ElevenLabs API""" import httpx import structlog from typing import Optional, Dict, Any from pathlib import Path import uuid from app.config import settings logger = structlog.get_logger() # ElevenLabs Sound Effects API endpoint ELEVENLABS_SFX_URL = "https://api.elevenlabs.io/v1/sound-generation" # Available output formats OUTPUT_FORMATS = { "mp3_44100_128": "MP3 (44.1kHz, 128kbps)", "mp3_44100_192": "MP3 (44.1kHz, 192kbps)", "pcm_48000": "WAV (48kHz)", "opus_48000_64": "Opus (48kHz, 64kbps)", } class SoundEffectsGenerator: """Generate sound effects using ElevenLabs API""" def __init__(self): self.api_key = settings.elevenlabs_api_key if not self.api_key: logger.warning("ElevenLabs API key not configured") async def generate( self, text: str, duration_seconds: Optional[float] = None, prompt_influence: float = 0.3, loop: bool = False, output_format: str = "mp3_44100_128", output_path: Optional[str] = None, ) -> Dict[str, Any]: """ Generate a sound effect from text description. Args: text: Description of the sound effect to generate duration_seconds: Desired duration (max 22 seconds, or None for auto) prompt_influence: How closely to follow the prompt (0.0-1.0) loop: Whether to generate a looping sound effect output_format: Audio format (mp3_44100_128, pcm_48000, etc.) output_path: Optional path to save the audio file Returns: Dict with file_path, duration, format info """ if not self.api_key: raise ValueError("ElevenLabs API key not configured") logger.info( "Generating sound effect", text=text[:50] + "..." if len(text) > 50 else text, duration=duration_seconds, loop=loop, ) headers = { "xi-api-key": self.api_key, "Content-Type": "application/json", } payload: Dict[str, Any] = { "text": text, "prompt_influence": prompt_influence, } if duration_seconds is not None: payload["duration_seconds"] = min(duration_seconds, 22) # Max 22 seconds if loop: payload["loop"] = True params = {"output_format": output_format} async with httpx.AsyncClient(timeout=120.0) as client: response = await client.post( ELEVENLABS_SFX_URL, headers=headers, json=payload, params=params, ) if response.status_code == 422: error_detail = response.json() raise ValueError(f"Validation error: {error_detail}") response.raise_for_status() # Determine file extension from format if output_format.startswith("mp3"): extension = ".mp3" elif output_format.startswith("pcm"): extension = ".wav" elif output_format.startswith("opus"): extension = ".opus" else: extension = ".mp3" # Generate output path if not provided if not output_path: output_path = str( Path(settings.storage_path) / "audio" / f"sfx_{uuid.uuid4().hex[:8]}{extension}" ) # Ensure directory exists Path(output_path).parent.mkdir(parents=True, exist_ok=True) # Write the audio file with open(output_path, "wb") as f: f.write(response.content) file_size = len(response.content) logger.info( "Sound effect generated", output_path=output_path, file_size=file_size, format=output_format, ) return { "file_path": output_path, "file_size": file_size, "format": output_format, "duration_seconds": duration_seconds, "loop": loop, } async def get_available_formats(self) -> Dict[str, str]: """Return available output formats""" return OUTPUT_FORMATS # Singleton instance _generator: Optional[SoundEffectsGenerator] = None def get_sound_effects_generator() -> SoundEffectsGenerator: """Get the singleton sound effects generator instance""" global _generator if _generator is None: _generator = SoundEffectsGenerator() return _generator async def generate_sound_effect_job(job_id: str) -> None: """Process a sound effect generation job""" from app.database import SessionLocal from app.models.job import Job from app.models.asset import Asset import asyncio db = SessionLocal() try: job = db.query(Job).filter(Job.id == job_id).first() if not job: logger.error(f"Job {job_id} not found") return job.status = "processing" job.progress = 10 db.commit() input_data = job.input_data generator = get_sound_effects_generator() # Generate the sound effect result = await generator.generate( text=input_data["text"], duration_seconds=input_data.get("duration_seconds"), prompt_influence=input_data.get("prompt_influence", 0.3), loop=input_data.get("loop", False), output_format=input_data.get("output_format", "mp3_44100_128"), ) job.progress = 80 db.commit() # Create asset for the output file_path = result["file_path"] filename = Path(file_path).name asset = Asset( user_id=job.user_id, original_filename=filename, stored_filename=filename, file_path=file_path, file_type="audio", mime_type="audio/mpeg" if filename.endswith(".mp3") else "audio/wav", file_size_bytes=result["file_size"], source_module="sound_effects", source_job_id=job.id, ) db.add(asset) db.commit() db.refresh(asset) job.output_asset_ids = [asset.id] job.output_data = { "duration_seconds": result.get("duration_seconds"), "format": result["format"], "loop": result["loop"], } job.status = "completed" job.progress = 100 db.commit() logger.info(f"Sound effect job {job_id} completed successfully") except Exception as e: logger.error(f"Sound effect job {job_id} failed: {str(e)}") job = db.query(Job).filter(Job.id == job_id).first() if job: job.status = "failed" job.error_message = str(e) db.commit() finally: db.close()