# forge/backend/app/services/sound_effects.py

"""Sound Effects Generation Service using ElevenLabs API"""
import httpx
import structlog
from typing import Optional, Dict, Any
from pathlib import Path
import uuid

from app.config import settings

# Module-level structlog logger used throughout this file.
logger = structlog.get_logger()

# URL that sound-effect generation requests are POSTed to.
ELEVENLABS_SFX_URL = "https://api.elevenlabs.io/v1/sound-generation"

# ``output_format`` identifiers mapped to the human-readable label shown for each.
OUTPUT_FORMATS = dict(
    mp3_44100_128="MP3 (44.1kHz, 128kbps)",
    mp3_44100_192="MP3 (44.1kHz, 192kbps)",
    pcm_48000="WAV (48kHz)",
    opus_48000_64="Opus (48kHz, 64kbps)",
)


class SoundEffectsGenerator:
    """Generate sound effects using ElevenLabs API.

    The API key is read from ``settings.elevenlabs_api_key`` when the
    instance is created. A missing key only logs a warning at that point;
    ``generate`` raises ``ValueError`` if it is still missing when called.
    """

    # Upper bound applied to ``duration_seconds`` before it is sent to the API.
    MAX_DURATION_SECONDS = 22

    # (format prefix, file extension) pairs, checked in order.
    _FORMAT_EXTENSIONS = (
        ("mp3", ".mp3"),
        ("pcm", ".wav"),
        ("opus", ".opus"),
    )

    def __init__(self):
        self.api_key = settings.elevenlabs_api_key
        if not self.api_key:
            logger.warning("ElevenLabs API key not configured")

    @staticmethod
    def _extension_for_format(output_format: str) -> str:
        """Return the file extension used for *output_format*.

        Formats with an unrecognised prefix fall back to ``".mp3"``.
        """
        # NOTE(review): "pcm_*" responses are written to disk unchanged but
        # named ".wav" -- confirm whether the API returns a WAV container or
        # raw samples that still need a header.
        for prefix, extension in SoundEffectsGenerator._FORMAT_EXTENSIONS:
            if output_format.startswith(prefix):
                return extension
        return ".mp3"

    @staticmethod
    def _clamp_duration(duration_seconds: Optional[float]) -> Optional[float]:
        """Cap *duration_seconds* at the maximum; ``None`` (auto) passes through."""
        if duration_seconds is None:
            return None
        return min(duration_seconds, SoundEffectsGenerator.MAX_DURATION_SECONDS)

    async def generate(
        self,
        text: str,
        duration_seconds: Optional[float] = None,
        prompt_influence: float = 0.3,
        loop: bool = False,
        output_format: str = "mp3_44100_128",
        output_path: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Generate a sound effect from text description.

        Args:
            text: Description of the sound effect to generate
            duration_seconds: Desired duration (max 22 seconds, or None for auto)
            prompt_influence: How closely to follow the prompt (0.0-1.0)
            loop: Whether to generate a looping sound effect
            output_format: Audio format (mp3_44100_128, pcm_48000, etc.)
            output_path: Optional path to save the audio file; when omitted a
                random ``sfx_<hex>`` name under ``<storage_path>/audio`` is used

        Returns:
            Dict with ``file_path``, ``file_size`` (bytes written), ``format``,
            ``duration_seconds`` (the value actually requested from the API,
            i.e. after capping, or None for auto) and ``loop``.

        Raises:
            ValueError: If the API key is not configured, or the API rejects
                the request with HTTP 422.
            httpx.HTTPStatusError: For any other non-success HTTP status.
        """
        if not self.api_key:
            raise ValueError("ElevenLabs API key not configured")

        logger.info(
            "Generating sound effect",
            text=text[:50] + "..." if len(text) > 50 else text,
            duration=duration_seconds,
            loop=loop,
        )

        headers = {
            "xi-api-key": self.api_key,
            "Content-Type": "application/json",
        }

        payload: Dict[str, Any] = {
            "text": text,
            "prompt_influence": prompt_influence,
        }

        # The capped value is both sent to the API and reported back to the
        # caller, so the result never claims a longer duration than requested.
        requested_duration = self._clamp_duration(duration_seconds)
        if requested_duration is not None:
            payload["duration_seconds"] = requested_duration

        if loop:
            payload["loop"] = True

        params = {"output_format": output_format}

        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                ELEVENLABS_SFX_URL,
                headers=headers,
                json=payload,
                params=params,
            )

            if response.status_code == 422:
                # The error body is normally JSON; fall back to the raw text so
                # a malformed body still surfaces as a validation error.
                try:
                    error_detail = response.json()
                except ValueError:
                    error_detail = response.text
                raise ValueError(f"Validation error: {error_detail}")

            response.raise_for_status()
            audio_bytes = response.content

        # Generate output path if not provided
        if not output_path:
            extension = self._extension_for_format(output_format)
            output_path = str(
                Path(settings.storage_path)
                / "audio"
                / f"sfx_{uuid.uuid4().hex[:8]}{extension}"
            )

        # Ensure directory exists, then write the audio file
        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_bytes(audio_bytes)

        file_size = len(audio_bytes)

        logger.info(
            "Sound effect generated",
            output_path=output_path,
            file_size=file_size,
            format=output_format,
        )

        return {
            "file_path": output_path,
            "file_size": file_size,
            "format": output_format,
            "duration_seconds": requested_duration,
            "loop": loop,
        }

    async def get_available_formats(self) -> Dict[str, str]:
        """Return available output formats as a fresh dict.

        A copy is returned so callers cannot mutate the module-level table.
        """
        return dict(OUTPUT_FORMATS)


# Shared generator, created lazily; stays None until first requested.
_generator: Optional[SoundEffectsGenerator] = None


def get_sound_effects_generator() -> SoundEffectsGenerator:
    """Return the shared SoundEffectsGenerator, building it on first use."""
    global _generator
    if _generator is not None:
        return _generator
    _generator = SoundEffectsGenerator()
    return _generator


def _mime_type_for_audio_file(filename: str) -> str:
    """Return the MIME type recorded on the asset for *filename*.

    ``.mp3`` maps to ``audio/mpeg`` and ``.opus`` to ``audio/opus``; every
    other name is reported as ``audio/wav``.
    """
    if filename.endswith(".mp3"):
        return "audio/mpeg"
    if filename.endswith(".opus"):
        return "audio/opus"
    return "audio/wav"


async def generate_sound_effect_job(job_id: str) -> None:
    """Process a sound effect generation job.

    Loads the job by id, marks it ``processing``, generates the audio from
    ``job.input_data``, stores an ``Asset`` row for the produced file and
    finally marks the job ``completed``. Any exception marks the job
    ``failed`` with the error text in ``error_message``.

    Args:
        job_id: Identifier of the ``Job`` row to process. If no such row
            exists the function logs an error and returns.
    """
    # Imported lazily inside the function rather than at module import time.
    from app.database import SessionLocal
    from app.models.job import Job
    from app.models.asset import Asset

    db = SessionLocal()

    try:
        job = db.query(Job).filter(Job.id == job_id).first()
        if not job:
            logger.error(f"Job {job_id} not found")
            return

        job.status = "processing"
        job.progress = 10
        db.commit()

        input_data = job.input_data
        generator = get_sound_effects_generator()

        # Generate the sound effect
        result = await generator.generate(
            text=input_data["text"],
            duration_seconds=input_data.get("duration_seconds"),
            prompt_influence=input_data.get("prompt_influence", 0.3),
            loop=input_data.get("loop", False),
            output_format=input_data.get("output_format", "mp3_44100_128"),
        )

        job.progress = 80
        db.commit()

        # Create asset for the output
        file_path = result["file_path"]
        filename = Path(file_path).name

        asset = Asset(
            user_id=job.user_id,
            original_filename=filename,
            stored_filename=filename,
            file_path=file_path,
            file_type="audio",
            mime_type=_mime_type_for_audio_file(filename),
            file_size_bytes=result["file_size"],
            source_module="sound_effects",
            source_job_id=job.id,
        )
        db.add(asset)
        db.commit()
        # Refresh so the asset's attributes (including its id) are reloaded.
        db.refresh(asset)

        job.output_asset_ids = [asset.id]
        job.output_data = {
            "duration_seconds": result.get("duration_seconds"),
            "format": result["format"],
            "loop": result["loop"],
        }
        job.status = "completed"
        job.progress = 100
        db.commit()

        logger.info(f"Sound effect job {job_id} completed successfully")

    except Exception as e:
        logger.error(f"Sound effect job {job_id} failed: {str(e)}")
        # A failed flush/commit leaves the session unusable until rolled
        # back; reset it so the failure status can actually be persisted.
        db.rollback()
        job = db.query(Job).filter(Job.id == job_id).first()
        if job:
            job.status = "failed"
            job.error_message = str(e)
            db.commit()
    finally:
        db.close()