# forge/backend/app/services/video_generator.py

"""Video Generator Service - Runway and Google Veo

Runway Models:
- gen3_alpha: High quality, supports Motion Brush, Camera Control
- gen3_alpha_turbo: 7x faster, half cost, good for most use cases
- gen4: Latest model with highest fidelity

Runway Features:
- text_to_video: Generate from text prompt
- image_to_video: Generate from starting image
- camera_control: Pan, tilt, zoom, roll with intensity (-10 to 10)
- motion_brush: Define motion areas with direction
- first_frame/last_frame: Control start and end frames

Google Veo Models (December 2025):
- veo-3.1-generate-preview: Latest with native audio, 720p/1080p, reference images
- veo-3.1-fast-generate-preview: Speed-optimized variant with audio
- veo-3.0-generate-001: Stable Veo 3 with audio
- veo-3.0-fast-generate-001: Fast Veo 3 variant
- veo-2.0-generate-001: Legacy, supports 2 outputs per request

Veo 3/3.1 Features:
- Native audio generation with soundtrack, dialogue, ambient sounds
- first_frame: Starting image for video (image-to-video)
- last_frame: Ending image for video (creates frame interpolation)
- reference_images: Up to 3 images for character/style/asset consistency
- video_extension: Extend existing videos up to 20 times
- negative_prompt: Describe unwanted elements
- aspect_ratio: 16:9, 9:16
- resolution: 720p, 1080p (Veo 3.1 only)
- duration: 4, 6, or 8 seconds
- person_generation: Control adult face generation

Audio Prompt Techniques (Veo 3+):
- Dialogue: Use quotation marks ("She whispered, 'Hello'")
- Sound Effects: Explicit descriptions (tires screeching loudly)
- Ambient Noise: Environmental details (eerie hum in background)
"""
import httpx
import os
import base64
from uuid import uuid4
from datetime import datetime
import asyncio
from typing import Optional, Dict, Any, List, Tuple

from app.database import SessionLocal
from app.models.job import Job
from app.models.asset import Asset
from app.config import settings


# Runway model catalogue: model id -> display metadata and capability flags.
RUNWAY_MODELS: Dict[str, Dict[str, Any]] = {
    # Gen-3 Alpha: camera control and motion brush both flagged as available.
    "gen3_alpha": {
        "name": "Gen-3 Alpha",
        "description": "High quality with full feature support",
        "supports_camera_control": True,
        "supports_motion_brush": True,
        "max_duration": 10,
        "resolutions": ["1280x768", "768x1280"],
    },
    # Gen-3 Alpha Turbo: the only entry with motion brush flagged off.
    "gen3_alpha_turbo": {
        "name": "Gen-3 Alpha Turbo",
        "description": "7x faster, half the cost",
        "supports_camera_control": True,
        "supports_motion_brush": False,
        "max_duration": 10,
        "resolutions": ["1280x768", "768x1280"],
    },
    # Gen-4: additionally lists a 1920x1080 resolution.
    "gen4": {
        "name": "Gen-4",
        "description": "Latest model with highest fidelity",
        "supports_camera_control": True,
        "supports_motion_brush": True,
        "max_duration": 10,
        "resolutions": ["1280x768", "768x1280", "1920x1080"],
    },
}

# Veo model catalogue (December 2025): model id -> display metadata,
# capability flags, and the resolution/duration values accepted per model.
VEO_MODELS: Dict[str, Dict[str, Any]] = {
    # --- Veo 3.1 family: reference images and extension flagged on ---
    "veo-3.1-generate-preview": {
        "name": "Veo 3.1",
        "description": "Latest with native audio, 720p/1080p, reference images",
        "supports_audio": True,
        "supports_first_last_frame": True,
        "supports_reference_images": True,
        "supports_extension": True,
        "resolutions": ["720p", "1080p"],
        "durations": [4, 6, 8],
        "max_references": 3,
    },
    "veo-3.1-fast-generate-preview": {
        "name": "Veo 3.1 Fast",
        "description": "Speed-optimized with audio ($0.40/sec)",
        "supports_audio": True,
        "supports_first_last_frame": True,
        "supports_reference_images": True,
        "supports_extension": True,
        "resolutions": ["720p", "1080p"],
        "durations": [4, 6, 8],
        "max_references": 3,
    },
    # --- Veo 3.0 family: audio, but no reference images or extension ---
    "veo-3.0-generate-001": {
        "name": "Veo 3",
        "description": "Stable Veo 3 with native audio",
        "supports_audio": True,
        "supports_first_last_frame": True,
        "supports_reference_images": False,
        "supports_extension": False,
        "resolutions": ["720p", "1080p"],
        "durations": [4, 6, 8],
        "max_references": 0,
    },
    "veo-3.0-fast-generate-001": {
        "name": "Veo 3 Fast",
        "description": "Fast Veo 3 variant with audio",
        "supports_audio": True,
        "supports_first_last_frame": True,
        "supports_reference_images": False,
        "supports_extension": False,
        "resolutions": ["720p"],
        "durations": [4, 6, 8],
        "max_references": 0,
    },
    # --- Veo 2: no audio; note the different duration set (5/6/8) ---
    "veo-2.0-generate-001": {
        "name": "Veo 2",
        "description": "Legacy model, supports 2 outputs per request",
        "supports_audio": False,
        "supports_first_last_frame": True,
        "supports_reference_images": False,
        "supports_extension": False,
        "resolutions": ["720p"],
        "durations": [5, 6, 8],
        "max_references": 0,
    },
}


async def generate(job_id: str):
    """Run a video generation job with Runway or Veo and store the result.

    Loads the job, dispatches to the provider-specific generator, writes the
    returned bytes under ``<settings.storage_path>/videos`` and records an
    ``Asset`` linked to the job. The job row is updated in place: ``progress``,
    ``status`` ("completed" / "failed"), ``error_message`` and output fields.
    Returns nothing; if no job matches ``job_id`` the call is a no-op.

    Input parameters (read from ``job.input_data``):
    - provider: 'runway' (default) or 'veo'
    - prompt: Text description
    - model: Specific model to use
    - duration: Video length in seconds
    - resolution: Provider-specific resolution string
    - aspect_ratio: e.g. '16:9' or '9:16' (only read by the Veo path)

    Runway-specific:
    - camera_control: {pan, tilt, zoom, roll} with values -10 to 10, or {static}
    - frame_position: 'first', 'middle' or 'last' for the input image
    - motion_brush is not currently read by the Runway path

    Veo-specific:
    - first_frame_asset_id: Asset ID for starting frame
    - last_frame_asset_id: Asset ID for ending frame
    - reference_asset_ids: List of asset IDs for reference (max 3)
    - extend_video_asset_id: Asset ID of a video to extend

    Raises:
        Exception: only when the failure happens before the job row could be
            loaded (there is nothing to mark as failed). Every later error is
            recorded on the job instead of being raised.
    """
    db = SessionLocal()
    # Bound before the try so the error handler can tell whether a job row
    # exists to record the failure on.
    job = None
    try:
        job = db.query(Job).filter(Job.id == job_id).first()
        if not job:
            return

        input_data = job.input_data
        provider = input_data.get("provider", "runway")
        prompt = input_data.get("prompt", "")

        job.progress = 10
        job.api_provider = provider
        db.commit()

        if provider == "runway":
            video_data, filename = await _generate_runway(job, input_data, db)
        elif provider == "veo":
            video_data, filename = await _generate_veo(job, input_data, db)
        else:
            raise ValueError(f"Unknown video provider: {provider}")

        # The provider helpers may return (None, None) on timeout or when the
        # provider yields no output; that must not be reported as a success.
        if not video_data:
            raise ValueError(
                f"Video generation with {provider} produced no output "
                "(timed out or returned no video)"
            )

        # Save video
        storage_path = os.path.join(settings.storage_path, "videos")
        os.makedirs(storage_path, exist_ok=True)
        file_path = os.path.join(storage_path, filename)

        with open(file_path, "wb") as f:
            f.write(video_data)

        # Create asset
        asset = Asset(
            user_id=job.user_id,
            project_id=job.project_id,
            original_filename=filename,
            stored_filename=filename,
            file_path=file_path,
            file_type="video",
            mime_type="video/mp4",
            file_size_bytes=len(video_data),
            # This is the *requested* duration; the Veo path may substitute a
            # different value when the request is not valid for the model.
            duration_seconds=input_data.get("duration", 5),
            source_module="video_generator",
            source_job_id=job.id,
            asset_metadata={
                "prompt": prompt,
                "provider": provider,
                "model": job.api_model
            }
        )
        db.add(asset)
        db.commit()
        db.refresh(asset)

        job.output_asset_ids = [asset.id]
        job.output_data = {"asset_id": str(asset.id), "file_path": file_path}

        job.progress = 100
        job.status = "completed"
        job.completed_at = datetime.utcnow()
        db.commit()

    except Exception as e:
        if job is None:
            # Failed before the job was loaded: no row to update, so surface
            # the real error rather than a NameError from this handler.
            raise
        # Discard any half-applied transaction so the failure can be saved.
        db.rollback()
        job.status = "failed"
        job.error_message = str(e)
        db.commit()
    finally:
        db.close()

def _build_runway_payload(input_data: dict, model: str, prompt_image: Optional[str]) -> Tuple[str, dict]:
    """Return ``(endpoint_url, json_payload)`` for a Runway generation request."""
    duration = min(input_data.get("duration", 5), 10)
    resolution = input_data.get("resolution", "1280x768")

    payload = {
        "model": model,
        "promptText": input_data.get("prompt", ""),
        "duration": duration,
        "ratio": resolution.replace("x", ":"),
    }

    if prompt_image:
        # Image to video
        payload["promptImage"] = prompt_image
        # Frame position (Gen-3 Alpha Turbo supports first, middle, last)
        if model == "gen3_alpha_turbo":
            payload["imagePosition"] = input_data.get("frame_position", "first")
        endpoint = "https://api.runwayml.com/v1/image_to_video"
    else:
        # Text to video
        endpoint = "https://api.runwayml.com/v1/text_to_video"

    # ``or {}`` tolerates an explicit ``"camera_control": None`` in the input.
    camera_control = input_data.get("camera_control") or {}
    pan = camera_control.get("pan", 0)  # -10 to 10, horizontal
    tilt = camera_control.get("tilt", 0)  # -10 to 10, vertical
    zoom = camera_control.get("zoom", 0)  # -10 to 10
    roll = camera_control.get("roll", 0)  # -10 to 10, rotation
    static = camera_control.get("static", False)  # Reduce camera motion

    # ``static`` wins over individual axis values; otherwise send the axes
    # only when at least one of them is non-zero.
    if static:
        payload["cameraControl"] = {"static": True}
    elif any([pan, tilt, zoom, roll]):
        payload["cameraControl"] = {
            "pan": pan,
            "tilt": tilt,
            "zoom": zoom,
            "roll": roll,
        }

    return endpoint, payload


async def _generate_runway(job, input_data: dict, db) -> Tuple[Optional[bytes], Optional[str]]:
    """Generate a video with Runway and return ``(video_bytes, filename)``.

    Supports:
    - Text to video
    - Image to video (first entry of ``job.input_asset_ids``), with
      first/middle/last frame positioning on ``gen3_alpha_turbo``
    - Camera control (pan, tilt, zoom, roll, or static)
    - Resolution selection via ``resolution`` (e.g. "1280x768")

    Side effects: sets ``job.api_model``, ``job.api_request_id`` and
    ``job.progress`` (30 -> 90 while polling) and commits them through ``db``.

    Raises:
        ValueError: Runway reports FAILED, returns no task id, succeeds
            without an output URL, or does not finish within the polling
            window (180 polls, 2 seconds apart).
        httpx.HTTPStatusError: a Runway request or the video download returns
            a non-retryable error status.
    """
    # NOTE(review): Runway's public API docs appear to use the host
    # api.dev.runwayml.com — confirm the endpoint URLs used here.
    model = input_data.get("model", "gen3_alpha_turbo")

    job.api_model = model
    db.commit()

    # Get input image if provided
    prompt_image = None
    if job.input_asset_ids:
        input_asset = db.query(Asset).filter(Asset.id == job.input_asset_ids[0]).first()
        if input_asset and os.path.exists(input_asset.file_path):
            with open(input_asset.file_path, "rb") as f:
                encoded = base64.b64encode(f.read()).decode()
            # Use the asset's real MIME type when known instead of always
            # claiming PNG.
            mime_type = input_asset.mime_type or "image/png"
            prompt_image = f"data:{mime_type};base64,{encoded}"

    endpoint, payload = _build_runway_payload(input_data, model, prompt_image)

    auth_headers = {
        "Authorization": f"Bearer {settings.runway_api_key}",
        "X-Runway-Version": "2024-11-06",
    }
    poll_attempts = 180  # Wait up to 6 minutes
    poll_interval = 2

    async with httpx.AsyncClient(timeout=600) as client:
        # Create generation task
        response = await client.post(
            endpoint,
            headers={**auth_headers, "Content-Type": "application/json"},
            json=payload
        )
        response.raise_for_status()

        task_id = response.json().get("id")
        if not task_id:
            # Without an id every poll would hit ".../tasks/None".
            raise ValueError("Runway did not return a task id")

        job.progress = 30
        job.api_request_id = task_id
        db.commit()

        # Poll for completion
        for i in range(poll_attempts):
            await asyncio.sleep(poll_interval)

            status_response = await client.get(
                f"https://api.runwayml.com/v1/tasks/{task_id}",
                headers=auth_headers
            )
            # Rate limiting and server-side hiccups are transient: try again
            # on the next tick. Any other error status is fatal.
            if status_response.status_code == 429 or status_response.status_code >= 500:
                continue
            status_response.raise_for_status()

            status_data = status_response.json()
            status = status_data.get("status", "")

            if status == "SUCCEEDED":
                # ``output`` may be missing, null or empty; never index blindly.
                outputs = status_data.get("output") or []
                output_url = outputs[0] if outputs else None
                if not output_url:
                    raise ValueError("Runway task succeeded but returned no output URL")
                video_response = await client.get(output_url)
                # Do not store an HTTP error body as if it were a video.
                video_response.raise_for_status()
                filename = f"runway_{model}_{uuid4()}.mp4"
                return video_response.content, filename
            if status == "FAILED":
                raise ValueError(f"Runway generation failed: {status_data.get('error')}")

            # Stored as int for consistency with the Veo path.
            job.progress = int(min(30 + (i * 0.35), 90))
            db.commit()

    raise ValueError(
        f"Runway generation timed out after {poll_attempts * poll_interval} seconds"
    )


def _load_veo_asset(db, asset_id, default_mime: str) -> Optional[Tuple[bytes, str]]:
    """Return ``(file_bytes, mime_type)`` for an asset, or None if it is unavailable."""
    if not asset_id:
        return None
    asset = db.query(Asset).filter(Asset.id == asset_id).first()
    if not asset or not os.path.exists(asset.file_path):
        return None
    with open(asset.file_path, "rb") as f:
        return f.read(), asset.mime_type or default_mime


async def _generate_veo(job, input_data: dict, db) -> Tuple[Optional[bytes], Optional[str]]:
    """Generate a video with Google Veo and return ``(video_bytes, filename)``.

    Supports:
    - Text to video with native audio generation
    - First frame image (video starts from this image)
    - Last frame image (video ends at this image, creates frame interpolation)
    - Reference images (only for models whose config enables them, capped at
      the model's ``max_references``)
    - Video extension (only for models whose config enables it)
    - Negative prompts
    - Resolution (sent for 3.0/3.1 models only) and duration; values that are
      not valid for the model fall back to the model's first resolution /
      longest duration

    Audio Prompting:
    - Use quotation marks for dialogue: "She said, 'Hello'"
    - Describe sound effects: "tires screeching loudly"
    - Add ambient sounds: "quiet forest with birds chirping"

    Side effects: sets ``job.api_model`` and ``job.progress`` (20 -> 90) and
    commits them through ``db``.

    Raises:
        ValueError: the google-genai package is missing, the SDK or a DB call
            raises (wrapped, with the original chained as ``__cause__``), the
            operation reports an error, finishes without videos, or does not
            finish within the polling window.
    """
    prompt = input_data.get("prompt", "")
    model = input_data.get("model", "veo-3.1-generate-preview")
    duration = input_data.get("duration", 8)
    aspect_ratio = input_data.get("aspect_ratio", "16:9")
    resolution = input_data.get("resolution", "720p")
    negative_prompt = input_data.get("negative_prompt", "")
    person_generation = input_data.get("person_generation")  # "allow_adult" or None

    # Frame control
    first_frame_asset_id = input_data.get("first_frame_asset_id")
    last_frame_asset_id = input_data.get("last_frame_asset_id")

    # Video extension
    extend_video_asset_id = input_data.get("extend_video_asset_id")

    # Unknown model ids are validated against the Veo 3.1 capabilities.
    model_config = VEO_MODELS.get(model, VEO_MODELS["veo-3.1-generate-preview"])

    # ``or []`` tolerates an explicit None; the cap comes from the model config.
    reference_asset_ids = (input_data.get("reference_asset_ids") or [])[
        :model_config.get("max_references", 3)
    ]

    # Validate duration
    valid_durations = model_config.get("durations", [4, 6, 8])
    if duration not in valid_durations:
        duration = max(valid_durations)

    # Validate resolution
    valid_resolutions = model_config.get("resolutions", ["720p"])
    if resolution not in valid_resolutions:
        resolution = valid_resolutions[0]

    job.api_model = model
    db.commit()

    # Only the import itself should produce the "not installed" message.
    try:
        from google import genai
        from google.genai import types
    except ImportError as e:
        raise ValueError(
            "Google GenAI library not installed. Run: pip install google-genai"
        ) from e

    max_attempts = 72  # 6 minutes with 5 second intervals
    poll_interval = 5

    try:
        # Initialize client
        client = genai.Client(api_key=settings.google_api_key)

        job.progress = 20
        db.commit()

        # Build generation config. The validated duration is sent for every
        # model, as an int rather than a string.
        config_kwargs = {
            "aspect_ratio": aspect_ratio,
            "duration_seconds": int(duration),
        }

        # Add negative prompt if provided
        if negative_prompt:
            config_kwargs["negative_prompt"] = negative_prompt

        # Add person generation setting if specified
        if person_generation:
            config_kwargs["person_generation"] = person_generation

        # Resolution is only sent for Veo 3.0 / 3.1 model ids
        if "3.1" in model or "3.0" in model:
            config_kwargs["resolution"] = resolution

        # Prepare first frame image
        first_frame_image = None
        first_frame = _load_veo_asset(db, first_frame_asset_id, "image/png")
        if first_frame:
            first_frame_image = types.Image.from_bytes(
                data=first_frame[0], mime_type=first_frame[1]
            )

        # Prepare last frame for interpolation
        last_frame = _load_veo_asset(db, last_frame_asset_id, "image/png")
        if last_frame:
            config_kwargs["last_frame"] = types.Image.from_bytes(
                data=last_frame[0], mime_type=last_frame[1]
            )

        # Reference images for character/style consistency
        if reference_asset_ids and model_config.get("supports_reference_images"):
            reference_images = []
            for ref_id in reference_asset_ids:
                ref = _load_veo_asset(db, ref_id, "image/png")
                if ref:
                    reference_images.append(
                        types.VideoGenerationReferenceImage(
                            image=types.Image.from_bytes(data=ref[0], mime_type=ref[1]),
                            reference_type="asset"  # or "style" for style reference
                        )
                    )
            if reference_images:
                config_kwargs["reference_images"] = reference_images

        # Video extension. Named ``source_video`` so it does not shadow the
        # module-level ``extend_video`` function.
        source_video = None
        if extend_video_asset_id and model_config.get("supports_extension"):
            source = _load_veo_asset(db, extend_video_asset_id, "video/mp4")
            if source:
                source_video = types.Video.from_bytes(
                    data=source[0], mime_type=source[1]
                )

        config = types.GenerateVideosConfig(**config_kwargs)

        job.progress = 40
        db.commit()

        # One request; extension takes precedence over image-to-video, and
        # plain text-to-video is used when neither input is available.
        request_kwargs = {"model": model, "prompt": prompt, "config": config}
        if source_video is not None:
            request_kwargs["video"] = source_video
        elif first_frame_image is not None:
            request_kwargs["image"] = first_frame_image

        operation = await asyncio.to_thread(
            client.models.generate_videos,
            **request_kwargs
        )

        # Poll for completion (can take 11 seconds to 6 minutes)
        job.progress = 50
        db.commit()

        for attempt in range(max_attempts):
            await asyncio.sleep(poll_interval)

            # Check operation status
            operation = await asyncio.to_thread(
                client.operations.get,
                operation
            )

            if operation.done:
                break

            # Update progress
            job.progress = int(min(50 + (attempt * 0.5), 90))
            db.commit()
    except Exception as e:
        raise ValueError(f"Veo generation error: {str(e)}") from e

    # The outcome checks live outside the try block so these errors are not
    # re-wrapped with a second "Veo generation error:" prefix.
    if not operation.done:
        raise ValueError(
            f"Veo generation timed out after {max_attempts * poll_interval} seconds"
        )

    response = operation.response
    generated_videos = response.generated_videos if response else None
    if not generated_videos:
        if operation.error:
            raise ValueError(f"Veo generation failed: {operation.error}")
        raise ValueError("Veo generation completed but returned no videos")

    try:
        job.progress = 90
        db.commit()

        # Download the video file
        video_data = await asyncio.to_thread(
            client.files.download,
            file=generated_videos[0].video
        )
    except Exception as e:
        raise ValueError(f"Veo generation error: {str(e)}") from e

    filename = f"veo_{model.replace('.', '_').replace('-', '_')}_{uuid4()}.mp4"
    return video_data, filename


async def extend_video(job_id: str):
    """Extend an existing video using Veo scene extension.

    NOTE(review): the extension call itself is not implemented yet. After
    validating that the source asset exists, the job is marked "completed"
    without producing any output asset — confirm callers do not rely on
    output fields being set.

    Reads ``source_asset_id``, ``prompt`` and ``extension_seconds`` (capped at
    8, default 4) from ``job.input_data``. Returns nothing; if no job matches
    ``job_id`` the call is a no-op. Failures after the job is loaded are
    recorded on the job (``status="failed"``, ``error_message``).

    Raises:
        Exception: only when the failure happens before the job row could be
            loaded, since there is then nothing to mark as failed.
    """
    db = SessionLocal()
    # Bound before the try so the error handler can tell whether a job row
    # exists to record the failure on.
    job = None
    try:
        job = db.query(Job).filter(Job.id == job_id).first()
        if not job:
            return

        input_data = job.input_data
        source_asset_id = input_data.get("source_asset_id")
        # ``prompt`` and ``extension_seconds`` are read for the pending
        # implementation below and are not used yet.
        prompt = input_data.get("prompt", "")
        extension_seconds = min(input_data.get("extension_seconds", 4), 8)

        if not source_asset_id:
            raise ValueError("No source video provided for extension")

        source_asset = db.query(Asset).filter(Asset.id == source_asset_id).first()
        if not source_asset:
            raise ValueError("Source video not found")

        job.progress = 10
        job.api_provider = "veo"
        job.api_model = "veo-3.1-generate-preview"
        db.commit()

        # Implementation would use Veo's scene extension API
        # This extends video by building on the final seconds of the previous clip

        job.progress = 100
        job.status = "completed"
        job.completed_at = datetime.utcnow()
        db.commit()

    except Exception as e:
        if job is None:
            # Failed before the job was loaded: no row to update, so surface
            # the real error rather than a NameError from this handler.
            raise
        # Discard any half-applied transaction so the failure can be saved.
        db.rollback()
        job.status = "failed"
        job.error_message = str(e)
        db.commit()
    finally:
        db.close()


def get_available_models() -> Dict[str, Any]:
    """Return the model catalogue of every supported provider.

    The result maps a provider key (``"runway"`` or ``"veo"``) to that
    provider's module-level model table; the tables are returned as-is,
    not copied.
    """
    catalogue: Dict[str, Any] = {}
    catalogue["runway"] = RUNWAY_MODELS
    catalogue["veo"] = VEO_MODELS
    return catalogue