"""Video Generator Service - Runway and Google Veo Runway Models: - gen3_alpha: High quality, supports Motion Brush, Camera Control - gen3_alpha_turbo: 7x faster, half cost, good for most use cases - gen4: Latest model with highest fidelity Runway Features: - text_to_video: Generate from text prompt - image_to_video: Generate from starting image - camera_control: Pan, tilt, zoom, roll with intensity (-10 to 10) - motion_brush: Define motion areas with direction - first_frame/last_frame: Control start and end frames Google Veo Models (December 2025): - veo-3.1-generate-preview: Latest with native audio, 720p/1080p, reference images - veo-3.1-fast-generate-preview: Speed-optimized variant with audio - veo-3.0-generate-001: Stable Veo 3 with audio - veo-3.0-fast-generate-001: Fast Veo 3 variant - veo-2.0-generate-001: Legacy, supports 2 outputs per request Veo 3/3.1 Features: - Native audio generation with soundtrack, dialogue, ambient sounds - first_frame: Starting image for video (image-to-video) - last_frame: Ending image for video (creates frame interpolation) - reference_images: Up to 3 images for character/style/asset consistency - video_extension: Extend existing videos up to 20 times - negative_prompt: Describe unwanted elements - aspect_ratio: 16:9, 9:16 - resolution: 720p, 1080p (Veo 3.1 only) - duration: 4, 6, or 8 seconds - person_generation: Control adult face generation Audio Prompt Techniques (Veo 3+): - Dialogue: Use quotation marks ("She whispered, 'Hello'") - Sound Effects: Explicit descriptions (tires screeching loudly) - Ambient Noise: Environmental details (eerie hum in background) """ import httpx import os import base64 from uuid import uuid4 from datetime import datetime import asyncio from typing import Optional, Dict, Any, List, Tuple from app.database import SessionLocal from app.models.job import Job from app.models.asset import Asset from app.config import settings # Runway model configurations RUNWAY_MODELS = { "gen3_alpha": { "name": "Gen-3 Alpha", "description": "High quality with full feature support", "supports_camera_control": True, "supports_motion_brush": True, "max_duration": 10, "resolutions": ["1280x768", "768x1280"] }, "gen3_alpha_turbo": { "name": "Gen-3 Alpha Turbo", "description": "7x faster, half the cost", "supports_camera_control": True, "supports_motion_brush": False, "max_duration": 10, "resolutions": ["1280x768", "768x1280"] }, "gen4": { "name": "Gen-4", "description": "Latest model with highest fidelity", "supports_camera_control": True, "supports_motion_brush": True, "max_duration": 10, "resolutions": ["1280x768", "768x1280", "1920x1080"] } } # Veo model configurations (December 2025) VEO_MODELS = { "veo-3.1-generate-preview": { "name": "Veo 3.1", "description": "Latest with native audio, 720p/1080p, reference images", "supports_audio": True, "supports_first_last_frame": True, "supports_reference_images": True, "supports_extension": True, "resolutions": ["720p", "1080p"], "durations": [4, 6, 8], "max_references": 3 }, "veo-3.1-fast-generate-preview": { "name": "Veo 3.1 Fast", "description": "Speed-optimized with audio ($0.40/sec)", "supports_audio": True, "supports_first_last_frame": True, "supports_reference_images": True, "supports_extension": True, "resolutions": ["720p", "1080p"], "durations": [4, 6, 8], "max_references": 3 }, "veo-3.0-generate-001": { "name": "Veo 3", "description": "Stable Veo 3 with native audio", "supports_audio": True, "supports_first_last_frame": True, "supports_reference_images": False, "supports_extension": False, "resolutions": ["720p", "1080p"], "durations": [4, 6, 8], "max_references": 0 }, "veo-3.0-fast-generate-001": { "name": "Veo 3 Fast", "description": "Fast Veo 3 variant with audio", "supports_audio": True, "supports_first_last_frame": True, "supports_reference_images": False, "supports_extension": False, "resolutions": ["720p"], "durations": [4, 6, 8], "max_references": 0 }, "veo-2.0-generate-001": { "name": "Veo 2", "description": "Legacy model, supports 2 outputs per request", "supports_audio": False, "supports_first_last_frame": True, "supports_reference_images": False, "supports_extension": False, "resolutions": ["720p"], "durations": [5, 6, 8], "max_references": 0 } } async def generate(job_id: str): """Generate video using Runway or Veo Input parameters: - provider: 'runway' or 'veo' - prompt: Text description - model: Specific model to use - duration: Video length in seconds - aspect_ratio: '16:9', '9:16', '1:1' Runway-specific: - camera_control: {pan, tilt, zoom, roll} with values -10 to 10 - motion_brush: [{area_mask, direction, intensity}] - frame_position: 'first' or 'last' for input image Veo-specific: - first_frame_asset_id: Asset ID for starting frame - last_frame_asset_id: Asset ID for ending frame - reference_asset_ids: List of asset IDs for reference (max 4) """ db = SessionLocal() try: job = db.query(Job).filter(Job.id == job_id).first() if not job: return input_data = job.input_data provider = input_data.get("provider", "runway") prompt = input_data.get("prompt", "") job.progress = 10 job.api_provider = provider db.commit() video_data = None filename = None if provider == "runway": video_data, filename = await _generate_runway(job, input_data, db) elif provider == "veo": video_data, filename = await _generate_veo(job, input_data, db) else: raise ValueError(f"Unknown video provider: {provider}") if video_data: # Save video storage_path = os.path.join(settings.storage_path, "videos") os.makedirs(storage_path, exist_ok=True) file_path = os.path.join(storage_path, filename) with open(file_path, "wb") as f: f.write(video_data) # Create asset asset = Asset( user_id=job.user_id, project_id=job.project_id, original_filename=filename, stored_filename=filename, file_path=file_path, file_type="video", mime_type="video/mp4", file_size_bytes=len(video_data), duration_seconds=input_data.get("duration", 5), source_module="video_generator", source_job_id=job.id, asset_metadata={ "prompt": prompt, "provider": provider, "model": job.api_model } ) db.add(asset) db.commit() db.refresh(asset) job.output_asset_ids = [asset.id] job.output_data = {"asset_id": str(asset.id), "file_path": file_path} job.progress = 100 job.status = "completed" job.completed_at = datetime.utcnow() db.commit() except Exception as e: job.status = "failed" job.error_message = str(e) db.commit() finally: db.close() async def _generate_runway(job, input_data: dict, db) -> Tuple[Optional[bytes], Optional[str]]: """Generate video using Runway Supports: - Text to video - Image to video with first/middle/last frame positioning - Camera control (pan, tilt, zoom, roll) - Motion brush for targeted animation - Multiple resolutions """ prompt = input_data.get("prompt", "") model = input_data.get("model", "gen3_alpha_turbo") duration = min(input_data.get("duration", 5), 10) resolution = input_data.get("resolution", "1280x768") frame_position = input_data.get("frame_position", "first") # first, middle, last # Camera control settings camera_control = input_data.get("camera_control", {}) pan = camera_control.get("pan", 0) # -10 to 10, horizontal tilt = camera_control.get("tilt", 0) # -10 to 10, vertical zoom = camera_control.get("zoom", 0) # -10 to 10 roll = camera_control.get("roll", 0) # -10 to 10, rotation static = camera_control.get("static", False) # Reduce camera motion job.api_model = model db.commit() # Get input image if provided image_data = None if job.input_asset_ids: input_asset = db.query(Asset).filter(Asset.id == job.input_asset_ids[0]).first() if input_asset and os.path.exists(input_asset.file_path): with open(input_asset.file_path, "rb") as f: image_data = base64.b64encode(f.read()).decode() async with httpx.AsyncClient(timeout=600) as client: # Build payload based on whether we have an image if image_data: # Image to video payload = { "model": model, "promptImage": f"data:image/png;base64,{image_data}", "promptText": prompt, "duration": duration, "ratio": resolution.replace("x", ":") } # Frame position (Gen-3 Alpha Turbo supports first, middle, last) if model == "gen3_alpha_turbo": payload["imagePosition"] = frame_position endpoint = "https://api.runwayml.com/v1/image_to_video" else: # Text to video payload = { "model": model, "promptText": prompt, "duration": duration, "ratio": resolution.replace("x", ":") } endpoint = "https://api.runwayml.com/v1/text_to_video" # Add camera control if any values are set if any([pan, tilt, zoom, roll]) and not static: payload["cameraControl"] = { "pan": pan, "tilt": tilt, "zoom": zoom, "roll": roll } elif static: payload["cameraControl"] = {"static": True} # Create generation task response = await client.post( endpoint, headers={ "Authorization": f"Bearer {settings.runway_api_key}", "Content-Type": "application/json", "X-Runway-Version": "2024-11-06" }, json=payload ) response.raise_for_status() result = response.json() task_id = result.get("id") job.progress = 30 job.api_request_id = task_id db.commit() # Poll for completion for i in range(180): # Wait up to 6 minutes await asyncio.sleep(2) status_response = await client.get( f"https://api.runwayml.com/v1/tasks/{task_id}", headers={ "Authorization": f"Bearer {settings.runway_api_key}", "X-Runway-Version": "2024-11-06" } ) status_data = status_response.json() status = status_data.get("status", "") if status == "SUCCEEDED": output_url = status_data.get("output", [None])[0] if output_url: video_response = await client.get(output_url) filename = f"runway_{model}_{uuid4()}.mp4" return video_response.content, filename break elif status == "FAILED": raise ValueError(f"Runway generation failed: {status_data.get('error')}") job.progress = min(30 + (i * 0.35), 90) db.commit() return None, None async def _generate_veo(job, input_data: dict, db) -> Tuple[Optional[bytes], Optional[str]]: """Generate video using Google Veo 3/3.1 Supports: - Text to video with native audio generation - First frame image (video starts from this image) - Last frame image (video ends at this image, creates frame interpolation) - Reference images (up to 3, for character/style/asset consistency - Veo 3.1 only) - Video extension (continue from previous video - Veo 3.1 only) - Negative prompts - Multiple resolutions (720p, 1080p) - Duration options (4, 6, 8 seconds) Audio Prompting: - Use quotation marks for dialogue: "She said, 'Hello'" - Describe sound effects: "tires screeching loudly" - Add ambient sounds: "quiet forest with birds chirping" """ prompt = input_data.get("prompt", "") model = input_data.get("model", "veo-3.1-generate-preview") duration = input_data.get("duration", 8) aspect_ratio = input_data.get("aspect_ratio", "16:9") resolution = input_data.get("resolution", "720p") negative_prompt = input_data.get("negative_prompt", "") person_generation = input_data.get("person_generation") # "allow_adult" or None # Frame control first_frame_asset_id = input_data.get("first_frame_asset_id") last_frame_asset_id = input_data.get("last_frame_asset_id") reference_asset_ids = input_data.get("reference_asset_ids", [])[:3] # Max 3 for Veo 3.1 # Video extension (Veo 3.1 only) extend_video_asset_id = input_data.get("extend_video_asset_id") # Validate duration model_config = VEO_MODELS.get(model, VEO_MODELS["veo-3.1-generate-preview"]) valid_durations = model_config.get("durations", [4, 6, 8]) if duration not in valid_durations: duration = max(valid_durations) # Validate resolution valid_resolutions = model_config.get("resolutions", ["720p"]) if resolution not in valid_resolutions: resolution = valid_resolutions[0] job.api_model = model db.commit() try: from google import genai from google.genai import types # Initialize client client = genai.Client(api_key=settings.google_api_key) job.progress = 20 db.commit() # Build generation config config_kwargs = { "aspect_ratio": aspect_ratio, } # Add negative prompt if provided if negative_prompt: config_kwargs["negative_prompt"] = negative_prompt # Add person generation setting if specified if person_generation: config_kwargs["person_generation"] = person_generation # Resolution for Veo 3.1 if "3.1" in model or "3.0" in model: config_kwargs["resolution"] = resolution config_kwargs["duration_seconds"] = str(duration) # Prepare first frame image first_frame_image = None if first_frame_asset_id: first_asset = db.query(Asset).filter(Asset.id == first_frame_asset_id).first() if first_asset and os.path.exists(first_asset.file_path): with open(first_asset.file_path, "rb") as f: first_frame_image = types.Image.from_bytes( data=f.read(), mime_type=first_asset.mime_type or "image/png" ) # Prepare last frame for interpolation if last_frame_asset_id: last_asset = db.query(Asset).filter(Asset.id == last_frame_asset_id).first() if last_asset and os.path.exists(last_asset.file_path): with open(last_asset.file_path, "rb") as f: config_kwargs["last_frame"] = types.Image.from_bytes( data=f.read(), mime_type=last_asset.mime_type or "image/png" ) # Reference images for character/style consistency (Veo 3.1 only) if reference_asset_ids and model_config.get("supports_reference_images"): reference_images = [] for ref_id in reference_asset_ids: ref_asset = db.query(Asset).filter(Asset.id == ref_id).first() if ref_asset and os.path.exists(ref_asset.file_path): with open(ref_asset.file_path, "rb") as f: # Create VideoGenerationReferenceImage ref_image = types.VideoGenerationReferenceImage( image=types.Image.from_bytes( data=f.read(), mime_type=ref_asset.mime_type or "image/png" ), reference_type="asset" # or "style" for style reference ) reference_images.append(ref_image) if reference_images: config_kwargs["reference_images"] = reference_images # Video extension (Veo 3.1 only) extend_video = None if extend_video_asset_id and model_config.get("supports_extension"): extend_asset = db.query(Asset).filter(Asset.id == extend_video_asset_id).first() if extend_asset and os.path.exists(extend_asset.file_path): with open(extend_asset.file_path, "rb") as f: extend_video = types.Video.from_bytes( data=f.read(), mime_type=extend_asset.mime_type or "video/mp4" ) config = types.GenerateVideosConfig(**config_kwargs) job.progress = 40 db.commit() # Generate video using the async long-running operation if extend_video: # Video extension mode operation = await asyncio.to_thread( client.models.generate_videos, model=model, video=extend_video, prompt=prompt, config=config ) elif first_frame_image: # Image-to-video mode operation = await asyncio.to_thread( client.models.generate_videos, model=model, image=first_frame_image, prompt=prompt, config=config ) else: # Text-to-video mode operation = await asyncio.to_thread( client.models.generate_videos, model=model, prompt=prompt, config=config ) # Poll for completion (can take 11 seconds to 6 minutes) job.progress = 50 db.commit() max_attempts = 72 # 6 minutes with 5 second intervals for attempt in range(max_attempts): await asyncio.sleep(5) # Check operation status operation = await asyncio.to_thread( client.operations.get, operation ) if operation.done: break # Update progress progress = min(50 + (attempt * 0.5), 90) job.progress = int(progress) db.commit() job.progress = 90 db.commit() # Extract video from response if operation.done and operation.response: generated_videos = operation.response.generated_videos if generated_videos and len(generated_videos) > 0: video = generated_videos[0] # Download the video file video_data = await asyncio.to_thread( client.files.download, file=video.video ) filename = f"veo_{model.replace('.', '_').replace('-', '_')}_{uuid4()}.mp4" return video_data, filename # Check for errors if operation.error: raise ValueError(f"Veo generation failed: {operation.error}") except ImportError: raise ValueError("Google GenAI library not installed. Run: pip install google-genai") except Exception as e: raise ValueError(f"Veo generation error: {str(e)}") return None, None async def extend_video(job_id: str): """Extend an existing video using Veo scene extension""" db = SessionLocal() try: job = db.query(Job).filter(Job.id == job_id).first() if not job: return input_data = job.input_data source_asset_id = input_data.get("source_asset_id") prompt = input_data.get("prompt", "") extension_seconds = min(input_data.get("extension_seconds", 4), 8) if not source_asset_id: raise ValueError("No source video provided for extension") source_asset = db.query(Asset).filter(Asset.id == source_asset_id).first() if not source_asset: raise ValueError("Source video not found") job.progress = 10 job.api_provider = "veo" job.api_model = "veo-3.1-generate-preview" db.commit() # Implementation would use Veo's scene extension API # This extends video by building on the final seconds of the previous clip job.progress = 100 job.status = "completed" job.completed_at = datetime.utcnow() db.commit() except Exception as e: job.status = "failed" job.error_message = str(e) db.commit() finally: db.close() def get_available_models() -> Dict[str, Any]: """Get all available video generation models and their capabilities""" return { "runway": RUNWAY_MODELS, "veo": VEO_MODELS }