video-accessibility/backend/app/services/video_renderer.py
Vadym Samoilenko 31199f8705 chore: push all session changes — backend hardening, tests, apache config, deploy scripts
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-30 15:52:14 +01:00

1696 lines
66 KiB
Python

"""Service for rendering accessible video with embedded audio descriptions using ffmpeg.
FFmpeg operations are dispatched to a dedicated Celery queue (ffmpeg) with concurrency=1
to prevent server overload when multiple render tasks run in parallel.
When FFMPEG_SERVICE_URL is configured, operations are offloaded to Cloud Run for autoscaling.
"""
import asyncio
import json
import os
import tempfile
from pathlib import Path
from typing import Any
from uuid import uuid4
import google.auth.transport.requests
import httpx
from google.auth import default
from google.cloud import storage
from google.oauth2 import id_token
from ..core.config import settings
from ..core.logging import get_logger
from ..models.job import PausePointData, VideoSegmentMetadata
logger = get_logger(__name__)
def _get_cloud_run_id_token(audience: str) -> str:
    """
    Obtain an ID token for calling an authenticated Cloud Run service.

    Args:
        audience: Audience passed to ``id_token.fetch_id_token`` (callers in
            this module pass the service base URL).

    Returns:
        The token returned by ``id_token.fetch_id_token``.
    """
    # NOTE(review): the credentials resolved here are not handed to
    # fetch_id_token; the call is kept so credential resolution still runs
    # first, exactly as before - confirm whether it is still needed.
    default()
    auth_request = google.auth.transport.requests.Request()
    return id_token.fetch_id_token(auth_request, audience)
class FFmpegExecutionError(Exception):
    """Signals that an FFmpeg or FFprobe invocation did not succeed."""
class VideoRendererService:
"""Service for rendering accessible video with embedded audio descriptions."""
def __init__(self):
    """Initialise executable names, ducking settings and Cloud Run/GCS state."""
    # Bare executable names, used as the first element of command lists.
    self.ffprobe_path = "ffprobe"
    self.ffmpeg_path = "ffmpeg"

    # GCS client is created on first use by _get_gcs_client().
    self._gcs_client: storage.Client | None = None
    # gs:// URI of the source video uploaded for Cloud Run; set and cleared
    # by render_accessible_video().
    self._cached_source_gcs_uri: str | None = None

    # Audio ducking settings, falling back to defaults when the settings
    # object does not define them.
    self.duck_fade_ms = getattr(settings, "accessible_video_duck_fade_ms", 200)
    self.duck_level = getattr(settings, "accessible_video_duck_level", 0.3)
@property
def _use_cloud_run(self) -> bool:
    """Whether ``settings.ffmpeg_service_url`` holds a truthy value."""
    service_url = settings.ffmpeg_service_url
    return True if service_url else False
def _get_gcs_client(self) -> storage.Client:
    """Return the instance's GCS client, constructing it on first access."""
    existing = self._gcs_client
    if existing is not None:
        return existing
    self._gcs_client = storage.Client()
    return self._gcs_client
def _upload_to_gcs_temp(self, local_path: str, prefix: str) -> str:
    """
    Copy a local file into the bucket's temp area and return its gs:// URI.

    Args:
        local_path: File on local disk to upload.
        prefix: Sub-folder under ``temp/ffmpeg/`` (e.g., "source", "frame", "audio").

    Returns:
        URI of the form gs://bucket/temp/ffmpeg/{prefix}/{uuid}/{filename}.
    """
    target_bucket = self._get_gcs_client().bucket(settings.gcs_bucket)
    # A random hex component keeps uploads of same-named files from colliding.
    object_name = f"temp/ffmpeg/{prefix}/{uuid4().hex}/{os.path.basename(local_path)}"
    target_bucket.blob(object_name).upload_from_filename(local_path)
    gcs_uri = f"gs://{settings.gcs_bucket}/{object_name}"
    logger.debug(f"Uploaded {local_path} to {gcs_uri}")
    return gcs_uri
def _download_from_gcs_temp(self, gcs_uri: str, local_path: str):
    """
    Download file from GCS to local path.

    Args:
        gcs_uri: GCS URI (gs://bucket/path)
        local_path: Local destination path. Its parent directory is created
            when missing; a bare filename (no directory part) is written
            relative to the current working directory.

    Raises:
        ValueError: If ``gcs_uri`` does not start with ``gs://`` or lacks a
            bucket or object name.
    """
    # Validate before touching the network so malformed input fails fast.
    if not gcs_uri.startswith("gs://"):
        raise ValueError(f"Invalid GCS URI: {gcs_uri}")
    bucket_name, _, blob_path = gcs_uri[5:].partition("/")
    if not bucket_name or not blob_path:
        raise ValueError(f"Invalid GCS URI: {gcs_uri}")

    client = self._get_gcs_client()
    blob = client.bucket(bucket_name).blob(blob_path)

    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # when the destination actually has a directory component.
    parent_dir = os.path.dirname(local_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    blob.download_to_filename(local_path)
    logger.debug(f"Downloaded {gcs_uri} to {local_path}")
def _delete_gcs_temp(self, gcs_uri: str):
    """
    Best-effort removal of a temporary GCS object.

    Empty URIs are ignored, malformed URIs are logged, and any failure while
    deleting is logged as a warning rather than raised.

    Args:
        gcs_uri: GCS URI to delete (gs://bucket/path)
    """
    if not gcs_uri:
        return
    try:
        gcs = self._get_gcs_client()
        if gcs_uri.startswith("gs://"):
            # Everything up to the first "/" is the bucket, the rest the object.
            bucket_name, _, blob_path = gcs_uri[5:].partition("/")
            gcs.bucket(bucket_name).blob(blob_path).delete()
            logger.debug(f"Deleted {gcs_uri}")
        else:
            logger.warning(f"Invalid GCS URI for deletion: {gcs_uri}")
    except Exception as e:
        # Cleanup must never fail the caller; record and continue.
        logger.warning(f"Failed to delete GCS temp file {gcs_uri}: {e}")
def _upload_to_gcs_permanent(self, local_path: str, gcs_path: str) -> str:
    """
    Store a local file at a caller-chosen (non-temp) object path in the bucket.

    Args:
        local_path: File on local disk to upload.
        gcs_path: Object path inside the bucket (e.g., "job_id/en/segments/seg_0.mp4").

    Returns:
        GCS URI (gs://bucket/gcs_path)
    """
    destination = self._get_gcs_client().bucket(settings.gcs_bucket).blob(gcs_path)
    destination.upload_from_filename(local_path)
    gcs_uri = f"gs://{settings.gcs_bucket}/{gcs_path}"
    logger.debug(f"Uploaded {local_path} to {gcs_uri} (permanent)")
    return gcs_uri
async def _call_cloud_run_probe(self, gcs_uri: str) -> dict[str, Any]:
    """
    POST to the Cloud Run FFmpeg service's /probe endpoint.

    Args:
        gcs_uri: GCS URI of video to probe (gs://bucket/path)

    Returns:
        The decoded JSON response body (callers read "duration" and "streams").

    Raises:
        httpx.HTTPStatusError: Propagated from ``raise_for_status`` on a
            non-success response.
    """
    base_url = settings.ffmpeg_service_url.rstrip("/")
    bearer = _get_cloud_run_id_token(base_url)
    request_headers = {"Authorization": f"Bearer {bearer}"}
    request_body = {"gcs_uri": gcs_uri}  # Matches ProbeRequest model
    async with httpx.AsyncClient(timeout=600.0) as http:
        probe_response = await http.post(
            f"{base_url}/probe",
            json=request_body,
            headers=request_headers,
        )
        probe_response.raise_for_status()
        return probe_response.json()
async def _call_cloud_run_endpoint(
    self,
    endpoint: str,
    payload: dict[str, Any],
    output_gcs_uri: str | None = None
) -> dict[str, Any]:
    """
    Call Cloud Run FFmpeg service endpoint.

    Args:
        endpoint: Endpoint path (e.g., "/encode-segment", "/extract-frame")
        payload: Request payload, sent as the JSON body
        output_gcs_uri: Expected output GCS URI. Accepted for interface
            compatibility; this method does not currently use it.

    Returns:
        Response JSON

    Raises:
        FFmpegExecutionError: If the service answers with an HTTP error
            status. The original ``httpx.HTTPStatusError`` is attached as
            ``__cause__``.
    """
    service_url = settings.ffmpeg_service_url.rstrip("/")
    auth_token = _get_cloud_run_id_token(service_url)
    async with httpx.AsyncClient(timeout=600.0) as client:
        try:
            response = await client.post(
                f"{service_url}{endpoint}",
                json=payload,
                headers={"Authorization": f"Bearer {auth_token}"}
            )
            response.raise_for_status()
            return response.json()
        except httpx.HTTPStatusError as e:
            # Prefer the service's own "detail" field; fall back to the
            # generic HTTP error text when the body is not usable JSON.
            try:
                error_detail = e.response.json().get("detail", str(e))
            except Exception:
                error_detail = str(e)
            # Chain explicitly so the HTTP error stays visible in tracebacks.
            raise FFmpegExecutionError(
                f"Cloud Run {endpoint} failed: {error_detail}"
            ) from e
async def _dispatch_ffmpeg(self, cmd: list[str], timeout: int = 3600) -> dict[str, Any]:
    """
    Run an FFmpeg command on the dedicated ``ffmpeg`` Celery queue.

    The task is submitted with ``apply_async`` and its state is polled with
    ``asyncio.sleep`` between checks, so the event loop stays free while the
    command runs.

    Args:
        cmd: FFmpeg command list
        timeout: Timeout in seconds, forwarded to the Celery task

    Returns:
        dict with 'success', 'stdout', 'stderr', 'returncode'

    Raises:
        FFmpegExecutionError: If the task reports ``success`` as falsy
    """
    from celery.result import allow_join_result
    from ..tasks.ffmpeg_operations import run_ffmpeg_command

    pending = run_ffmpeg_command.apply_async(args=[cmd, timeout], queue="ffmpeg")

    # Cooperative polling: yield to the loop between readiness checks.
    while True:
        if pending.ready():
            break
        await asyncio.sleep(0.5)

    # The task is already finished, so joining from inside a task is safe here.
    with allow_join_result():
        outcome = pending.get(timeout=30)

    if outcome['success']:
        return outcome

    # FFmpeg prints its banner first; the useful error text is at the tail.
    stderr_tail = outcome['stderr']
    if len(stderr_tail) > 1500:
        stderr_tail = f"...{stderr_tail[-1500:]}"
    raise FFmpegExecutionError(
        f"FFmpeg failed with code {outcome['returncode']}: {stderr_tail}"
    )
async def _dispatch_ffprobe(self, cmd: list[str]) -> dict[str, Any]:
    """
    Run an FFprobe command on the dedicated ``ffmpeg`` Celery queue.

    Args:
        cmd: FFprobe command list

    Returns:
        dict with 'success', 'stdout', 'stderr', 'returncode'

    Raises:
        FFmpegExecutionError: If the task reports ``success`` as falsy
    """
    from celery.result import allow_join_result
    from ..tasks.ffmpeg_operations import run_ffprobe_command

    pending = run_ffprobe_command.apply_async(args=[cmd], queue="ffmpeg")

    # Probes are quick, so poll on a shorter interval than full encodes.
    while True:
        if pending.ready():
            break
        await asyncio.sleep(0.2)

    # The task is already finished, so joining from inside a task is safe here.
    with allow_join_result():
        outcome = pending.get(timeout=30)

    if outcome['success']:
        return outcome
    raise FFmpegExecutionError(
        f"FFprobe failed with code {outcome['returncode']}: {outcome['stderr'][:200]}"
    )
async def render_accessible_video(
    self,
    source_video_path: str,
    ad_segments: list[tuple[int, str]],  # [(cue_index, mp3_path), ...]
    analysis: dict[str, Any],
    output_path: str,
    persist_segments: bool = False,
    gcs_segment_prefix: str | None = None,
) -> tuple[str, list[dict] | None, list[VideoSegmentMetadata] | None, list[PausePointData] | None]:
    """
    Entry point: render the accessible video using the method named in ``analysis``.

    ``analysis["method"]`` selects the renderer: "overlay" uses the overlay
    renderer, anything else (default "pause_insert") uses pause-insert.

    Args:
        source_video_path: Path to source MP4
        ad_segments: (cue_index, mp3_path) pairs, one per AD segment
        analysis: Analysis dict providing "method" and "placements"
        output_path: Destination path for the rendered MP4
        persist_segments: Forwarded to the pause-insert renderer
        gcs_segment_prefix: Forwarded to the pause-insert renderer
            (e.g., "job_id/en/segments/")

    Returns:
        Tuple of (output_path, updated_placements, segment_metadata, pause_points).
        For the overlay method the last three items are always None; for
        pause-insert they are whatever that renderer returns.
    """
    render_method = analysis.get("method", "pause_insert")

    # In Cloud Run mode the source is uploaded once up front and the URI is
    # remembered on the instance for the duration of this render.
    if self._use_cloud_run:
        logger.info("Cloud Run mode: uploading source video to GCS for caching")
        self._cached_source_gcs_uri = self._upload_to_gcs_temp(source_video_path, "source")
        logger.info(f"Source video cached at: {self._cached_source_gcs_uri}")

    try:
        if render_method != "overlay":
            return await self._render_pause_insert_method(
                source_video_path, ad_segments, analysis, output_path,
                persist_segments=persist_segments,
                gcs_segment_prefix=gcs_segment_prefix
            )
        overlay_output = await self._render_overlay_method(
            source_video_path, ad_segments, analysis, output_path
        )
        return (overlay_output, None, None, None)
    finally:
        # Always drop the cached upload, whether the render succeeded or not.
        if self._use_cloud_run and self._cached_source_gcs_uri:
            logger.info(f"Cleaning up cached source video: {self._cached_source_gcs_uri}")
            self._delete_gcs_temp(self._cached_source_gcs_uri)
            self._cached_source_gcs_uri = None
async def _render_overlay_method(
    self,
    source_video_path: str,
    ad_segments: list[tuple[int, str]],
    analysis: dict[str, Any],
    output_path: str,
) -> str:
    """
    Render with overlay method:
    1. Duck the original audio during each placement's duck window
    2. Delay each AD clip to its placement's target start time
    3. Mix the (ducked) original audio with the delayed AD clips
    4. Mux with the original video stream (copied, not re-encoded)

    Args:
        source_video_path: Path to source MP4 (ffmpeg input 0)
        ad_segments: (cue_index, mp3_path) pairs; each becomes an ffmpeg
            input, in order, starting at input index 1
        analysis: Analysis dict; ``placements`` entries are read for
            ``duck_start``, ``duck_end``, ``ad_cue_index`` and
            ``target_start_time``
        output_path: Where to write the output MP4

    Returns:
        ``output_path``
    """
    logger.info(f"Starting overlay render for {source_video_path}")
    placements = analysis.get("placements", [])

    # The duration is only reported in the log; it does not affect the command.
    duration = await self._get_video_duration(source_video_path)
    logger.info(f"Source video duration: {duration}s")

    # One volume filter per placement, active only inside its duck window.
    # The level switches instantly; self.duck_fade_ms is not applied here.
    duck_filters = []
    for placement in placements:
        duck_start = placement.get("duck_start")
        duck_end = placement.get("duck_end")
        if duck_start is not None and duck_end is not None:
            duck_filters.append(
                f"volume=enable='between(t,{duck_start},{duck_end})':"
                f"volume={self.duck_level}"
            )

    # Input 0 is the source video; AD clips follow in ad_segments order.
    inputs = ["-i", source_video_path]
    for _cue_index, mp3_path in ad_segments:
        inputs.extend(["-i", mp3_path])

    filter_parts = []
    if duck_filters:
        ducked_filter = ",".join(duck_filters)
        filter_parts.append(f"[0:a]{ducked_filter}[ducked]")
        base_audio = "[ducked]"
    else:
        base_audio = "[0:a]"

    # Delay each AD clip that has a matching placement to its target time.
    ad_labels = []
    for i, (cue_index, _mp3_path) in enumerate(ad_segments):
        placement = next(
            (p for p in placements if p.get("ad_cue_index") == cue_index),
            None
        )
        if not placement:
            continue
        target_time = placement.get("target_start_time", 0)
        delay_ms = int(target_time * 1000)
        input_idx = i + 1  # 0 is source video
        ad_label = f"ad{i}"
        filter_parts.append(
            f"[{input_idx}:a]adelay={delay_ms}|{delay_ms}[{ad_label}]"
        )
        ad_labels.append(f"[{ad_label}]")

    if ad_labels:
        all_audio = base_audio + "".join(ad_labels)
        num_inputs = 1 + len(ad_labels)
        filter_parts.append(
            f"{all_audio}amix=inputs={num_inputs}:duration=first:dropout_transition=0:normalize=0[mixed]"
        )
        audio_output = "[mixed]"
    else:
        # No AD clips to mix. A filter graph then only exists when ducking was
        # applied, and its output pad must be mapped WITH brackets
        # ("[ducked]"); a bare "ducked" is not a valid -map argument.
        audio_output = base_audio

    filter_complex = ";".join(filter_parts)

    cmd = [
        self.ffmpeg_path,
        "-y",  # Overwrite output
        *inputs,
    ]
    if filter_complex:
        cmd.extend(["-filter_complex", filter_complex])
        cmd.extend([
            "-map", "0:v",
            "-map", audio_output,
            "-c:v", "copy",  # Copy video stream (no re-encoding)
            "-c:a", "aac",
            "-b:a", "192k",
            output_path
        ])
    else:
        # Nothing to filter: plain stream copy.
        cmd.extend([
            "-c:v", "copy",
            "-c:a", "copy",
            output_path
        ])

    logger.info("Running ffmpeg overlay command...")
    await self._run_ffmpeg(cmd)
    logger.info(f"Overlay render complete: {output_path}")
    return output_path
async def _render_pause_insert_method(
    self,
    source_video_path: str,
    ad_segments: list[tuple[int, str]],
    analysis: dict[str, Any],
    output_path: str,
    persist_segments: bool = False,
    gcs_segment_prefix: str | None = None,
) -> tuple[str, list[dict], list[VideoSegmentMetadata] | None, list[PausePointData] | None]:
    """
    Render with pause-insert method:
    1. Split video at each pause point
    2. Extract freeze frame at each pause point
    3. Create freeze-frame segment with AD audio (500ms silence before and after)
    4. Concatenate all segments
    5. Optionally persist segments to GCS for QC editing

    Args:
        source_video_path: Path to source MP4
        ad_segments: (cue_index, mp3_path) pairs for each AD segment
        analysis: Analysis dict; each entry of ``placements`` that has a
            non-None ``pause_point`` must also carry ``ad_cue_index`` and
            ``ad_duration``. NOTE: placement dicts may be modified in place
            (``pause_point`` / ``resume_from``) by the monotonicity fix.
        output_path: Where to save the output MP4
        persist_segments: If True (and ``gcs_segment_prefix`` is set), upload
            segments to GCS and return metadata
        gcs_segment_prefix: GCS path prefix (e.g., "job_id/en/segments/")

    Returns:
        Tuple of (output_path, updated_placements, segment_metadata, pause_points).
        ``updated_placements`` are copies of the placements, with
        ``actual_freeze_duration`` added for every rendered cue. The last two
        items are None unless segments were persisted.
    """
    logger.info(f"Starting pause-insert render for {source_video_path}")
    placements = analysis.get("placements", [])

    # Defensive: enforce pause_point monotonicity in cue_index order before temporal sort.
    # Whisper refinement or user adjustments can cause a later cue's pause_point to
    # precede an earlier cue's, which would reorder cues in the rendered timeline.
    cue_ordered = sorted(
        [p for p in placements if p.get("pause_point") is not None],
        key=lambda p: p.get("ad_cue_index", 0)
    )
    # The pairs share dict objects with cue_ordered, so a clamp applied to
    # `curr` is seen as `prev` on the next iteration.
    for prev, curr in zip(cue_ordered, cue_ordered[1:]):
        prev_pp = prev["pause_point"]
        curr_pp = curr["pause_point"]
        if curr_pp < prev_pp:
            logger.warning(
                f"Renderer monotonicity fix: cue {curr.get('ad_cue_index')} "
                f"pause_point {curr_pp:.2f}s < cue {prev.get('ad_cue_index')} "
                f"pause_point {prev_pp:.2f}s, clamping to {prev_pp:.2f}s"
            )
            curr["pause_point"] = prev_pp
            curr["resume_from"] = prev_pp

    # Sort by pause_point time, with ad_cue_index as secondary key
    # to ensure consolidated cues (sharing same pause_point) maintain VTT order
    sorted_placements = sorted(
        cue_ordered,
        key=lambda p: (p["pause_point"], p.get("ad_cue_index", 0))
    )

    # Debug logging for pause points (midpoint algorithm with silence buffers)
    logger.info(f"Pause-insert (midpoint + 500ms silence buffers): {len(sorted_placements)} placements")
    for i, p in enumerate(sorted_placements):
        logger.info(
            f"  Placement {i}: cue_index={p.get('ad_cue_index')}, "
            f"pause_at={p.get('pause_point'):.2f}s, ad_duration={p.get('ad_duration'):.2f}s, "
            f"consolidated={p.get('consolidated_with_previous', False)}"
        )

    if not sorted_placements:
        logger.warning("No pause points found, copying source video")
        await self._copy_video(source_video_path, output_path)
        return (output_path, [], None, None)

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_dir_path = Path(temp_dir)

        # Get detailed video AND audio properties for uniform encoding
        video_props = await self._get_video_properties(source_video_path)
        source_duration = await self._get_video_duration(source_video_path)
        logger.info(f"Source Properties: {video_props}, Duration: {source_duration:.2f}s")

        # Create a mapping of cue_index to mp3_path
        cue_to_mp3 = {cue_index: mp3_path for cue_index, mp3_path in ad_segments}

        # Pre-process placements and validate
        valid_placements = []
        current_time = 0.0
        for i, placement in enumerate(sorted_placements):
            pause_point = placement["pause_point"]
            cue_index = placement["ad_cue_index"]
            ad_duration = placement["ad_duration"]

            # Validate pause_point is within video bounds
            if pause_point >= source_duration:
                # Never clamp to a point before the previous pause: that would
                # move the timeline backwards and make the next source segment
                # replay footage that was already shown.
                clamped = max(current_time, 0.0, source_duration - 0.1)
                logger.warning(
                    f"Cue {cue_index}: pause_point {pause_point:.2f}s exceeds video duration "
                    f"{source_duration:.2f}s, clamping to {clamped:.2f}s"
                )
                pause_point = clamped

            ad_mp3_path = cue_to_mp3.get(cue_index)
            if not ad_mp3_path:
                logger.warning(f"No AD audio found for cue {cue_index}, skipping")
                continue

            valid_placements.append({
                "index": i,
                "pause_point": pause_point,
                "cue_index": cue_index,
                "ad_duration": ad_duration,
                "ad_mp3_path": ad_mp3_path,
                "segment_start": current_time,
            })
            current_time = pause_point

        # Track final segment info
        final_segment_start = current_time
        final_segment_needed = final_segment_start < source_duration

        # ============================================================
        # PARALLEL PHASE 1: Generate shared silence + extract all frames + all video segments
        # ============================================================
        logger.info(f"Phase 1: Parallel extraction of {len(valid_placements)} frames and video segments")
        silence_duration = 0.5  # 500ms shared by all
        silence_path = temp_dir_path / "silence_shared.m4a"

        # Build tasks for phase 1
        phase1_tasks = []

        # Task: Generate silence (just once, shared by all)
        phase1_tasks.append(self._generate_silence(silence_duration, str(silence_path), video_props))

        # Tasks: Extract all video segments. Placements sharing a pause point
        # with their predecessor have a zero-length span and get no segment.
        video_segment_paths = {}
        for p in valid_placements:
            i = p["index"]
            if p["pause_point"] > p["segment_start"]:
                segment_path = temp_dir_path / f"segment_{i}_video.mp4"
                video_segment_paths[i] = str(segment_path)
                phase1_tasks.append(self._extract_segment_reencoded(
                    source_video_path,
                    p["segment_start"],
                    p["pause_point"] - p["segment_start"],
                    str(segment_path),
                    video_props
                ))

        # Task: Extract final segment if needed
        final_segment_path = None
        if final_segment_needed:
            final_segment_path = temp_dir_path / "segment_final.mp4"
            phase1_tasks.append(self._extract_segment_reencoded(
                source_video_path,
                final_segment_start,
                source_duration - final_segment_start,
                str(final_segment_path),
                video_props
            ))

        # Tasks: Extract all freeze frames
        freeze_frame_paths = {}
        for p in valid_placements:
            i = p["index"]
            freeze_frame_path = temp_dir_path / f"freeze_{i}.png"
            freeze_frame_paths[i] = str(freeze_frame_path)
            phase1_tasks.append(self._extract_frame(
                source_video_path,
                p["pause_point"],
                str(freeze_frame_path)
            ))

        # Execute phase 1 in parallel
        await asyncio.gather(*phase1_tasks)
        logger.info(f"Phase 1 complete: extracted {len(freeze_frame_paths)} frames, {len(video_segment_paths)} video segments")

        # ============================================================
        # PARALLEL PHASE 2: Concatenate all audio tracks
        # ============================================================
        logger.info(f"Phase 2: Parallel audio concatenation for {len(valid_placements)} placements")
        phase2_tasks = []
        combined_audio_paths = {}
        for p in valid_placements:
            i = p["index"]
            combined_audio_path = temp_dir_path / f"combined_audio_{i}.m4a"
            combined_audio_paths[i] = str(combined_audio_path)
            # silence + AD + silence
            phase2_tasks.append(self._concatenate_audio(
                [str(silence_path), p["ad_mp3_path"], str(silence_path)],
                str(combined_audio_path),
                video_props
            ))
        await asyncio.gather(*phase2_tasks)
        logger.info(f"Phase 2 complete: concatenated {len(combined_audio_paths)} audio tracks")

        # ============================================================
        # PARALLEL PHASE 3: Create all freeze segments
        # ============================================================
        logger.info(f"Phase 3: Parallel freeze segment creation for {len(valid_placements)} placements")
        phase3_tasks = []
        freeze_segment_paths = {}
        for p in valid_placements:
            i = p["index"]
            cue_index = p["cue_index"]
            ad_duration = p["ad_duration"]
            total_freeze_duration = ad_duration + (2 * silence_duration)
            logger.info(
                f"Cue {cue_index}: Freeze segment with silence buffers - "
                f"500ms + AD={ad_duration:.2f}s + 500ms = {total_freeze_duration:.2f}s"
            )
            freeze_segment_path = temp_dir_path / f"freeze_segment_{i}.mp4"
            freeze_segment_paths[i] = str(freeze_segment_path)
            phase3_tasks.append(self._create_freeze_segment_matched(
                freeze_frame_paths[i],
                combined_audio_paths[i],
                total_freeze_duration,
                str(freeze_segment_path),
                video_props
            ))
        await asyncio.gather(*phase3_tasks)
        logger.info(f"Phase 3 complete: created {len(freeze_segment_paths)} freeze segments")

        # ============================================================
        # PHASE 3.5: Measure actual freeze segment durations for VTT retiming
        # ============================================================
        # NOTE: Use _get_video_duration_local directly since freeze segments are
        # local files. Using _get_video_duration would incorrectly use the cached
        # source video URI in Cloud Run mode instead of measuring the freeze segment.
        logger.info("Measuring actual freeze segment durations...")
        for p in valid_placements:
            i = p["index"]
            freeze_path = freeze_segment_paths[i]
            actual_duration = await self._get_video_duration_local(freeze_path)
            p["actual_freeze_duration"] = actual_duration

            # Log any discrepancy between expected and actual duration
            expected = p["ad_duration"] + (2 * silence_duration)
            discrepancy = actual_duration - expected
            if abs(discrepancy) > 0.01:  # 10ms threshold
                logger.warning(
                    f"Freeze segment duration mismatch for cue {p['cue_index']}: "
                    f"expected={expected:.3f}s, actual={actual_duration:.3f}s, "
                    f"discrepancy={discrepancy:+.3f}s"
                )
            else:
                logger.debug(
                    f"Freeze segment cue {p['cue_index']}: duration={actual_duration:.3f}s (expected={expected:.3f}s)"
                )

        # ============================================================
        # PHASE 4: Assemble segment list in correct order
        # ============================================================
        segment_files = []
        for p in valid_placements:
            i = p["index"]
            # Add video segment if it exists
            if i in video_segment_paths:
                segment_files.append(video_segment_paths[i])
            # Add freeze segment
            segment_files.append(freeze_segment_paths[i])

        # Add final segment
        logger.info(
            f"Final segment check: current_time={final_segment_start:.2f}s, "
            f"source_duration={source_duration:.2f}s, "
            f"remaining={source_duration - final_segment_start:.2f}s"
        )
        if final_segment_path:
            segment_files.append(str(final_segment_path))
            logger.info(f"Added final segment: {final_segment_start:.2f}s to {source_duration:.2f}s")
        else:
            logger.info("No final segment needed (current_time >= source_duration)")

        # Debug: Log all segments before concatenation
        logger.info(f"Total segments to concatenate: {len(segment_files)}")
        for idx, seg in enumerate(segment_files):
            logger.info(f"  Segment {idx}: {seg}")

        # 5. Concatenate all segments
        if segment_files:
            await self._concatenate_segments(segment_files, output_path, temp_dir_path)
        else:
            await self._copy_video(source_video_path, output_path)
        logger.info(f"Pause-insert render complete: {output_path}")

        # Build updated placements with actual_freeze_duration
        # Map from cue_index to actual_freeze_duration
        actual_durations = {
            p["cue_index"]: p["actual_freeze_duration"]
            for p in valid_placements
        }

        # Return copies so the measured duration is not written into the
        # caller's placement dicts.
        updated_placements = []
        for placement in sorted_placements:
            updated = placement.copy()
            cue_index = placement.get("ad_cue_index")
            if cue_index in actual_durations:
                updated["actual_freeze_duration"] = actual_durations[cue_index]
            updated_placements.append(updated)

        # ============================================================
        # PHASE 5: Persist segments to GCS for QC editing (optional)
        # ============================================================
        # Must run inside the TemporaryDirectory block: the segment files are
        # deleted when it exits.
        segment_metadata_list: list[VideoSegmentMetadata] | None = None
        pause_point_data_list: list[PausePointData] | None = None
        if persist_segments and gcs_segment_prefix:
            logger.info(f"Persisting {len(segment_files)} segments to GCS at {gcs_segment_prefix}")
            segment_metadata_list = []
            segment_idx = 0
            cumulative_time_ms = 0.0  # Running position in the RENDERED timeline

            for p in valid_placements:
                i = p["index"]

                # Upload video segment if it exists
                if i in video_segment_paths:
                    local_path = video_segment_paths[i]
                    gcs_path = f"{gcs_segment_prefix}seg_{segment_idx}.mp4"
                    gcs_uri = self._upload_to_gcs_permanent(local_path, gcs_path)
                    segment_duration_ms = (p["pause_point"] - p["segment_start"]) * 1000
                    segment_metadata_list.append(VideoSegmentMetadata(
                        segment_index=segment_idx,
                        start_ms=cumulative_time_ms,
                        end_ms=cumulative_time_ms + segment_duration_ms,
                        gcs_uri=gcs_uri,
                        duration_ms=segment_duration_ms,
                        is_freeze_frame=False,
                        cue_index=None
                    ))
                    cumulative_time_ms += segment_duration_ms
                    segment_idx += 1

                # Upload freeze segment
                freeze_local_path = freeze_segment_paths[i]
                gcs_path = f"{gcs_segment_prefix}seg_{segment_idx}_freeze.mp4"
                gcs_uri = self._upload_to_gcs_permanent(freeze_local_path, gcs_path)
                freeze_duration_ms = p["actual_freeze_duration"] * 1000
                segment_metadata_list.append(VideoSegmentMetadata(
                    segment_index=segment_idx,
                    start_ms=cumulative_time_ms,
                    end_ms=cumulative_time_ms + freeze_duration_ms,
                    gcs_uri=gcs_uri,
                    duration_ms=freeze_duration_ms,
                    is_freeze_frame=True,
                    cue_index=p["cue_index"]
                ))
                cumulative_time_ms += freeze_duration_ms
                segment_idx += 1

            # Upload final segment if exists
            if final_segment_path:
                gcs_path = f"{gcs_segment_prefix}seg_{segment_idx}.mp4"
                gcs_uri = self._upload_to_gcs_permanent(str(final_segment_path), gcs_path)
                final_duration_ms = (source_duration - final_segment_start) * 1000
                segment_metadata_list.append(VideoSegmentMetadata(
                    segment_index=segment_idx,
                    start_ms=cumulative_time_ms,
                    end_ms=cumulative_time_ms + final_duration_ms,
                    gcs_uri=gcs_uri,
                    duration_ms=final_duration_ms,
                    is_freeze_frame=False,
                    cue_index=None
                ))
            logger.info(f"Persisted {len(segment_metadata_list)} segments to GCS")

            # Build PausePointData list with bounds
            # Pause points should be in RENDERED video coordinates (matching segment timeline)
            pause_point_data_list = []

            # Index the freeze segments once instead of rescanning the list
            # for every placement.
            freeze_frame_starts = {}
            first_freeze_by_cue = {}
            for seg in segment_metadata_list:
                if not seg.is_freeze_frame:
                    continue
                # First segment per cue, matching a front-to-back search.
                first_freeze_by_cue.setdefault(seg.cue_index, seg)
                if seg.cue_index is not None:
                    freeze_frame_starts[seg.cue_index] = seg.start_ms

            last_idx = len(valid_placements) - 1
            for idx, p in enumerate(valid_placements):
                cue_index = p["cue_index"]
                # Pause point is at the START of the freeze frame in the rendered timeline
                pause_ms = freeze_frame_starts.get(cue_index, p["pause_point"] * 1000)

                # Compute min bound: end of previous AD segment (or 0 for first)
                if idx == 0:
                    min_bound_ms = 0.0
                else:
                    prev_cue_index = valid_placements[idx - 1]["cue_index"]
                    prev_freeze_seg = first_freeze_by_cue.get(prev_cue_index)
                    if prev_freeze_seg:
                        min_bound_ms = prev_freeze_seg.end_ms
                    else:
                        min_bound_ms = 0.0

                # Compute max bound: start of next freeze frame (or total duration for last)
                if idx == last_idx:
                    # Max bound is the total rendered video duration
                    max_bound_ms = segment_metadata_list[-1].end_ms if segment_metadata_list else source_duration * 1000
                else:
                    next_placement = valid_placements[idx + 1]
                    max_bound_ms = freeze_frame_starts.get(
                        next_placement["cue_index"],
                        next_placement["pause_point"] * 1000
                    )

                pause_point_data_list.append(PausePointData(
                    cue_index=cue_index,
                    original_ms=pause_ms,  # Rendered timeline position
                    source_ms=p["pause_point"] * 1000,  # Source video cut point
                    adjusted_ms=None,
                    min_bound_ms=min_bound_ms,
                    max_bound_ms=max_bound_ms
                ))
            logger.info(f"Built {len(pause_point_data_list)} pause point data entries")

    return (output_path, updated_placements, segment_metadata_list, pause_point_data_list)
async def _get_video_duration(self, video_path: str) -> float:
    """Return the duration in seconds, probing via Cloud Run when configured, else locally."""
    probe = (
        self._get_video_duration_cloud_run
        if self._use_cloud_run
        else self._get_video_duration_local
    )
    return await probe(video_path)
async def _get_video_duration_local(self, video_path: str) -> float:
    """
    Measure a file's duration with ffprobe, dispatched through the Celery queue.

    Returns:
        The container-level ``format=duration`` value parsed as a float.
    """
    # With nokey/noprint_wrappers ffprobe prints only the bare value.
    output_format = ["-of", "default=noprint_wrappers=1:nokey=1"]
    probe_cmd = [
        self.ffprobe_path,
        "-v", "quiet",
        "-show_entries", "format=duration",
        *output_format,
        video_path,
    ]
    probe = await self._dispatch_ffprobe(probe_cmd)
    raw_duration = probe['stdout'].strip()
    return float(raw_duration)
async def _get_video_duration_cloud_run(self, video_path: str) -> float:
    """
    Get video duration via Cloud Run FFmpeg service.

    When a cached source URI is set on the instance, that URI is probed and
    ``video_path`` is NOT consulted, so this only gives the right answer for
    the cached source video. Otherwise ``video_path`` is uploaded to a
    temporary GCS object that is deleted again after the probe.

    Args:
        video_path: Local path of the video (used only when nothing is cached)

    Returns:
        The ``duration`` field of the probe response.
    """
    cached_uri = self._cached_source_gcs_uri
    # Decide once, up front, whether this call owns the upload. Re-reading
    # the instance attribute after the await could see a value changed by
    # another coroutine and either leak the upload or delete a URI this
    # call did not create.
    uploaded_here = not cached_uri
    gcs_uri = self._upload_to_gcs_temp(video_path, "probe") if uploaded_here else cached_uri
    try:
        result = await self._call_cloud_run_probe(gcs_uri)
        return result["duration"]
    finally:
        if uploaded_here:
            self._delete_gcs_temp(gcs_uri)
async def _get_video_properties(self, video_path: str) -> dict[str, Any]:
    """Return video/audio stream properties, probing via Cloud Run when configured, else locally."""
    probe = (
        self._get_video_properties_cloud_run
        if self._use_cloud_run
        else self._get_video_properties_local
    )
    return await probe(video_path)
async def _get_video_properties_local(self, video_path: str) -> dict[str, Any]:
    """
    Get video properties using local ffprobe via Celery queue.

    Args:
        video_path: Local path of the file to probe

    Returns:
        dict with ``width``, ``height``, ``fps``, ``pix_fmt``, ``codec``
        (from video streams) and ``sample_rate``, ``channels`` (from audio
        streams). Defaults are kept for anything the probe does not report.
        When several streams of one type exist, the last one wins.
    """
    cmd = [
        self.ffprobe_path,
        "-v", "quiet",
        "-show_streams",
        "-of", "json",
        video_path
    ]
    result = await self._dispatch_ffprobe(cmd)
    data = json.loads(result['stdout'])

    # Defaults (44100 is common for MP3, but we detect from source)
    props = {
        "width": 1920,
        "height": 1080,
        "fps": 30.0,
        "sample_rate": "44100",
        "channels": "2",
        "pix_fmt": "yuv420p",
        "codec": "h264"
    }
    for stream in data.get("streams", []):
        if stream.get("codec_type") == "video":
            props["width"] = stream.get("width", props["width"])
            props["height"] = stream.get("height", props["height"])
            props["pix_fmt"] = stream.get("pix_fmt", props["pix_fmt"])
            props["codec"] = stream.get("codec_name", props["codec"])

            # Parse frame rate (e.g., "30000/1001" or "30/1"). A zero
            # denominator ("0/0") or a non-positive rate is unusable, so the
            # current value is kept instead of raising ZeroDivisionError.
            fps_str = stream.get("r_frame_rate", "30/1")
            if "/" in fps_str:
                num, den = fps_str.split("/")
                denominator = float(den)
                fps = float(num) / denominator if denominator else 0.0
            else:
                fps = float(fps_str)
            if fps > 0:
                props["fps"] = fps
        elif stream.get("codec_type") == "audio":
            props["sample_rate"] = stream.get("sample_rate", props["sample_rate"])
            props["channels"] = str(stream.get("channels", props["channels"]))
    return props
async def _get_video_properties_cloud_run(self, video_path: str) -> dict[str, Any]:
    """Probe a video through the Cloud Run service and summarise its streams.

    The cached source GCS URI is used when one is set. Otherwise the local
    file is uploaded to a temporary GCS location for this probe and deleted
    again afterwards. Properties missing from the probe response keep their
    default values. When several streams of the same type are present,
    later streams overwrite earlier ones.

    Args:
        video_path: Local path of the video; only read when no cached
            source URI is available.

    Returns:
        Dict with keys ``width``, ``height``, ``fps`` (float), ``pix_fmt``,
        ``codec``, and ``sample_rate`` / ``channels`` as strings.
    """
    cached_uri = self._cached_source_gcs_uri
    # Decide ownership once, before awaiting. Re-reading the cache attribute
    # in ``finally`` could observe a value changed by another operation on
    # this shared service object, which would either leak the temporary
    # upload or delete the cached source.
    uploaded_here = not cached_uri
    gcs_uri = self._upload_to_gcs_temp(video_path, "probe") if uploaded_here else cached_uri

    try:
        result = await self._call_cloud_run_probe(gcs_uri)

        # Defaults
        props: dict[str, Any] = {
            "width": 1920,
            "height": 1080,
            "fps": 30.0,
            "sample_rate": "44100",
            "channels": "2",
            "pix_fmt": "yuv420p",
            "codec": "h264"
        }

        # Extract from probe result
        for stream in result.get("streams", []):
            codec_type = stream.get("codec_type")
            if codec_type == "video":
                props["width"] = stream.get("width", props["width"])
                props["height"] = stream.get("height", props["height"])
                props["pix_fmt"] = stream.get("pix_fmt", props["pix_fmt"])
                props["codec"] = stream.get("codec_name", props["codec"])

                # Frame rate is a rational such as "30000/1001" or "30/1".
                # A zero denominator (e.g. "0/0") or a non-numeric value
                # would otherwise raise; the current fps is kept instead.
                fps_str = str(stream.get("r_frame_rate", "30/1"))
                num, sep, den = fps_str.partition("/")
                try:
                    fps = float(num) / float(den) if sep else float(num)
                except (ValueError, ZeroDivisionError):
                    fps = 0.0
                # Rejects zero, negative, NaN and infinite rates.
                if 0 < fps < float("inf"):
                    props["fps"] = fps
            elif codec_type == "audio":
                # Coerce to str: these values are later used as ffmpeg
                # arguments and compared against string literals.
                props["sample_rate"] = str(stream.get("sample_rate", props["sample_rate"]))
                props["channels"] = str(stream.get("channels", props["channels"]))

        return props
    finally:
        # Only remove the object this call created itself.
        if uploaded_here:
            self._delete_gcs_temp(gcs_uri)
async def _extract_segment(
    self,
    source_path: str,
    start_time: float,
    duration: float,
    output_path: str
):
    """Cut a segment out of the source by stream copy (for the overlay method).

    Nothing is re-encoded: the streams are copied as-is into ``output_path``.

    Args:
        source_path: Input video path.
        start_time: Value passed to ``-ss`` (placed before the input).
        duration: Value passed to ``-t``.
        output_path: Destination file; overwritten if present (``-y``).
    """
    window_args = ["-ss", str(start_time), "-i", source_path, "-t", str(duration)]
    copy_args = ["-c", "copy", "-avoid_negative_ts", "make_zero"]
    await self._run_ffmpeg(
        [self.ffmpeg_path, "-y", *window_args, *copy_args, output_path]
    )
async def _extract_segment_reencoded(
    self,
    source_path: str,
    start_time: float,
    duration: float,
    output_path: str,
    props: dict[str, Any]
):
    """
    Extract a segment with RE-ENCODING so cuts are frame-accurate.

    Needed by the pause-insert method to avoid:
    - keyframe-only cuts causing audio dropouts
    - timestamp desynchronization

    Delegates to the Cloud Run or the local implementation depending on
    ``self._use_cloud_run``; all arguments are forwarded unchanged.
    """
    if self._use_cloud_run:
        extractor = self._extract_segment_reencoded_cloud_run
    else:
        extractor = self._extract_segment_reencoded_local
    await extractor(source_path, start_time, duration, output_path, props)
async def _extract_segment_reencoded_local(
    self,
    source_path: str,
    start_time: float,
    duration: float,
    output_path: str,
    props: dict[str, Any]
):
    """Re-encode a segment of the source with local ffmpeg.

    Video is encoded with libx264 and audio with AAC, using the pixel
    format, frame rate, sample rate and channel count given in ``props``.

    Args:
        source_path: Input video path.
        start_time: Value passed to ``-ss`` (placed before the input).
        duration: Value passed to ``-t``.
        output_path: Destination file; overwritten if present (``-y``).
        props: Must provide ``pix_fmt``, ``fps``, ``sample_rate`` and
            ``channels``.
    """
    # Video encoding settings
    video_opts = [
        "-c:v", "libx264",
        "-preset", "fast",
        "-pix_fmt", props["pix_fmt"],
        "-r", str(props["fps"]),
    ]
    # Audio encoding settings (forced to match the source)
    audio_opts = [
        "-c:a", "aac",
        "-ar", props["sample_rate"],
        "-ac", props["channels"],
        "-b:a", "192k",
    ]
    # Fixed track timescale, intended to keep timestamps continuous
    mux_opts = ["-video_track_timescale", "90000"]

    await self._run_ffmpeg([
        self.ffmpeg_path,
        "-y",
        "-ss", str(start_time),
        "-i", source_path,
        "-t", str(duration),
        *video_opts,
        *audio_opts,
        *mux_opts,
        output_path,
    ])
async def _extract_segment_reencoded_cloud_run(
    self,
    source_path: str,
    start_time: float,
    duration: float,
    output_path: str,
    props: dict[str, Any]
):
    """Re-encode a segment through the Cloud Run ``/encode-segment`` endpoint.

    The service reads the cached source object and writes to a fresh
    temporary GCS URI, which is then downloaded to ``output_path``. The
    temporary output object is deleted whether or not the call succeeds.
    ``source_path`` is not read here; the cached GCS copy is used instead.

    Raises:
        FFmpegExecutionError: If no source video has been cached in GCS.
    """
    source_uri = self._cached_source_gcs_uri
    if not source_uri:
        raise FFmpegExecutionError("Source video not cached for Cloud Run operation")

    output_gcs_uri = f"gs://{settings.gcs_bucket}/temp/ffmpeg/segment/{uuid4().hex}/segment.mp4"

    try:
        payload = {
            "source_gcs_uri": source_uri,
            "output_gcs_uri": output_gcs_uri,
            "start_time": start_time,
            "duration": duration,
            "width": props["width"],
            "height": props["height"],
            "fps": props["fps"],
            "pix_fmt": props["pix_fmt"],
            "sample_rate": props["sample_rate"],
            "channels": props["channels"],
        }
        await self._call_cloud_run_endpoint("/encode-segment", payload)

        # Bring the encoded segment back to the local filesystem
        self._download_from_gcs_temp(output_gcs_uri, output_path)
    finally:
        self._delete_gcs_temp(output_gcs_uri)
async def _extract_frame(self, video_path: str, time_point: float, output_path: str):
    """Extract a single frame as PNG, via Cloud Run or local ffmpeg.

    The backend is selected by ``self._use_cloud_run``; all arguments are
    forwarded unchanged.
    """
    if self._use_cloud_run:
        extractor = self._extract_frame_cloud_run
    else:
        extractor = self._extract_frame_local
    await extractor(video_path, time_point, output_path)
async def _extract_frame_local(self, video_path: str, time_point: float, output_path: str):
    """Grab one frame at ``time_point`` with local ffmpeg.

    Raises:
        FileNotFoundError: If ffmpeg returned without creating
            ``output_path``.
    """
    logger.debug(f"Extracting frame at {time_point:.2f}s from {video_path}")

    single_frame_args = ["-frames:v", "1", "-q:v", "2"]
    await self._run_ffmpeg([
        self.ffmpeg_path,
        "-y",
        "-ss", str(time_point),
        "-i", video_path,
        *single_frame_args,
        output_path,
    ])

    # FFmpeg can "succeed" yet write nothing when time_point lies beyond the
    # end of the video, so confirm that the file really exists.
    if os.path.exists(output_path):
        return
    raise FileNotFoundError(
        f"Frame extraction failed: no frame created at {time_point:.2f}s "
        f"(time may be beyond video duration)"
    )
async def _extract_frame_cloud_run(self, video_path: str, time_point: float, output_path: str):
    """Grab one frame through the Cloud Run ``/extract-frame`` endpoint.

    The service reads the cached source object and writes the frame to a
    temporary GCS URI, which is downloaded to ``output_path`` and then
    deleted. ``video_path`` is not read here; the cached GCS copy is used.

    Raises:
        FFmpegExecutionError: If no source video has been cached in GCS.
        FileNotFoundError: If no file exists at ``output_path`` after the
            download.
    """
    source_uri = self._cached_source_gcs_uri
    if not source_uri:
        raise FFmpegExecutionError("Source video not cached for Cloud Run operation")

    output_gcs_uri = f"gs://{settings.gcs_bucket}/temp/ffmpeg/frame/{uuid4().hex}/frame.png"

    try:
        payload = {
            "source_gcs_uri": source_uri,
            "output_gcs_uri": output_gcs_uri,
            "time_point": time_point,
        }
        await self._call_cloud_run_endpoint("/extract-frame", payload)

        # Bring the frame back to the local filesystem
        self._download_from_gcs_temp(output_gcs_uri, output_path)

        # Confirm the frame actually arrived
        frame_missing = not os.path.exists(output_path)
        if frame_missing:
            raise FileNotFoundError(
                f"Frame extraction failed: no frame created at {time_point:.2f}s "
                f"(time may be beyond video duration)"
            )
    finally:
        self._delete_gcs_temp(output_gcs_uri)
async def _create_freeze_segment(
    self,
    frame_path: str,
    audio_path: str,
    duration: float,
    output_path: str,
    video_props: dict[str, Any]
):
    """Build a still-image video segment with an audio track (overlay method).

    The frame is looped, scaled to fit and padded to the target size, and
    muxed with ``audio_path``. Width, height and fps come from
    ``video_props`` and fall back to 1920, 1080 and 30 when absent.
    """
    target_w = video_props.get("width", 1920)
    target_h = video_props.get("height", 1080)
    frame_rate = video_props.get("fps", 30)

    # Fit inside the target box, then pad symmetrically to the exact size
    scale_pad = (
        f"scale={target_w}:{target_h}:force_original_aspect_ratio=decrease,"
        f"pad={target_w}:{target_h}:(ow-iw)/2:(oh-ih)/2"
    )

    input_args = ["-loop", "1", "-i", frame_path, "-i", audio_path]
    encode_args = [
        "-c:v", "libx264",
        "-preset", "fast",
        "-tune", "stillimage",
        "-c:a", "aac",
        "-b:a", "192k",
        "-pix_fmt", "yuv420p",
        "-vf", scale_pad,
        "-r", str(frame_rate),
        "-t", str(duration),
        "-shortest",
    ]

    await self._run_ffmpeg(
        [self.ffmpeg_path, "-y", *input_args, *encode_args, output_path]
    )
async def _create_freeze_segment_matched(
    self,
    frame_path: str,
    audio_path: str,
    duration: float,
    output_path: str,
    props: dict[str, Any]
):
    """
    Create a freeze frame that rigidly matches the source video properties.

    This addresses the "silent pause" issue caused by a sample rate mismatch
    when the result is concatenated with extracted video segments.

    Delegates to the Cloud Run or the local implementation depending on
    ``self._use_cloud_run``; all arguments are forwarded unchanged.
    """
    if self._use_cloud_run:
        builder = self._create_freeze_segment_matched_cloud_run
    else:
        builder = self._create_freeze_segment_matched_local
    await builder(frame_path, audio_path, duration, output_path, props)
async def _create_freeze_segment_matched_local(
    self,
    frame_path: str,
    audio_path: str,
    duration: float,
    output_path: str,
    props: dict[str, Any]
):
    """Create a source-matched freeze segment with local ffmpeg.

    Args:
        frame_path: Still image that is looped as the video track.
        audio_path: Audio that accompanies the still image.
        duration: Value passed to ``-t``; must be positive.
        output_path: Destination file; overwritten if present (``-y``).
        props: Must provide ``pix_fmt``, ``fps``, ``width``, ``height``,
            ``sample_rate`` and ``channels``.

    Raises:
        ValueError: If ``duration`` is zero or negative.
        FileNotFoundError: If the frame or the audio file does not exist.
    """
    # Input validation
    if duration <= 0:
        raise ValueError(f"Invalid freeze segment duration: {duration}")
    if not os.path.exists(frame_path):
        raise FileNotFoundError(f"Frame file not found: {frame_path}")
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    logger.debug(
        f"Creating freeze segment: frame={frame_path}, audio={audio_path}, "
        f"duration={duration:.2f}s, props={props}"
    )

    video_opts = [
        "-c:v", "libx264",
        "-preset", "fast",
        "-tune", "stillimage",
        "-pix_fmt", props["pix_fmt"],
        "-r", str(props["fps"]),
    ]
    # Scale and pad so the output dimensions match the source exactly
    scale_pad = f"scale={props['width']}:{props['height']}:force_original_aspect_ratio=decrease,pad={props['width']}:{props['height']}:(ow-iw)/2:(oh-ih)/2"
    # CRITICAL: audio must match the source sample rate and channel count
    audio_opts = [
        "-c:a", "aac",
        "-ar", props["sample_rate"],
        "-ac", props["channels"],
        "-b:a", "192k",
    ]

    await self._run_ffmpeg([
        self.ffmpeg_path,
        "-y",
        "-loop", "1",
        "-i", frame_path,
        "-i", audio_path,
        *video_opts,
        "-vf", scale_pad,
        *audio_opts,
        "-t", str(duration),
        "-video_track_timescale", "90000",
        "-shortest",
        output_path,
    ])
async def _create_freeze_segment_matched_cloud_run(
    self,
    frame_path: str,
    audio_path: str,
    duration: float,
    output_path: str,
    props: dict[str, Any]
):
    """Create a source-matched freeze segment via the Cloud Run service.

    The frame and audio files are uploaded to temporary GCS locations, the
    ``/create-freeze-segment`` endpoint is called, and the result is
    downloaded to ``output_path``. Every temporary object that this call
    uploaded, plus the output object, is deleted afterwards, also when an
    upload or the service call fails.

    Args:
        frame_path: Local still image used as the video track.
        audio_path: Local audio file that accompanies the still image.
        duration: Segment duration sent to the service; must be positive.
        output_path: Local destination for the rendered segment.
        props: Must provide ``width``, ``height``, ``fps``, ``pix_fmt``,
            ``sample_rate`` and ``channels``.

    Raises:
        ValueError: If ``duration`` is zero or negative.
        FileNotFoundError: If the frame or the audio file does not exist.
    """
    # Validate inputs
    if duration <= 0:
        raise ValueError(f"Invalid freeze segment duration: {duration}")
    if not os.path.exists(frame_path):
        raise FileNotFoundError(f"Frame file not found: {frame_path}")
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    output_gcs_uri = f"gs://{settings.gcs_bucket}/temp/ffmpeg/freeze/{uuid4().hex}/freeze.mp4"
    uploaded_uris: list[str] = []

    try:
        # Uploads happen inside the try block so that a failure on the second
        # upload still cleans up the first one.
        frame_gcs_uri = self._upload_to_gcs_temp(frame_path, "frame")
        uploaded_uris.append(frame_gcs_uri)
        audio_gcs_uri = self._upload_to_gcs_temp(audio_path, "audio")
        uploaded_uris.append(audio_gcs_uri)

        await self._call_cloud_run_endpoint(
            "/create-freeze-segment",
            {
                "frame_gcs_uri": frame_gcs_uri,
                "audio_gcs_uri": audio_gcs_uri,
                "output_gcs_uri": output_gcs_uri,
                "duration": duration,
                "width": props["width"],
                "height": props["height"],
                "fps": props["fps"],
                "pix_fmt": props["pix_fmt"],
                "sample_rate": props["sample_rate"],
                "channels": props["channels"],
            }
        )

        # Download result to local path
        self._download_from_gcs_temp(output_gcs_uri, output_path)
    finally:
        for gcs_uri in (*uploaded_uris, output_gcs_uri):
            self._delete_gcs_temp(gcs_uri)
async def _extract_audio_segment(
    self,
    source_path: str,
    start_time: float,
    duration: float,
    output_path: str,
    props: dict[str, Any]
):
    """
    Pull an audio-only segment out of the video for catch-up audio.

    The extracted range is the source audio from [resume_from, pause_point],
    which plays during the freeze frame extension after the AD audio.

    Args:
        source_path: Path to source video
        start_time: Start time in seconds
        duration: Duration in seconds
        output_path: Path to output audio file
        props: Video properties (``sample_rate`` and ``channels`` are used)

    Raises:
        ValueError: If ``duration`` is zero or negative.
    """
    if duration <= 0:
        raise ValueError(f"Invalid audio segment duration: {duration}")

    logger.debug(
        f"Extracting audio segment: start={start_time:.2f}s, "
        f"duration={duration:.2f}s, output={output_path}"
    )

    window_args = ["-ss", str(start_time), "-i", source_path, "-t", str(duration)]
    # Drop the video stream and encode audio like the freeze segments do
    audio_opts = [
        "-vn",
        "-c:a", "aac",
        "-ar", props["sample_rate"],
        "-ac", props["channels"],
        "-b:a", "192k",
    ]

    await self._run_ffmpeg(
        [self.ffmpeg_path, "-y", *window_args, *audio_opts, output_path]
    )
async def _concatenate_audio(
    self,
    audio_paths: list[str],
    output_path: str,
    props: dict[str, Any]
):
    """
    Join several audio files into one (combined AD + catch-up audio).

    The FFmpeg concat filter is used so the pieces are joined with a
    consistent encoding. Delegates to the Cloud Run or the local
    implementation depending on ``self._use_cloud_run``.
    """
    if self._use_cloud_run:
        joiner = self._concatenate_audio_cloud_run
    else:
        joiner = self._concatenate_audio_local
    await joiner(audio_paths, output_path, props)
async def _concatenate_audio_local(
    self,
    audio_paths: list[str],
    output_path: str,
    props: dict[str, Any]
):
    """Join audio files with local ffmpeg, re-encoding to AAC.

    A single input is simply re-encoded so its format is consistent; two or
    more inputs are joined with the ``concat`` filter.

    Raises:
        ValueError: If ``audio_paths`` is empty.
    """
    if not audio_paths:
        raise ValueError("No audio files to concatenate")

    if len(audio_paths) == 1:
        # Nothing to join: re-encode the lone file for format consistency
        await self._run_ffmpeg([
            self.ffmpeg_path,
            "-y",
            "-i", audio_paths[0],
            "-c:a", "aac",
            "-ar", props["sample_rate"],
            "-ac", props["channels"],
            "-b:a", "192k",
            output_path,
        ])
        return

    # One "-i <path>" pair and one "[N:a]" stream label per input
    input_args = [arg for path in audio_paths for arg in ("-i", path)]
    stream_labels = "".join(f"[{idx}:a]" for idx in range(len(audio_paths)))
    filter_complex = stream_labels + f"concat=n={len(audio_paths)}:v=0:a=1[out]"

    logger.debug(
        f"Concatenating {len(audio_paths)} audio files: {audio_paths}"
    )

    await self._run_ffmpeg([
        self.ffmpeg_path,
        "-y",
        *input_args,
        "-filter_complex", filter_complex,
        "-map", "[out]",
        "-c:a", "aac",
        "-ar", props["sample_rate"],
        "-ac", props["channels"],
        "-b:a", "192k",
        output_path,
    ])
async def _concatenate_audio_cloud_run(
    self,
    audio_paths: list[str],
    output_path: str,
    props: dict[str, Any]
):
    """Join audio files through the Cloud Run ``/run-ffmpeg`` endpoint.

    Each input is uploaded to a temporary GCS location, an ffmpeg command
    template using the ``concat`` filter is sent to the service, and the
    result is downloaded to ``output_path``. Every object uploaded by this
    call, plus the output object, is deleted afterwards, also when one of
    the uploads or the service call fails.

    Args:
        audio_paths: Local audio files, joined in the given order.
        output_path: Local destination for the combined audio.
        props: Must provide ``sample_rate`` and ``channels``.

    Raises:
        ValueError: If ``audio_paths`` is empty.
    """
    if not audio_paths:
        raise ValueError("No audio files to concatenate")

    output_gcs_uri = f"gs://{settings.gcs_bucket}/temp/ffmpeg/audio/{uuid4().hex}/combined.m4a"
    audio_gcs_uris: list[str] = []

    try:
        # Upload inside the try block so that a failure part-way through
        # still cleans up the files that were already uploaded.
        for path in audio_paths:
            audio_gcs_uris.append(self._upload_to_gcs_temp(path, "audio"))

        input_count = len(audio_gcs_uris)
        stream_labels = "".join(f"[{index}:a]" for index in range(input_count))
        filter_complex = f"{stream_labels}concat=n={input_count}:v=0:a=1[out]"

        # The template carries one "{input_N}" placeholder per uploaded file
        # and an "{output}" placeholder; presumably the service substitutes
        # them with local paths (verify against the service implementation).
        cmd_template = ["ffmpeg", "-y"]
        for index in range(input_count):
            cmd_template += ["-i", f"{{input_{index}}}"]
        cmd_template += [
            "-filter_complex", filter_complex,
            "-map", "[out]",
            "-c:a", "aac",
            "-ar", props["sample_rate"],
            "-ac", props["channels"],
            "-b:a", "192k",
            "{output}"
        ]

        await self._call_cloud_run_endpoint(
            "/run-ffmpeg",
            {
                "input_gcs_uris": audio_gcs_uris,
                "output_gcs_uri": output_gcs_uri,
                "command_template": cmd_template
            }
        )

        # Download result to local path
        self._download_from_gcs_temp(output_gcs_uri, output_path)
    finally:
        for gcs_uri in (*audio_gcs_uris, output_gcs_uri):
            self._delete_gcs_temp(gcs_uri)
async def _generate_silence(
    self,
    duration: float,
    output_path: str,
    props: dict[str, Any]
):
    """
    Produce a silent audio file of the requested duration.

    Used to create the 500ms silence buffers before/after AD audio.
    Delegates to the Cloud Run or the local implementation depending on
    ``self._use_cloud_run``.
    """
    if self._use_cloud_run:
        generator = self._generate_silence_cloud_run
    else:
        generator = self._generate_silence_local
    await generator(duration, output_path, props)
async def _generate_silence_local(
    self,
    duration: float,
    output_path: str,
    props: dict[str, Any]
):
    """Render silence with local ffmpeg using the ``anullsrc`` lavfi source.

    The channel layout is ``stereo`` when ``props["channels"]`` equals the
    string ``"2"`` and ``mono`` for any other value.

    Raises:
        ValueError: If ``duration`` is zero or negative.
    """
    if duration <= 0:
        raise ValueError(f"Invalid silence duration: {duration}")

    logger.debug(
        f"Generating {duration:.2f}s silence: output={output_path}"
    )

    sample_rate = props["sample_rate"]
    layout = "stereo" if props["channels"] == "2" else "mono"
    silence_source = f"anullsrc=r={sample_rate}:cl={layout}"

    await self._run_ffmpeg([
        self.ffmpeg_path,
        "-y",
        "-f", "lavfi",
        "-i", silence_source,
        "-t", str(duration),
        "-c:a", "aac",
        "-ar", props["sample_rate"],
        "-ac", props["channels"],
        "-b:a", "192k",
        output_path,
    ])
async def _generate_silence_cloud_run(
    self,
    duration: float,
    output_path: str,
    props: dict[str, Any]
):
    """Render silence through the Cloud Run ``/run-ffmpeg`` endpoint.

    No input objects are sent; the command template uses the ``anullsrc``
    lavfi source. The result is downloaded to ``output_path`` and the
    temporary output object is deleted afterwards.

    Raises:
        ValueError: If ``duration`` is zero or negative.
    """
    if duration <= 0:
        raise ValueError(f"Invalid silence duration: {duration}")

    output_gcs_uri = f"gs://{settings.gcs_bucket}/temp/ffmpeg/silence/{uuid4().hex}/silence.m4a"

    try:
        layout = "mono"
        if props["channels"] == "2":
            layout = "stereo"

        command_template = [
            "ffmpeg", "-y",
            "-f", "lavfi",
            "-i", f"anullsrc=r={props['sample_rate']}:cl={layout}",
            "-t", str(duration),
            "-c:a", "aac",
            "-ar", props["sample_rate"],
            "-ac", props["channels"],
            "-b:a", "192k",
            "{output}"
        ]
        request_body = {
            "input_gcs_uris": [],  # lavfi source, so there are no input files
            "output_gcs_uri": output_gcs_uri,
            "command_template": command_template,
        }
        await self._call_cloud_run_endpoint("/run-ffmpeg", request_body)

        # Bring the generated file back to the local filesystem
        self._download_from_gcs_temp(output_gcs_uri, output_path)
    finally:
        self._delete_gcs_temp(output_gcs_uri)
async def _concatenate_segments(
    self,
    segment_paths: list[str],
    output_path: str,
    temp_dir: Path
):
    """Join video segments into one file, via Cloud Run or local ffmpeg.

    ``temp_dir`` is only handed to the local implementation; the Cloud Run
    implementation is called without it.
    """
    if not self._use_cloud_run:
        await self._concatenate_segments_local(segment_paths, output_path, temp_dir)
        return
    await self._concatenate_segments_cloud_run(segment_paths, output_path)
async def _concatenate_segments_local(
    self,
    segment_paths: list[str],
    output_path: str,
    temp_dir: Path
):
    """Join video segments with the ffmpeg concat demuxer (stream copy).

    A list file ``concat.txt`` is written into ``temp_dir`` with one
    ``file '<path>'`` line per segment, then ffmpeg is run with
    ``-f concat -safe 0 -c copy``.

    Args:
        segment_paths: Segment files, joined in the given order.
        output_path: Destination file; overwritten if present (``-y``).
        temp_dir: Directory in which the list file is created.
    """
    concat_file = temp_dir / "concat.txt"

    # Write the list as UTF-8 explicitly: the default text encoding of
    # open() depends on the locale, which could mangle non-ASCII paths.
    # NOTE(review): the concat demuxer resolves relative entries against the
    # list file's directory; this assumes callers pass absolute paths.
    with open(concat_file, "w", encoding="utf-8") as f:
        for path in segment_paths:
            # Escape single quotes in path
            escaped_path = path.replace("'", "'\\''")
            f.write(f"file '{escaped_path}'\n")

    cmd = [
        self.ffmpeg_path,
        "-y",
        "-f", "concat",
        "-safe", "0",
        "-i", str(concat_file),
        "-c", "copy",
        output_path
    ]

    await self._run_ffmpeg(cmd)
async def _concatenate_segments_cloud_run(
    self,
    segment_paths: list[str],
    output_path: str
):
    """Join video segments through the Cloud Run ``/concatenate`` endpoint.

    Each segment is uploaded to a temporary GCS location, the service is
    asked to concatenate them, and the result is downloaded to
    ``output_path``. Every object uploaded by this call, plus the output
    object, is deleted afterwards, also when one of the uploads or the
    service call fails.

    Args:
        segment_paths: Local segment files, joined in the given order.
        output_path: Local destination for the concatenated video.
    """
    output_gcs_uri = f"gs://{settings.gcs_bucket}/temp/ffmpeg/concat/{uuid4().hex}/final.mp4"
    segment_gcs_uris: list[str] = []

    try:
        # Upload inside the try block so that a failure part-way through
        # still cleans up the segments that were already uploaded.
        for path in segment_paths:
            segment_gcs_uris.append(self._upload_to_gcs_temp(path, "segment"))

        await self._call_cloud_run_endpoint(
            "/concatenate",
            {
                "segment_gcs_uris": segment_gcs_uris,
                "output_gcs_uri": output_gcs_uri,
            }
        )

        # Download result to local path
        self._download_from_gcs_temp(output_gcs_uri, output_path)
    finally:
        for gcs_uri in (*segment_gcs_uris, output_gcs_uri):
            self._delete_gcs_temp(gcs_uri)
async def _copy_video(self, source_path: str, output_path: str):
    """Remux ``source_path`` into ``output_path`` with all streams copied.

    ffmpeg is run with ``-c copy``, so nothing is re-encoded; an existing
    output file is overwritten (``-y``).
    """
    await self._run_ffmpeg(
        [self.ffmpeg_path, "-y", "-i", source_path, "-c", "copy", output_path]
    )
async def _run_ffmpeg(self, cmd: list[str], timeout: int = 3600):
    """Log an ffmpeg command and hand it to the dedicated ffmpeg queue.

    Args:
        cmd: Full argument list, starting with the ffmpeg executable.
        timeout: Forwarded unchanged to ``_dispatch_ffmpeg``.
    """
    command_line = " ".join(cmd)
    logger.debug(f"Running command: {command_line}")
    await self._dispatch_ffmpeg(cmd, timeout)
# Global service instance: a single VideoRendererService is constructed when
# this module is imported and bound to a module-level name.
video_renderer_service = VideoRendererService()