diff --git a/backend/app/services/video_renderer.py b/backend/app/services/video_renderer.py index 741ae15..89ffc0a 100644 --- a/backend/app/services/video_renderer.py +++ b/backend/app/services/video_renderer.py @@ -300,7 +300,7 @@ class VideoRendererService: ad_segments: list[tuple[int, str]], # [(cue_index, mp3_path), ...] analysis: dict[str, Any], output_path: str, - ) -> str: + ) -> tuple[str, list[dict] | None]: """ Render accessible video based on Gemini analysis. @@ -311,7 +311,9 @@ class VideoRendererService: output_path: Where to save the output MP4 Returns: - Path to rendered accessible video + Tuple of (output_path, updated_placements) + - output_path: Path to rendered accessible video + - updated_placements: Placements with actual_freeze_duration added (pause-insert only) """ method = analysis.get("method", "pause_insert") @@ -323,9 +325,10 @@ class VideoRendererService: try: if method == "overlay": - return await self._render_overlay_method( + result_path = await self._render_overlay_method( source_video_path, ad_segments, analysis, output_path ) + return (result_path, None) else: return await self._render_pause_insert_method( source_video_path, ad_segments, analysis, output_path @@ -457,7 +460,7 @@ class VideoRendererService: ad_segments: list[tuple[int, str]], analysis: dict[str, Any], output_path: str, - ) -> str: + ) -> tuple[str, list[dict]]: """ Render with pause-insert method: 1. Split video at each pause point @@ -486,7 +489,7 @@ class VideoRendererService: if not sorted_placements: logger.warning("No pause points found, copying source video") await self._copy_video(source_video_path, output_path) - return output_path + return (output_path, []) with tempfile.TemporaryDirectory() as temp_dir: temp_dir_path = Path(temp_dir) @@ -642,6 +645,30 @@ class VideoRendererService: await asyncio.gather(*phase3_tasks) logger.info(f"Phase 3 complete: created {len(freeze_segment_paths)} freeze segments") + # ============================================================ + # PHASE 3.5: Measure actual freeze segment durations for VTT retiming + # ============================================================ + logger.info("Measuring actual freeze segment durations...") + for p in valid_placements: + i = p["index"] + freeze_path = freeze_segment_paths[i] + actual_duration = await self._get_video_duration(freeze_path) + p["actual_freeze_duration"] = actual_duration + + # Log any discrepancy between expected and actual duration + expected = p["ad_duration"] + (2 * silence_duration) + discrepancy = actual_duration - expected + if abs(discrepancy) > 0.01: # 10ms threshold + logger.warning( + f"Freeze segment duration mismatch for cue {p['cue_index']}: " + f"expected={expected:.3f}s, actual={actual_duration:.3f}s, " + f"discrepancy={discrepancy:+.3f}s" + ) + else: + logger.debug( + f"Freeze segment cue {p['cue_index']}: duration={actual_duration:.3f}s (expected={expected:.3f}s)" + ) + # ============================================================ # PHASE 4: Assemble segment list in correct order # ============================================================ @@ -678,7 +705,24 @@ class VideoRendererService: await self._copy_video(source_video_path, output_path) logger.info(f"Pause-insert render complete: {output_path}") - return output_path + + # Build updated placements with actual_freeze_duration + # Map from cue_index to actual_freeze_duration + actual_durations = { + p["cue_index"]: p["actual_freeze_duration"] + for p in valid_placements + } + + # Update original placements with actual freeze durations + updated_placements = [] + for placement in sorted_placements: + updated = placement.copy() + cue_index = placement.get("ad_cue_index") + if cue_index in actual_durations: + updated["actual_freeze_duration"] = actual_durations[cue_index] + updated_placements.append(updated) + + return (output_path, updated_placements) async def _get_video_duration(self, video_path: str) -> float: """Get video duration in seconds using ffprobe.""" diff --git a/backend/app/services/vtt_retimer.py b/backend/app/services/vtt_retimer.py index 6f3ce5f..c26ea3f 100644 --- a/backend/app/services/vtt_retimer.py +++ b/backend/app/services/vtt_retimer.py @@ -66,8 +66,12 @@ class VTTRetimerService: self, placements: list[dict] ) -> list[tuple[float, float]]: - """Build sorted list of (pause_point, effective_offset) tuples.""" - silence_buffer_total = 1.0 # 500ms + 500ms + """Build sorted list of (pause_point, effective_offset) tuples. + + Uses actual_freeze_duration when available (measured from rendered video), + otherwise falls back to calculated value (ad_duration + 1.0s). + """ + silence_buffer_total = 1.0 # 500ms + 500ms (fallback calculation) pauses = [] for placement in placements: @@ -75,12 +79,22 @@ class VTTRetimerService: ad_duration = placement.get("ad_duration", 0) if pause_point is not None and ad_duration > 0: - effective_offset = ad_duration + silence_buffer_total + # Prefer actual freeze duration if available (measured from rendered video) + actual_freeze = placement.get("actual_freeze_duration") + if actual_freeze is not None: + effective_offset = actual_freeze + logger.debug( + f"Pause at {pause_point:.2f}s: using actual_freeze_duration={effective_offset:.2f}s " + f"(ad_duration={ad_duration:.2f}s)" + ) + else: + effective_offset = ad_duration + silence_buffer_total + logger.debug( + f"Pause at {pause_point:.2f}s: using calculated freeze_duration={effective_offset:.2f}s " + f"(ad_duration={ad_duration:.2f}s + 1.0s buffer)" + ) + pauses.append((pause_point, effective_offset)) - logger.debug( - f"Pause at {pause_point:.2f}s: ad_duration={ad_duration:.2f}s, " - f"freeze_duration={effective_offset:.2f}s" - ) return sorted(pauses, key=lambda x: x[0]) diff --git a/backend/app/tasks/render_accessible_video.py b/backend/app/tasks/render_accessible_video.py index f6c163d..1337fa7 100644 --- a/backend/app/tasks/render_accessible_video.py +++ b/backend/app/tasks/render_accessible_video.py @@ -202,13 +202,18 @@ async def _async_render_accessible_video(job_id: str, language: str): output_video_path = os.path.join(temp_dir, "accessible_video.mp4") logger.info(f"Rendering accessible video using {method} method...") - await video_renderer_service.render_accessible_video( + rendered_path, updated_placements = await video_renderer_service.render_accessible_video( source_video_path, ad_segments, analysis, output_video_path ) + # Update analysis with actual freeze durations for VTT retiming + if updated_placements: + analysis["placements"] = updated_placements + logger.info(f"Updated {len(updated_placements)} placements with actual freeze durations") + # 7. Upload rendered video to GCS video_blob_path = f"{job_id}/{language}/accessible_video.mp4" video_blob = gcs_service.bucket.blob(video_blob_path) diff --git a/backend/tests/unit/test_vtt_retimer.py b/backend/tests/unit/test_vtt_retimer.py index 40d6f9d..2b459ab 100644 --- a/backend/tests/unit/test_vtt_retimer.py +++ b/backend/tests/unit/test_vtt_retimer.py @@ -389,3 +389,124 @@ class TestVTTRetimerHelpers: assert len(filtered) == 2 assert filtered[0]["text"] == "long enough" assert filtered[1]["text"] == "just enough" + + def test_build_pause_list_uses_actual_freeze_duration(self, retimer): + """_build_pause_list should use actual_freeze_duration when available.""" + placements = [ + {"pause_point": 10.0, "ad_duration": 3.0, "actual_freeze_duration": 4.5}, + {"pause_point": 20.0, "ad_duration": 5.0}, # No actual, should calculate + ] + + pauses = retimer._build_pause_list(placements) + + assert len(pauses) == 2 + # First pause: should use actual_freeze_duration (4.5s) + assert pauses[0] == (10.0, 4.5) + # Second pause: should use calculated (5.0 + 1.0 = 6.0s) + assert pauses[1] == (20.0, 6.0) + + +class TestVTTRetimerActualFreezeDuration: + """Test VTT retiming with actual_freeze_duration field.""" + + @pytest.fixture + def retimer(self): + """Create a VTTRetimerService instance.""" + return VTTRetimerService() + + def test_uses_actual_freeze_duration_when_provided(self, retimer): + """Should use actual_freeze_duration instead of calculated value.""" + vtt = """WEBVTT + +00:00:15.000 --> 00:00:18.000 +Later caption +""" + analysis = { + "placements": [{ + "pause_point": 10.0, + "ad_duration": 3.0, + "actual_freeze_duration": 4.5 # Actual is 0.5s longer than calculated + }] + } + + result = retimer.retime_for_pause_insert(vtt, analysis) + cues = retimer._parse_vtt(result) + + # Should use actual_freeze_duration (4.5s) not calculated (4.0s) + assert len(cues) == 1 + assert cues[0]["start_time"] == 19.5 # 15 + 4.5 + assert cues[0]["end_time"] == 22.5 # 18 + 4.5 + + def test_falls_back_to_calculated_when_actual_not_provided(self, retimer): + """Should use calculated value when actual_freeze_duration not provided.""" + vtt = """WEBVTT + +00:00:15.000 --> 00:00:18.000 +Later caption +""" + analysis = { + "placements": [{ + "pause_point": 10.0, + "ad_duration": 3.0 + # No actual_freeze_duration + }] + } + + result = retimer.retime_for_pause_insert(vtt, analysis) + cues = retimer._parse_vtt(result) + + # Should use calculated (3.0 + 1.0 = 4.0s) + assert len(cues) == 1 + assert cues[0]["start_time"] == 19.0 # 15 + 4 + assert cues[0]["end_time"] == 22.0 # 18 + 4 + + def test_mixed_actual_and_calculated(self, retimer): + """Should handle mix of actual and calculated freeze durations.""" + vtt = """WEBVTT + +00:00:25.000 --> 00:00:28.000 +Late caption +""" + analysis = { + "placements": [ + {"pause_point": 10.0, "ad_duration": 3.0, "actual_freeze_duration": 4.2}, + {"pause_point": 20.0, "ad_duration": 5.0} # No actual + ] + } + + result = retimer.retime_for_pause_insert(vtt, analysis) + cues = retimer._parse_vtt(result) + + # Total offset = 4.2 (actual) + 6.0 (calculated: 5+1) = 10.2 + assert len(cues) == 1 + assert cues[0]["start_time"] == pytest.approx(35.2, rel=1e-3) # 25 + 10.2 + assert cues[0]["end_time"] == pytest.approx(38.2, rel=1e-3) # 28 + 10.2 + + def test_cue_spanning_pause_with_actual_duration(self, retimer): + """Cue spanning pause should use actual_freeze_duration for split timing.""" + vtt = """WEBVTT + +00:00:08.000 --> 00:00:12.000 +Spanning caption +""" + analysis = { + "placements": [{ + "pause_point": 10.0, + "ad_duration": 3.0, + "actual_freeze_duration": 4.3 # Slightly longer than calculated + }] + } + + result = retimer.retime_for_pause_insert(vtt, analysis) + cues = retimer._parse_vtt(result) + + # Should split into two segments + assert len(cues) == 2 + + # Segment 1: 8s-10s (before freeze, no offset) + assert cues[0]["start_time"] == 8.0 + assert cues[0]["end_time"] == 10.0 + + # Segment 2: 14.3s-16.3s (after freeze, +4.3s offset) + assert cues[1]["start_time"] == pytest.approx(14.3, rel=1e-3) + assert cues[1]["end_time"] == pytest.approx(16.3, rel=1e-3)