fix: use actual freeze segment durations for VTT subtitle retiming
Subtitles drifted progressively out of sync (about 1.0s early per AD) because the VTT retimer computed each freeze duration from the nominal formula (ad_duration + 1.0s silence buffer) instead of using the duration of the freeze segment that was actually rendered.

Changes:
- video_renderer: Measure the actual duration of each freeze segment after it is created
- video_renderer: Return updated placements carrying actual_freeze_duration
- vtt_retimer: Prefer actual_freeze_duration over the calculated value
- render_task: Pass the actual durations to the VTT retimer

This ensures subtitle timing matches the real video timeline regardless of any FFmpeg encoding variations.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
e44210ea64
commit
add958008a
4 changed files with 198 additions and 14 deletions
|
|
@ -300,7 +300,7 @@ class VideoRendererService:
|
|||
ad_segments: list[tuple[int, str]], # [(cue_index, mp3_path), ...]
|
||||
analysis: dict[str, Any],
|
||||
output_path: str,
|
||||
) -> str:
|
||||
) -> tuple[str, list[dict] | None]:
|
||||
"""
|
||||
Render accessible video based on Gemini analysis.
|
||||
|
||||
|
|
@ -311,7 +311,9 @@ class VideoRendererService:
|
|||
output_path: Where to save the output MP4
|
||||
|
||||
Returns:
|
||||
Path to rendered accessible video
|
||||
Tuple of (output_path, updated_placements)
|
||||
- output_path: Path to rendered accessible video
|
||||
- updated_placements: Placements with actual_freeze_duration added (pause-insert only)
|
||||
"""
|
||||
method = analysis.get("method", "pause_insert")
|
||||
|
||||
|
|
@ -323,9 +325,10 @@ class VideoRendererService:
|
|||
|
||||
try:
|
||||
if method == "overlay":
|
||||
return await self._render_overlay_method(
|
||||
result_path = await self._render_overlay_method(
|
||||
source_video_path, ad_segments, analysis, output_path
|
||||
)
|
||||
return (result_path, None)
|
||||
else:
|
||||
return await self._render_pause_insert_method(
|
||||
source_video_path, ad_segments, analysis, output_path
|
||||
|
|
@ -457,7 +460,7 @@ class VideoRendererService:
|
|||
ad_segments: list[tuple[int, str]],
|
||||
analysis: dict[str, Any],
|
||||
output_path: str,
|
||||
) -> str:
|
||||
) -> tuple[str, list[dict]]:
|
||||
"""
|
||||
Render with pause-insert method:
|
||||
1. Split video at each pause point
|
||||
|
|
@ -486,7 +489,7 @@ class VideoRendererService:
|
|||
if not sorted_placements:
|
||||
logger.warning("No pause points found, copying source video")
|
||||
await self._copy_video(source_video_path, output_path)
|
||||
return output_path
|
||||
return (output_path, [])
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
temp_dir_path = Path(temp_dir)
|
||||
|
|
@ -642,6 +645,30 @@ class VideoRendererService:
|
|||
await asyncio.gather(*phase3_tasks)
|
||||
logger.info(f"Phase 3 complete: created {len(freeze_segment_paths)} freeze segments")
|
||||
|
||||
# ============================================================
|
||||
# PHASE 3.5: Measure actual freeze segment durations for VTT retiming
|
||||
# ============================================================
|
||||
logger.info("Measuring actual freeze segment durations...")
|
||||
for p in valid_placements:
|
||||
i = p["index"]
|
||||
freeze_path = freeze_segment_paths[i]
|
||||
actual_duration = await self._get_video_duration(freeze_path)
|
||||
p["actual_freeze_duration"] = actual_duration
|
||||
|
||||
# Log any discrepancy between expected and actual duration
|
||||
expected = p["ad_duration"] + (2 * silence_duration)
|
||||
discrepancy = actual_duration - expected
|
||||
if abs(discrepancy) > 0.01: # 10ms threshold
|
||||
logger.warning(
|
||||
f"Freeze segment duration mismatch for cue {p['cue_index']}: "
|
||||
f"expected={expected:.3f}s, actual={actual_duration:.3f}s, "
|
||||
f"discrepancy={discrepancy:+.3f}s"
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
f"Freeze segment cue {p['cue_index']}: duration={actual_duration:.3f}s (expected={expected:.3f}s)"
|
||||
)
|
||||
|
||||
# ============================================================
|
||||
# PHASE 4: Assemble segment list in correct order
|
||||
# ============================================================
|
||||
|
|
@ -678,7 +705,24 @@ class VideoRendererService:
|
|||
await self._copy_video(source_video_path, output_path)
|
||||
|
||||
logger.info(f"Pause-insert render complete: {output_path}")
|
||||
return output_path
|
||||
|
||||
# Build updated placements with actual_freeze_duration
|
||||
# Map from cue_index to actual_freeze_duration
|
||||
actual_durations = {
|
||||
p["cue_index"]: p["actual_freeze_duration"]
|
||||
for p in valid_placements
|
||||
}
|
||||
|
||||
# Update original placements with actual freeze durations
|
||||
updated_placements = []
|
||||
for placement in sorted_placements:
|
||||
updated = placement.copy()
|
||||
cue_index = placement.get("ad_cue_index")
|
||||
if cue_index in actual_durations:
|
||||
updated["actual_freeze_duration"] = actual_durations[cue_index]
|
||||
updated_placements.append(updated)
|
||||
|
||||
return (output_path, updated_placements)
|
||||
|
||||
async def _get_video_duration(self, video_path: str) -> float:
|
||||
"""Get video duration in seconds using ffprobe."""
|
||||
|
|
|
|||
|
|
@ -66,8 +66,12 @@ class VTTRetimerService:
|
|||
self,
|
||||
placements: list[dict]
|
||||
) -> list[tuple[float, float]]:
|
||||
"""Build sorted list of (pause_point, effective_offset) tuples."""
|
||||
silence_buffer_total = 1.0 # 500ms + 500ms
|
||||
"""Build sorted list of (pause_point, effective_offset) tuples.
|
||||
|
||||
Uses actual_freeze_duration when available (measured from rendered video),
|
||||
otherwise falls back to calculated value (ad_duration + 1.0s).
|
||||
"""
|
||||
silence_buffer_total = 1.0 # 500ms + 500ms (fallback calculation)
|
||||
|
||||
pauses = []
|
||||
for placement in placements:
|
||||
|
|
@ -75,12 +79,22 @@ class VTTRetimerService:
|
|||
ad_duration = placement.get("ad_duration", 0)
|
||||
|
||||
if pause_point is not None and ad_duration > 0:
|
||||
effective_offset = ad_duration + silence_buffer_total
|
||||
# Prefer actual freeze duration if available (measured from rendered video)
|
||||
actual_freeze = placement.get("actual_freeze_duration")
|
||||
if actual_freeze is not None:
|
||||
effective_offset = actual_freeze
|
||||
logger.debug(
|
||||
f"Pause at {pause_point:.2f}s: using actual_freeze_duration={effective_offset:.2f}s "
|
||||
f"(ad_duration={ad_duration:.2f}s)"
|
||||
)
|
||||
else:
|
||||
effective_offset = ad_duration + silence_buffer_total
|
||||
logger.debug(
|
||||
f"Pause at {pause_point:.2f}s: using calculated freeze_duration={effective_offset:.2f}s "
|
||||
f"(ad_duration={ad_duration:.2f}s + 1.0s buffer)"
|
||||
)
|
||||
|
||||
pauses.append((pause_point, effective_offset))
|
||||
logger.debug(
|
||||
f"Pause at {pause_point:.2f}s: ad_duration={ad_duration:.2f}s, "
|
||||
f"freeze_duration={effective_offset:.2f}s"
|
||||
)
|
||||
|
||||
return sorted(pauses, key=lambda x: x[0])
|
||||
|
||||
|
|
|
|||
|
|
@ -202,13 +202,18 @@ async def _async_render_accessible_video(job_id: str, language: str):
|
|||
output_video_path = os.path.join(temp_dir, "accessible_video.mp4")
|
||||
|
||||
logger.info(f"Rendering accessible video using {method} method...")
|
||||
await video_renderer_service.render_accessible_video(
|
||||
rendered_path, updated_placements = await video_renderer_service.render_accessible_video(
|
||||
source_video_path,
|
||||
ad_segments,
|
||||
analysis,
|
||||
output_video_path
|
||||
)
|
||||
|
||||
# Update analysis with actual freeze durations for VTT retiming
|
||||
if updated_placements:
|
||||
analysis["placements"] = updated_placements
|
||||
logger.info(f"Updated {len(updated_placements)} placements with actual freeze durations")
|
||||
|
||||
# 7. Upload rendered video to GCS
|
||||
video_blob_path = f"{job_id}/{language}/accessible_video.mp4"
|
||||
video_blob = gcs_service.bucket.blob(video_blob_path)
|
||||
|
|
|
|||
|
|
@ -389,3 +389,124 @@ class TestVTTRetimerHelpers:
|
|||
assert len(filtered) == 2
|
||||
assert filtered[0]["text"] == "long enough"
|
||||
assert filtered[1]["text"] == "just enough"
|
||||
|
||||
def test_build_pause_list_uses_actual_freeze_duration(self, retimer):
    """Check which offset _build_pause_list picks for each placement.

    A placement carrying ``actual_freeze_duration`` must contribute that
    value as its offset; a placement without it must contribute
    ``ad_duration`` plus the 1.0s buffer.
    """
    measured = {"pause_point": 10.0, "ad_duration": 3.0, "actual_freeze_duration": 4.5}
    unmeasured = {"pause_point": 20.0, "ad_duration": 5.0}  # offset must be calculated

    pause_list = retimer._build_pause_list([measured, unmeasured])

    assert len(pause_list) == 2
    first_pause, second_pause = pause_list
    # Measured placement: offset is the actual freeze duration (4.5s).
    assert first_pause == (10.0, 4.5)
    # Unmeasured placement: offset is 5.0 + 1.0 = 6.0s.
    assert second_pause == (20.0, 6.0)
|
||||
|
||||
|
||||
class TestVTTRetimerActualFreezeDuration:
    """Retiming tests for placements that carry ``actual_freeze_duration``."""

    @pytest.fixture
    def retimer(self):
        """Provide a VTTRetimerService instance to each test."""
        return VTTRetimerService()

    def test_uses_actual_freeze_duration_when_provided(self, retimer):
        """A cue after the pause is shifted by the measured freeze duration."""
        source_vtt = """WEBVTT

00:00:15.000 --> 00:00:18.000
Later caption
"""
        placement = {
            "pause_point": 10.0,
            "ad_duration": 3.0,
            # Measured value is 0.5s longer than the calculated 4.0s.
            "actual_freeze_duration": 4.5,
        }

        retimed = retimer.retime_for_pause_insert(source_vtt, {"placements": [placement]})
        parsed = retimer._parse_vtt(retimed)

        # Shift must be the measured 4.5s, not the calculated 4.0s.
        assert len(parsed) == 1
        shifted = parsed[0]
        assert shifted["start_time"] == 19.5  # 15 + 4.5
        assert shifted["end_time"] == 22.5  # 18 + 4.5

    def test_falls_back_to_calculated_when_actual_not_provided(self, retimer):
        """Without a measured value the shift is ad_duration + 1.0s."""
        source_vtt = """WEBVTT

00:00:15.000 --> 00:00:18.000
Later caption
"""
        # Deliberately omits actual_freeze_duration.
        placement = {
            "pause_point": 10.0,
            "ad_duration": 3.0,
        }

        retimed = retimer.retime_for_pause_insert(source_vtt, {"placements": [placement]})
        parsed = retimer._parse_vtt(retimed)

        # Calculated shift: 3.0 + 1.0 = 4.0s.
        assert len(parsed) == 1
        shifted = parsed[0]
        assert shifted["start_time"] == 19.0  # 15 + 4
        assert shifted["end_time"] == 22.0  # 18 + 4

    def test_mixed_actual_and_calculated(self, retimer):
        """Measured and calculated freeze durations accumulate together."""
        source_vtt = """WEBVTT

00:00:25.000 --> 00:00:28.000
Late caption
"""
        with_measurement = {"pause_point": 10.0, "ad_duration": 3.0, "actual_freeze_duration": 4.2}
        without_measurement = {"pause_point": 20.0, "ad_duration": 5.0}
        analysis = {"placements": [with_measurement, without_measurement]}

        retimed = retimer.retime_for_pause_insert(source_vtt, analysis)
        parsed = retimer._parse_vtt(retimed)

        # Cumulative shift = 4.2 (measured) + 6.0 (calculated: 5 + 1) = 10.2.
        assert len(parsed) == 1
        shifted = parsed[0]
        assert shifted["start_time"] == pytest.approx(35.2, rel=1e-3)  # 25 + 10.2
        assert shifted["end_time"] == pytest.approx(38.2, rel=1e-3)  # 28 + 10.2

    def test_cue_spanning_pause_with_actual_duration(self, retimer):
        """A cue straddling the pause is split around the measured freeze."""
        source_vtt = """WEBVTT

00:00:08.000 --> 00:00:12.000
Spanning caption
"""
        placement = {
            "pause_point": 10.0,
            "ad_duration": 3.0,
            # Slightly longer than the calculated 4.0s.
            "actual_freeze_duration": 4.3,
        }

        retimed = retimer.retime_for_pause_insert(source_vtt, {"placements": [placement]})
        parsed = retimer._parse_vtt(retimed)

        # The straddling cue becomes two cues.
        assert len(parsed) == 2
        before_freeze, after_freeze = parsed

        # Part before the freeze: 8s-10s, no shift applied.
        assert before_freeze["start_time"] == 8.0
        assert before_freeze["end_time"] == 10.0

        # Part after the freeze: 14.3s-16.3s, shifted by 4.3s.
        assert after_freeze["start_time"] == pytest.approx(14.3, rel=1e-3)
        assert after_freeze["end_time"] == pytest.approx(16.3, rel=1e-3)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue