video-accessibility/backend/tests/unit/test_vtt_retimer.py
michael add958008a fix: use actual freeze segment durations for VTT subtitle retiming
Subtitles were appearing progressively out of sync (~1.0s early per AD)
because the VTT retimer calculated freeze durations theoretically
rather than using actual rendered segment durations.

Changes:
- video_renderer: Measure actual freeze segment duration after creation
- video_renderer: Return updated placements with actual_freeze_duration
- vtt_retimer: Prefer actual_freeze_duration over calculated values
- render_task: Pass actual durations to VTT retimer

This ensures subtitle timing matches the real video timeline regardless
of any FFmpeg encoding variations.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 15:52:57 -06:00

512 lines
17 KiB
Python

"""Tests for VTT retiming service for pause-insert accessible videos."""
import pytest
from app.services.vtt_retimer import VTTRetimerService
class TestVTTRetimerPauseInsert:
    """Test VTT retiming for pause-insert accessible videos.

    Every test feeds a small WebVTT document plus an ``analysis`` dict
    (``{"placements": [...]}``) to ``retime_for_pause_insert`` and then
    re-parses the result with ``_parse_vtt`` to inspect the cues.

    Unless a placement says otherwise, the tests expect each freeze to last
    ``ad_duration + 1.0`` seconds (the extra second is the trailing silence
    referred to throughout the expectations below).

    The fixtures are spelled out as explicit string literals so that the
    blank lines WebVTT requires (after the ``WEBVTT`` header and between
    cue blocks) are visible and cannot be lost to editor or tooling
    whitespace handling.
    """

    @pytest.fixture
    def retimer(self):
        """Create a VTTRetimerService instance."""
        return VTTRetimerService()

    def test_cue_spanning_single_pause(self, retimer):
        """Cue 8s-12s with pause at 10s (4s freeze) should split into 8s-10s + 14s-16s."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:08.000 --> 00:00:12.000\n"
            "Hello world\n"
        )
        analysis = {
            "placements": [{
                "pause_point": 10.0,
                "ad_duration": 3.0,  # freeze = 3.0 + 1.0 = 4.0s
            }]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        # Should split into two segments
        assert len(cues) == 2

        # Segment 1: 8s-10s (before freeze, no offset)
        assert cues[0]["start_time"] == 8.0
        assert cues[0]["end_time"] == 10.0
        assert cues[0]["text"] == "Hello world"

        # Segment 2: 14s-16s (after freeze, +4s offset)
        # Maps source 10s-12s to accessible 14s-16s
        assert cues[1]["start_time"] == 14.0
        assert cues[1]["end_time"] == 16.0
        assert cues[1]["text"] == "Hello world"

    def test_cue_entirely_before_pause(self, retimer):
        """Cue 2s-4s with pause at 10s should remain unchanged."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:02.000 --> 00:00:04.000\n"
            "Early caption\n"
        )
        analysis = {
            "placements": [{
                "pause_point": 10.0,
                "ad_duration": 3.0,
            }]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        # No offset applied (cue is before pause)
        assert len(cues) == 1
        assert cues[0]["start_time"] == 2.0
        assert cues[0]["end_time"] == 4.0
        assert cues[0]["text"] == "Early caption"

    def test_cue_entirely_after_pause(self, retimer):
        """Cue 15s-18s with pause at 10s (4s freeze) should shift to 19s-22s."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:15.000 --> 00:00:18.000\n"
            "Later caption\n"
        )
        analysis = {
            "placements": [{
                "pause_point": 10.0,
                "ad_duration": 3.0,  # freeze = 4.0s
            }]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        # Full offset applied (cue is after pause)
        assert len(cues) == 1
        assert cues[0]["start_time"] == 19.0  # 15 + 4
        assert cues[0]["end_time"] == 22.0  # 18 + 4
        assert cues[0]["text"] == "Later caption"

    def test_cue_starting_at_pause_point(self, retimer):
        """Cue 10s-12s with pause at 10s (4s freeze) should shift to 14s-16s."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:10.000 --> 00:00:12.000\n"
            "Caption at pause\n"
        )
        analysis = {
            "placements": [{
                "pause_point": 10.0,
                "ad_duration": 3.0,
            }]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        # Cue starts AT pause point, so gets full offset
        # No split needed (pause_point is not STRICTLY between start and end)
        assert len(cues) == 1
        assert cues[0]["start_time"] == 14.0  # 10 + 4
        assert cues[0]["end_time"] == 16.0  # 12 + 4

    def test_cue_ending_at_pause_point(self, retimer):
        """Cue 8s-10s with pause at 10s should remain at 8s-10s."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:08.000 --> 00:00:10.000\n"
            "Caption before pause\n"
        )
        analysis = {
            "placements": [{
                "pause_point": 10.0,
                "ad_duration": 3.0,
            }]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        # Cue ends AT pause point, so no offset (pause not strictly between)
        # Uses offset_at(8) = 0 (pause at 10 is not <= 8)
        assert len(cues) == 1
        assert cues[0]["start_time"] == 8.0
        assert cues[0]["end_time"] == 10.0

    def test_cue_spanning_multiple_pauses(self, retimer):
        """Cue 5s-25s with pauses at 10s and 20s should split into 3 segments."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:05.000 --> 00:00:25.000\n"
            "Long caption\n"
        )
        analysis = {
            "placements": [
                {"pause_point": 10.0, "ad_duration": 3.0},  # 4s freeze
                {"pause_point": 20.0, "ad_duration": 5.0},  # 6s freeze
            ]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        # Should split into 3 segments
        assert len(cues) == 3

        # Segment 1: 5s-10s (no offset, before first pause)
        assert cues[0]["start_time"] == 5.0
        assert cues[0]["end_time"] == 10.0
        assert cues[0]["text"] == "Long caption"

        # Segment 2: 14s-24s (source 10s-20s + 4s offset from first pause)
        assert cues[1]["start_time"] == 14.0
        assert cues[1]["end_time"] == 24.0
        assert cues[1]["text"] == "Long caption"

        # Segment 3: 30s-35s (source 20s-25s + 10s total offset)
        assert cues[2]["start_time"] == 30.0  # 20 + 4 + 6
        assert cues[2]["end_time"] == 35.0  # 25 + 4 + 6
        assert cues[2]["text"] == "Long caption"

    def test_short_segment_filtered(self, retimer):
        """Very short segments (<100ms) should be filtered out."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:09.950 --> 00:00:10.050\n"
            "Tiny caption\n"
        )
        analysis = {
            "placements": [{
                "pause_point": 10.0,
                "ad_duration": 3.0,
            }]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        # Both segments are < 100ms, should be filtered out
        # Segment 1: 9.95s-10.0s = 50ms
        # Segment 2: 14.0s-14.05s = 50ms
        assert len(cues) == 0

    def test_no_pauses_passthrough(self, retimer):
        """No placements should return original VTT unchanged."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:05.000 --> 00:00:08.000\n"
            "Caption\n"
        )
        analysis = {"placements": []}

        result = retimer.retime_for_pause_insert(vtt, analysis)

        # Should return original unchanged (compared as a whole string, so
        # the document must come back character-for-character identical)
        assert result == vtt

    def test_multiple_cues_with_pause(self, retimer):
        """Multiple cues with a pause between them."""
        # The blank line between the two cue blocks is what separates them
        # in WebVTT; without it the second timing line would directly follow
        # the first cue's text.
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:02.000 --> 00:00:05.000\n"
            "First caption\n"
            "\n"
            "00:00:15.000 --> 00:00:18.000\n"
            "Second caption\n"
        )
        analysis = {
            "placements": [{
                "pause_point": 10.0,
                "ad_duration": 3.0,  # 4s freeze
            }]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        assert len(cues) == 2

        # First cue: before pause, no offset
        assert cues[0]["start_time"] == 2.0
        assert cues[0]["end_time"] == 5.0
        assert cues[0]["text"] == "First caption"

        # Second cue: after pause, +4s offset
        assert cues[1]["start_time"] == 19.0  # 15 + 4
        assert cues[1]["end_time"] == 22.0  # 18 + 4
        assert cues[1]["text"] == "Second caption"

    def test_cue_with_pause_at_start_of_video(self, retimer):
        """Pause at 0.0 should shift all subsequent cues."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:05.000 --> 00:00:08.000\n"
            "Caption after start\n"
        )
        analysis = {
            "placements": [{
                "pause_point": 0.0,
                "ad_duration": 3.0,  # 4s freeze
            }]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        assert len(cues) == 1
        assert cues[0]["start_time"] == 9.0  # 5 + 4
        assert cues[0]["end_time"] == 12.0  # 8 + 4

    def test_consolidated_pauses_same_pause_point(self, retimer):
        """Multiple placements at the same pause point should be handled correctly."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:15.000 --> 00:00:18.000\n"
            "Caption after pauses\n"
        )
        analysis = {
            "placements": [
                {"pause_point": 10.0, "ad_duration": 2.0},  # 3s freeze
                {"pause_point": 10.0, "ad_duration": 3.0},  # 4s freeze (consolidated at same point)
            ]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        assert len(cues) == 1
        # Total offset = 3s + 4s = 7s
        assert cues[0]["start_time"] == 22.0  # 15 + 7
        assert cues[0]["end_time"] == 25.0  # 18 + 7

    def test_multiline_cue_text_preserved(self, retimer):
        """Multi-line cue text should be preserved when retiming."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:08.000 --> 00:00:12.000\n"
            "Line one\n"
            "Line two\n"
        )
        analysis = {
            "placements": [{
                "pause_point": 10.0,
                "ad_duration": 3.0,
            }]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        assert len(cues) == 2
        # Both segments should have the multi-line text
        assert cues[0]["text"] == "Line one\nLine two"
        assert cues[1]["text"] == "Line one\nLine two"
class TestVTTRetimerHelpers:
    """Exercise the private helpers of VTTRetimerService directly.

    Pause tables are lists of ``(pause_point, freeze_duration)`` tuples, the
    same shape the tests expect ``_build_pause_list`` to produce.
    """

    @pytest.fixture
    def retimer(self):
        """Provide a fresh VTTRetimerService for each test."""
        service = VTTRetimerService()
        return service

    # --- _offset_at: a pause AT the timestamp counts -----------------------

    def test_offset_at_before_pause(self, retimer):
        """A timestamp earlier than every pause gets no offset."""
        pause_table = [(10.0, 4.0)]
        offset = retimer._offset_at(5.0, pause_table)
        assert offset == 0.0

    def test_offset_at_at_pause(self, retimer):
        """A timestamp exactly on a pause point already carries that pause."""
        pause_table = [(10.0, 4.0)]
        offset = retimer._offset_at(10.0, pause_table)
        assert offset == 4.0

    def test_offset_at_after_pause(self, retimer):
        """A timestamp later than a pause carries that pause's offset."""
        pause_table = [(10.0, 4.0)]
        offset = retimer._offset_at(15.0, pause_table)
        assert offset == 4.0

    def test_offset_at_multiple_pauses(self, retimer):
        """Offsets of every applicable pause are added together."""
        pause_table = [(10.0, 4.0), (20.0, 6.0)]
        offset = retimer._offset_at(25.0, pause_table)
        assert offset == 10.0  # 4 + 6

    # --- _offset_before: a pause AT the timestamp does NOT count -----------

    def test_offset_before_before_pause(self, retimer):
        """A timestamp earlier than every pause gets no offset."""
        pause_table = [(10.0, 4.0)]
        offset = retimer._offset_before(5.0, pause_table)
        assert offset == 0.0

    def test_offset_before_at_pause(self, retimer):
        """A timestamp exactly on a pause point excludes that pause."""
        pause_table = [(10.0, 4.0)]
        offset = retimer._offset_before(10.0, pause_table)
        assert offset == 0.0

    def test_offset_before_after_pause(self, retimer):
        """A timestamp later than a pause carries that pause's offset."""
        pause_table = [(10.0, 4.0)]
        offset = retimer._offset_before(15.0, pause_table)
        assert offset == 4.0

    # --- _build_pause_list --------------------------------------------------

    def test_build_pause_list(self, retimer):
        """Placements come back as (pause_point, offset) pairs sorted by pause_point."""
        unsorted_placements = [
            {"pause_point": 20.0, "ad_duration": 5.0},
            {"pause_point": 10.0, "ad_duration": 3.0},
        ]

        built = retimer._build_pause_list(unsorted_placements)

        # Ordered by pause_point; each offset is ad_duration + 1.0 silence
        assert list(built) == [
            (10.0, 4.0),  # 3.0 + 1.0
            (20.0, 6.0),  # 5.0 + 1.0
        ]

    def test_build_pause_list_filters_invalid(self, retimer):
        """Only placements with a pause_point and a non-zero ad_duration survive."""
        candidates = [
            {"pause_point": 10.0, "ad_duration": 3.0},  # valid
            {"pause_point": None, "ad_duration": 3.0},  # invalid: no pause_point
            {"pause_point": 20.0, "ad_duration": 0},  # invalid: zero duration
            {"pause_point": 30.0},  # invalid: no ad_duration
        ]

        built = retimer._build_pause_list(candidates)

        assert list(built) == [(10.0, 4.0)]

    def test_filter_short_segments(self, retimer):
        """Segments shorter than MIN_SEGMENT_DURATION are dropped, the rest kept in order."""
        segments = [
            {"start_time": 0.0, "end_time": 1.0, "text": "long enough"},
            {"start_time": 2.0, "end_time": 2.05, "text": "too short"},  # 50ms
            {"start_time": 3.0, "end_time": 3.1, "text": "just enough"},  # 100ms
        ]

        kept = retimer._filter_short_segments(segments)

        surviving_texts = [segment["text"] for segment in kept]
        assert surviving_texts == ["long enough", "just enough"]

    def test_build_pause_list_uses_actual_freeze_duration(self, retimer):
        """A measured actual_freeze_duration wins over the calculated value."""
        mixed_placements = [
            {"pause_point": 10.0, "ad_duration": 3.0, "actual_freeze_duration": 4.5},
            {"pause_point": 20.0, "ad_duration": 5.0},  # No actual, should calculate
        ]

        built = retimer._build_pause_list(mixed_placements)

        assert list(built) == [
            (10.0, 4.5),  # measured actual_freeze_duration
            (20.0, 6.0),  # calculated: 5.0 + 1.0
        ]
class TestVTTRetimerActualFreezeDuration:
    """Test VTT retiming with actual_freeze_duration field.

    A placement may carry ``actual_freeze_duration`` (the measured length of
    the rendered freeze segment). These tests expect that value to be used
    in place of the calculated ``ad_duration + 1.0`` whenever it is present,
    and the calculated value to be used otherwise.

    The fixtures are explicit string literals so the blank line WebVTT
    requires after the ``WEBVTT`` header is visible in the source.
    """

    # Absolute tolerance, in seconds, for timestamps that are not exactly
    # representable (e.g. 14.3). WebVTT timestamps have millisecond
    # resolution, so 2 ms absorbs one millisecond of formatting
    # rounding/truncation plus float noise. A relative tolerance would grow
    # with the position in the timeline and could hide real sync drift.
    TIMESTAMP_TOLERANCE = 0.002

    @pytest.fixture
    def retimer(self):
        """Create a VTTRetimerService instance."""
        return VTTRetimerService()

    def test_uses_actual_freeze_duration_when_provided(self, retimer):
        """Should use actual_freeze_duration instead of calculated value."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:15.000 --> 00:00:18.000\n"
            "Later caption\n"
        )
        analysis = {
            "placements": [{
                "pause_point": 10.0,
                "ad_duration": 3.0,
                "actual_freeze_duration": 4.5,  # Actual is 0.5s longer than calculated
            }]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        # Should use actual_freeze_duration (4.5s) not calculated (4.0s)
        assert len(cues) == 1
        assert cues[0]["start_time"] == 19.5  # 15 + 4.5
        assert cues[0]["end_time"] == 22.5  # 18 + 4.5

    def test_falls_back_to_calculated_when_actual_not_provided(self, retimer):
        """Should use calculated value when actual_freeze_duration not provided."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:15.000 --> 00:00:18.000\n"
            "Later caption\n"
        )
        analysis = {
            "placements": [{
                "pause_point": 10.0,
                "ad_duration": 3.0,
                # No actual_freeze_duration
            }]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        # Should use calculated (3.0 + 1.0 = 4.0s)
        assert len(cues) == 1
        assert cues[0]["start_time"] == 19.0  # 15 + 4
        assert cues[0]["end_time"] == 22.0  # 18 + 4

    def test_mixed_actual_and_calculated(self, retimer):
        """Should handle mix of actual and calculated freeze durations."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:25.000 --> 00:00:28.000\n"
            "Late caption\n"
        )
        analysis = {
            "placements": [
                {"pause_point": 10.0, "ad_duration": 3.0, "actual_freeze_duration": 4.2},
                {"pause_point": 20.0, "ad_duration": 5.0},  # No actual
            ]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        # Total offset = 4.2 (actual) + 6.0 (calculated: 5+1) = 10.2
        assert len(cues) == 1
        assert cues[0]["start_time"] == pytest.approx(
            35.2, abs=self.TIMESTAMP_TOLERANCE
        )  # 25 + 10.2
        assert cues[0]["end_time"] == pytest.approx(
            38.2, abs=self.TIMESTAMP_TOLERANCE
        )  # 28 + 10.2

    def test_cue_spanning_pause_with_actual_duration(self, retimer):
        """Cue spanning pause should use actual_freeze_duration for split timing."""
        vtt = (
            "WEBVTT\n"
            "\n"
            "00:00:08.000 --> 00:00:12.000\n"
            "Spanning caption\n"
        )
        analysis = {
            "placements": [{
                "pause_point": 10.0,
                "ad_duration": 3.0,
                "actual_freeze_duration": 4.3,  # Slightly longer than calculated
            }]
        }

        result = retimer.retime_for_pause_insert(vtt, analysis)
        cues = retimer._parse_vtt(result)

        # Should split into two segments
        assert len(cues) == 2

        # Segment 1: 8s-10s (before freeze, no offset)
        assert cues[0]["start_time"] == 8.0
        assert cues[0]["end_time"] == 10.0

        # Segment 2: 14.3s-16.3s (after freeze, +4.3s offset)
        assert cues[1]["start_time"] == pytest.approx(
            14.3, abs=self.TIMESTAMP_TOLERANCE
        )
        assert cues[1]["end_time"] == pytest.approx(
            16.3, abs=self.TIMESTAMP_TOLERANCE
        )