# video-accessibility/backend/tests/unit/test_vtt.py
# 2025-08-24 16:28:33 -05:00
#
# 350 lines
# No EOL
# 9.6 KiB
# Python

import pytest
from app.lib.vtt import VTTParser, VTTEditor, VTTCue
class TestVTTParser:
    """Tests for ``VTTParser``: parsing VTT text into cues and building it back.

    NOTE(review): the fixtures below carry no blank line after the ``WEBVTT``
    header or between cues, although the WebVTT format uses blank lines as
    separators -- confirm they match what ``VTTParser`` accepts and emits.
    """

    def test_parse_simple_vtt(self):
        """Two plain cues are parsed with their start/end times and text."""
        vtt_content = (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "Hello world\n"
            "00:00:04.500 --> 00:00:06.000\n"
            "This is a test\n"
        )

        cues = VTTParser.parse(vtt_content)

        assert len(cues) == 2
        # These second values are exactly representable as floats, so exact
        # equality is safe here.
        assert cues[0].start_time == 1.0
        assert cues[0].end_time == 3.0
        assert cues[0].text == "Hello world"
        assert cues[1].start_time == 4.5
        assert cues[1].end_time == 6.0
        assert cues[1].text == "This is a test"

    def test_parse_vtt_with_identifier(self):
        """Cue identifiers preceding the timing line are captured per cue."""
        vtt_content = (
            "WEBVTT\n"
            "cue1\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "Hello world\n"
            "cue2\n"
            "00:00:04.500 --> 00:00:06.000\n"
            "This is a test\n"
        )

        cues = VTTParser.parse(vtt_content)

        assert len(cues) == 2
        assert cues[0].identifier == "cue1"
        assert cues[0].text == "Hello world"
        assert cues[1].identifier == "cue2"
        assert cues[1].text == "This is a test"

    def test_parse_multiline_cue(self):
        """Consecutive text lines of one cue are joined with newlines."""
        vtt_content = (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "Line one\n"
            "Line two\n"
            "Line three\n"
        )

        cues = VTTParser.parse(vtt_content)

        assert len(cues) == 1
        assert cues[0].text == "Line one\nLine two\nLine three"

    def test_parse_with_notes(self):
        """A NOTE line does not produce a cue of its own."""
        vtt_content = (
            "WEBVTT\n"
            "NOTE This is a note\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "Hello world\n"
        )

        cues = VTTParser.parse(vtt_content)

        assert len(cues) == 1
        assert cues[0].text == "Hello world"

    def test_build_simple_vtt(self):
        """Building from two cues yields the expected VTT text."""
        cues = [
            VTTCue(start_time=1.0, end_time=3.0, text="Hello world"),
            VTTCue(start_time=4.5, end_time=6.0, text="This is a test"),
        ]

        vtt_content = VTTParser.build(cues)

        expected = (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "Hello world\n"
            "00:00:04.500 --> 00:00:06.000\n"
            "This is a test\n"
        )
        assert vtt_content == expected

    def test_build_vtt_with_identifier(self):
        """Cue identifiers are written out ahead of each timing line."""
        cues = [
            VTTCue(start_time=1.0, end_time=3.0, text="Hello world", identifier="cue1"),
            VTTCue(start_time=4.5, end_time=6.0, text="This is a test", identifier="cue2"),
        ]

        vtt_content = VTTParser.build(cues)

        expected = (
            "WEBVTT\n"
            "cue1\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "Hello world\n"
            "cue2\n"
            "00:00:04.500 --> 00:00:06.000\n"
            "This is a test\n"
        )
        assert vtt_content == expected

    def test_parse_timestamp_formats(self):
        """The supported timestamp spellings all parse to seconds.

        Values such as 45.678 have no exact binary floating-point
        representation, so the parsed result may differ from the literal in
        its last bits depending on how the parser sums the components.  The
        comparisons therefore allow an absolute tolerance of one microsecond,
        well below the millisecond resolution of the timestamps.
        """
        # HH:MM:SS.mmm format
        assert VTTParser._parse_timestamp("01:23:45.678") == pytest.approx(5025.678, abs=1e-6)
        # MM:SS.mmm format
        assert VTTParser._parse_timestamp("23:45.678") == pytest.approx(1425.678, abs=1e-6)
        # Comma used as the decimal separator
        assert VTTParser._parse_timestamp("01:23:45,678") == pytest.approx(5025.678, abs=1e-6)

    def test_format_timestamp(self):
        """Seconds are rendered as zero-padded ``HH:MM:SS.mmm`` strings."""
        assert VTTParser._format_timestamp(5025.678) == "01:23:45.678"
        assert VTTParser._format_timestamp(1425.678) == "00:23:45.678"
        assert VTTParser._format_timestamp(65.500) == "00:01:05.500"

    def test_parse_invalid_timestamp(self):
        """A malformed timestamp raises ``ValueError`` with a clear message."""
        with pytest.raises(ValueError, match="Invalid timestamp format"):
            VTTParser._parse_timestamp("invalid")

    def test_roundtrip_consistency(self):
        """Parsing and then rebuilding reproduces the input text exactly."""
        original_vtt = (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:03.000\n"
            "Hello world\n"
            "00:00:04.500 --> 00:00:06.000\n"
            "This is a test\n"
        )

        cues = VTTParser.parse(original_vtt)
        rebuilt_vtt = VTTParser.build(cues)

        assert rebuilt_vtt == original_vtt
class TestVTTEditor:
    """Checks for the ``VTTEditor`` helpers: translate, update, validate, stats.

    NOTE(review): the fixtures below carry no blank line after the ``WEBVTT``
    header or between cues, although the WebVTT format uses blank lines as
    separators -- confirm they match what the code under test expects.
    """

    # Two-cue document reused by several tests.
    _TWO_CUES = (
        "WEBVTT\n"
        "00:00:01.000 --> 00:00:03.000\n"
        "Hello world\n"
        "00:00:04.500 --> 00:00:06.000\n"
        "This is a test\n"
    )

    # Single-cue document used by the error-path tests.
    _ONE_CUE = (
        "WEBVTT\n"
        "00:00:01.000 --> 00:00:03.000\n"
        "Hello world\n"
    )

    def test_translate_preserving_timing(self):
        """Replacing cue texts leaves every cue's timing untouched."""
        spanish = ["Hola mundo", "Esta es una prueba"]

        translated = VTTEditor.translate_preserving_timing(self._TWO_CUES, spanish)

        # Re-parse the output so timings and texts can be inspected per cue.
        parsed = VTTParser.parse(translated)
        assert len(parsed) == 2

        first, second = parsed[0], parsed[1]
        assert first.start_time == 1.0
        assert first.end_time == 3.0
        assert first.text == "Hola mundo"
        assert second.start_time == 4.5
        assert second.end_time == 6.0
        assert second.text == "Esta es una prueba"

    def test_translate_text_count_mismatch(self):
        """Supplying more texts than cues raises ``ValueError``."""
        too_many = ["Hola mundo", "Extra text"]

        with pytest.raises(ValueError, match="Text count mismatch"):
            VTTEditor.translate_preserving_timing(self._ONE_CUE, too_many)

    def test_update_cue_text(self):
        """Only the cue at the requested index gets the new text."""
        edited = VTTEditor.update_cue_text(self._TWO_CUES, 0, "Updated text")

        parsed = VTTParser.parse(edited)
        assert parsed[0].text == "Updated text"
        # The second cue must be left as it was.
        assert parsed[1].text == "This is a test"

    def test_update_cue_invalid_index(self):
        """An index beyond the last cue raises ``ValueError``."""
        with pytest.raises(ValueError, match="Invalid cue index"):
            VTTEditor.update_cue_text(self._ONE_CUE, 5, "New text")

    def test_validate_valid_vtt(self):
        """A well-formed document validates with no errors reported."""
        ok, problems = VTTEditor.validate_vtt(self._TWO_CUES)

        assert ok
        assert len(problems) == 0

    def test_validate_missing_webvtt_header(self):
        """Content lacking the ``WEBVTT`` first line is rejected."""
        headerless = (
            "00:00:01.000 --> 00:00:03.000\n"
            "Hello world\n"
        )

        ok, problems = VTTEditor.validate_vtt(headerless)

        assert not ok
        assert "VTT must start with 'WEBVTT'" in problems

    def test_validate_invalid_timing(self):
        """A cue whose start lies after its end is reported."""
        reversed_timing = (
            "WEBVTT\n"
            "00:00:03.000 --> 00:00:01.000\n"
            "Hello world\n"
        )

        ok, problems = VTTEditor.validate_vtt(reversed_timing)

        assert not ok
        assert any("Start time must be before end time" in msg for msg in problems)

    def test_validate_overlapping_cues(self):
        """A cue starting before the previous one ends is reported."""
        overlapping = (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:05.000\n"
            "First cue\n"
            "00:00:03.000 --> 00:00:06.000\n"
            "Overlapping cue\n"
        )

        ok, problems = VTTEditor.validate_vtt(overlapping)

        assert not ok
        assert any("Overlapping with previous cue" in msg for msg in problems)

    def test_validate_empty_text(self):
        """A cue with a timing line but no text is reported."""
        textless = (
            "WEBVTT\n"
            "00:00:01.000 --> 00:00:03.000\n"
        )

        ok, problems = VTTEditor.validate_vtt(textless)

        assert not ok
        assert any("Empty text content" in msg for msg in problems)

    def test_get_cue_count(self):
        """The cue count equals the number of cues in the document."""
        assert VTTEditor.get_cue_count(self._TWO_CUES) == 2

    def test_get_cue_count_invalid_vtt(self):
        """Unparseable content yields a count of zero."""
        assert VTTEditor.get_cue_count("invalid vtt content") == 0

    def test_get_total_duration(self):
        """For the two-cue fixture, whose last cue ends at 6 s, the total is 6.0."""
        assert VTTEditor.get_total_duration(self._TWO_CUES) == 6.0

    def test_get_total_duration_empty_vtt(self):
        """A header-only document has zero duration."""
        assert VTTEditor.get_total_duration("WEBVTT\n") == 0.0

    def test_get_total_duration_invalid_vtt(self):
        """Unparseable content has zero duration."""
        assert VTTEditor.get_total_duration("invalid content") == 0.0
class TestVTTCue:
    """Checks construction of ``VTTCue`` objects."""

    def test_cue_creation(self):
        """Every value passed to the constructor is exposed as an attribute."""
        given = {
            "start_time": 1.0,
            "end_time": 3.0,
            "text": "Hello world",
            "identifier": "cue1",
        }

        cue = VTTCue(**given)

        # Each attribute must echo the keyword argument of the same name.
        for field_name, expected in given.items():
            assert getattr(cue, field_name) == expected

    def test_cue_without_identifier(self):
        """Omitting the identifier leaves it as ``None``."""
        anonymous = VTTCue(start_time=1.0, end_time=3.0, text="Hello world")

        assert anonymous.identifier is None