"""Tests for the WebVTT helpers exposed by ``app.lib.vtt``.

Covers ``VTTParser`` (parse / build / timestamp helpers), ``VTTEditor``
(translation, cue updates, validation, statistics) and the ``VTTCue``
dataclass.

NOTE(review): the line breaks inside the WebVTT fixtures were reconstructed
from the WebVTT file layout (``WEBVTT`` header, a blank line, then cue blocks
separated by blank lines). The exact trailing newline expected from
``VTTParser.build`` in the build/round-trip tests should be confirmed against
the implementation.
"""

import pytest

from app.lib.vtt import VTTParser, VTTEditor, VTTCue


class TestVTTParser:
    """Test VTT parsing and building functionality"""

    def test_parse_simple_vtt(self):
        """Test parsing a simple VTT file"""
        vtt_content = """WEBVTT

00:00:01.000 --> 00:00:03.000
Hello world

00:00:04.500 --> 00:00:06.000
This is a test
"""
        cues = VTTParser.parse(vtt_content)

        assert len(cues) == 2
        assert cues[0].start_time == 1.0
        assert cues[0].end_time == 3.0
        assert cues[0].text == "Hello world"
        assert cues[1].start_time == 4.5
        assert cues[1].end_time == 6.0
        assert cues[1].text == "This is a test"

    def test_parse_vtt_with_identifier(self):
        """Test parsing VTT with cue identifiers"""
        vtt_content = """WEBVTT

cue1
00:00:01.000 --> 00:00:03.000
Hello world

cue2
00:00:04.500 --> 00:00:06.000
This is a test
"""
        cues = VTTParser.parse(vtt_content)

        assert len(cues) == 2
        assert cues[0].identifier == "cue1"
        assert cues[0].text == "Hello world"
        assert cues[1].identifier == "cue2"
        assert cues[1].text == "This is a test"

    def test_parse_multiline_cue(self):
        """Test parsing VTT with multi-line cue text"""
        vtt_content = """WEBVTT

00:00:01.000 --> 00:00:03.000
Line one
Line two
Line three
"""
        cues = VTTParser.parse(vtt_content)

        assert len(cues) == 1
        assert cues[0].text == "Line one\nLine two\nLine three"

    def test_parse_with_notes(self):
        """Test parsing VTT with NOTE sections"""
        vtt_content = """WEBVTT

NOTE This is a note

00:00:01.000 --> 00:00:03.000
Hello world
"""
        cues = VTTParser.parse(vtt_content)

        # The NOTE block must not be reported as a cue.
        assert len(cues) == 1
        assert cues[0].text == "Hello world"

    def test_build_simple_vtt(self):
        """Test building VTT from cues"""
        cues = [
            VTTCue(start_time=1.0, end_time=3.0, text="Hello world"),
            VTTCue(start_time=4.5, end_time=6.0, text="This is a test"),
        ]

        vtt_content = VTTParser.build(cues)

        expected = """WEBVTT

00:00:01.000 --> 00:00:03.000
Hello world

00:00:04.500 --> 00:00:06.000
This is a test
"""
        assert vtt_content == expected

    def test_build_vtt_with_identifier(self):
        """Test building VTT with cue identifiers"""
        cues = [
            VTTCue(start_time=1.0, end_time=3.0, text="Hello world", identifier="cue1"),
            VTTCue(start_time=4.5, end_time=6.0, text="This is a test", identifier="cue2"),
        ]

        vtt_content = VTTParser.build(cues)

        expected = """WEBVTT

cue1
00:00:01.000 --> 00:00:03.000
Hello world

cue2
00:00:04.500 --> 00:00:06.000
This is a test
"""
        assert vtt_content == expected

    def test_parse_timestamp_formats(self):
        """Test parsing different timestamp formats"""
        # Values such as 5025.678 are not exactly representable as floats and
        # the parser may sum hours/minutes/seconds in any order, so compare
        # with a tolerance instead of bit-for-bit equality.

        # Test HH:MM:SS.mmm format
        assert VTTParser._parse_timestamp("01:23:45.678") == pytest.approx(5025.678)

        # Test MM:SS.mmm format
        assert VTTParser._parse_timestamp("23:45.678") == pytest.approx(1425.678)

        # Test with comma decimal separator
        assert VTTParser._parse_timestamp("01:23:45,678") == pytest.approx(5025.678)

    def test_format_timestamp(self):
        """Test formatting seconds to VTT timestamp"""
        assert VTTParser._format_timestamp(5025.678) == "01:23:45.678"
        assert VTTParser._format_timestamp(1425.678) == "00:23:45.678"
        assert VTTParser._format_timestamp(65.500) == "00:01:05.500"

    def test_parse_invalid_timestamp(self):
        """Test error handling for invalid timestamps"""
        with pytest.raises(ValueError, match="Invalid timestamp format"):
            VTTParser._parse_timestamp("invalid")

    def test_roundtrip_consistency(self):
        """Test that parsing and building are consistent"""
        original_vtt = """WEBVTT

00:00:01.000 --> 00:00:03.000
Hello world

00:00:04.500 --> 00:00:06.000
This is a test
"""
        cues = VTTParser.parse(original_vtt)
        rebuilt_vtt = VTTParser.build(cues)

        assert rebuilt_vtt == original_vtt


class TestVTTEditor:
    """Test VTT editing functionality"""

    def test_translate_preserving_timing(self):
        """Test translating text while preserving timing"""
        vtt_content = """WEBVTT

00:00:01.000 --> 00:00:03.000
Hello world

00:00:04.500 --> 00:00:06.000
This is a test
"""
        translated_texts = ["Hola mundo", "Esta es una prueba"]

        result = VTTEditor.translate_preserving_timing(vtt_content, translated_texts)

        # Parse to verify timing is preserved
        cues = VTTParser.parse(result)
        assert len(cues) == 2
        assert cues[0].start_time == 1.0
        assert cues[0].end_time == 3.0
        assert cues[0].text == "Hola mundo"
        assert cues[1].start_time == 4.5
        assert cues[1].end_time == 6.0
        assert cues[1].text == "Esta es una prueba"

    def test_translate_text_count_mismatch(self):
        """Test error handling for text count mismatch"""
        vtt_content = """WEBVTT

00:00:01.000 --> 00:00:03.000
Hello world
"""
        # One cue in the document, two translations supplied.
        translated_texts = ["Hola mundo", "Extra text"]

        with pytest.raises(ValueError, match="Text count mismatch"):
            VTTEditor.translate_preserving_timing(vtt_content, translated_texts)

    def test_update_cue_text(self):
        """Test updating a specific cue's text"""
        vtt_content = """WEBVTT

00:00:01.000 --> 00:00:03.000
Hello world

00:00:04.500 --> 00:00:06.000
This is a test
"""
        result = VTTEditor.update_cue_text(vtt_content, 0, "Updated text")

        cues = VTTParser.parse(result)
        assert cues[0].text == "Updated text"
        assert cues[1].text == "This is a test"  # Other cue unchanged

    def test_update_cue_invalid_index(self):
        """Test error handling for invalid cue index"""
        vtt_content = """WEBVTT

00:00:01.000 --> 00:00:03.000
Hello world
"""
        with pytest.raises(ValueError, match="Invalid cue index"):
            VTTEditor.update_cue_text(vtt_content, 5, "New text")

    def test_validate_valid_vtt(self):
        """Test validation of valid VTT content"""
        vtt_content = """WEBVTT

00:00:01.000 --> 00:00:03.000
Hello world

00:00:04.500 --> 00:00:06.000
This is a test
"""
        is_valid, errors = VTTEditor.validate_vtt(vtt_content)

        assert is_valid
        assert len(errors) == 0

    def test_validate_missing_webvtt_header(self):
        """Test validation fails without WEBVTT header"""
        vtt_content = """00:00:01.000 --> 00:00:03.000
Hello world
"""
        is_valid, errors = VTTEditor.validate_vtt(vtt_content)

        assert not is_valid
        assert "VTT must start with 'WEBVTT'" in errors

    def test_validate_invalid_timing(self):
        """Test validation catches timing errors"""
        # Start time after end time
        vtt_content = """WEBVTT

00:00:03.000 --> 00:00:01.000
Hello world
"""
        is_valid, errors = VTTEditor.validate_vtt(vtt_content)

        assert not is_valid
        assert any("Start time must be before end time" in error for error in errors)

    def test_validate_overlapping_cues(self):
        """Test validation catches overlapping cues"""
        vtt_content = """WEBVTT

00:00:01.000 --> 00:00:05.000
First cue

00:00:03.000 --> 00:00:06.000
Overlapping cue
"""
        is_valid, errors = VTTEditor.validate_vtt(vtt_content)

        assert not is_valid
        assert any("Overlapping with previous cue" in error for error in errors)

    def test_validate_empty_text(self):
        """Test validation catches empty cue text"""
        # A timing line followed by no payload text.
        vtt_content = """WEBVTT

00:00:01.000 --> 00:00:03.000

"""
        is_valid, errors = VTTEditor.validate_vtt(vtt_content)

        assert not is_valid
        assert any("Empty text content" in error for error in errors)

    def test_get_cue_count(self):
        """Test getting cue count from VTT"""
        vtt_content = """WEBVTT

00:00:01.000 --> 00:00:03.000
Hello world

00:00:04.500 --> 00:00:06.000
This is a test
"""
        count = VTTEditor.get_cue_count(vtt_content)

        assert count == 2

    def test_get_cue_count_invalid_vtt(self):
        """Test cue count returns 0 for invalid VTT"""
        count = VTTEditor.get_cue_count("invalid vtt content")

        assert count == 0

    def test_get_total_duration(self):
        """Test getting total duration from VTT"""
        vtt_content = """WEBVTT

00:00:01.000 --> 00:00:03.000
Hello world

00:00:04.500 --> 00:00:06.000
This is a test
"""
        duration = VTTEditor.get_total_duration(vtt_content)

        assert duration == 6.0

    def test_get_total_duration_empty_vtt(self):
        """Test total duration returns 0 for empty VTT"""
        duration = VTTEditor.get_total_duration("WEBVTT\n")

        assert duration == 0.0

    def test_get_total_duration_invalid_vtt(self):
        """Test total duration returns 0 for invalid VTT"""
        duration = VTTEditor.get_total_duration("invalid content")

        assert duration == 0.0


class TestVTTCue:
    """Test VTTCue dataclass"""

    def test_cue_creation(self):
        """Test creating a VTT cue"""
        cue = VTTCue(
            start_time=1.0,
            end_time=3.0,
            text="Hello world",
            identifier="cue1",
        )

        assert cue.start_time == 1.0
        assert cue.end_time == 3.0
        assert cue.text == "Hello world"
        assert cue.identifier == "cue1"

    def test_cue_without_identifier(self):
        """Test creating a VTT cue without identifier"""
        cue = VTTCue(start_time=1.0, end_time=3.0, text="Hello world")

        assert cue.identifier is None