fix(vtt): auto-fix overlapping cues from AI-generated output

Gemini occasionally produces captions where a cue's start_time is
earlier than the previous cue's end_time. Add VTTEditor.fix_overlapping_cues(),
which trims the end_time of any overlapping cue to 1 ms before the next cue's
start (but never to less than 1 ms after the cue's own start). It is applied to
the captions VTT after Whisper alignment and to the AD VTT, before the VTT
files are uploaded.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-05-08 13:23:08 +01:00
parent 3371466e10
commit c8a610b3f7
2 changed files with 21 additions and 0 deletions

View file

@ -207,6 +207,20 @@ class VTTEditor:
return len(errors) == 0, errors
@staticmethod
def fix_overlapping_cues(vtt_content: str) -> str:
    """Trim cue end times so a cue does not run past the start of the next cue.

    Cues are compared pairwise in the order returned by ``VTTParser.parse``.
    When a cue starts before the previous cue has ended, the previous cue's
    ``end_time`` is pulled back to 1 ms (``0.001``) before that start.

    Args:
        vtt_content: WebVTT text to check.

    Returns:
        ``vtt_content`` itself, untouched, when no cue needed trimming;
        otherwise the text rebuilt by ``VTTParser.build`` from the
        adjusted cues.

    Note:
        A trimmed cue is never shortened below 1 ms after its own start.
        If the next cue starts at or before the previous cue's start, the
        previous cue is therefore reduced to a 1 ms duration and the two
        cues can still overlap; this method does not move start times.
    """
    cues = VTTParser.parse(vtt_content)
    trimmed = False

    # Each pair is (previous cue, following cue); only the previous cue's
    # end_time is ever modified, so later comparisons see original ends.
    for prev_cue, next_cue in zip(cues, cues[1:]):
        if next_cue.start_time < prev_cue.end_time:
            # Clamp previous cue end to 1ms before next cue start
            new_end = next_cue.start_time - 0.001
            # Never let end_time go at or below start_time
            if new_end <= prev_cue.start_time:
                new_end = prev_cue.start_time + 0.001
            prev_cue.end_time = new_end
            trimmed = True

    if not trimmed:
        # Nothing overlapped: hand back the caller's text as-is instead of
        # pushing it through a parse/build round-trip for no reason.
        return vtt_content
    return VTTParser.build(cues)
@staticmethod
def get_cue_count(vtt_content: str) -> int:
"""Get the number of cues in VTT content"""

View file

@ -218,8 +218,15 @@ async def ingest_and_ai_task_impl(job_id: str):
# Align caption timings with Whisper word-level timestamps (Bug 5)
captions_vtt = await _align_captions_with_whisper(captions_vtt, temp_path, job_id)
# Fix overlapping cues that Gemini occasionally produces
captions_vtt = VTTEditor.fix_overlapping_cues(captions_vtt)
ai_result["captions_vtt"] = captions_vtt
# Fix overlapping cues in AD VTT as well
ai_result["audio_description_vtt"] = VTTEditor.fix_overlapping_cues(
ai_result["audio_description_vtt"]
)
# Upload VTT files to GCS using detected language
captions_gcs_uri = await upload_vtt_to_gcs(
ai_result["captions_vtt"],