fix(vtt): auto-fix overlapping cues from AI-generated output

Gemini occasionally produces captions where a cue's start_time is earlier than the previous cue's end_time. Add VTTEditor.fix_overlapping_cues(), which trims the earlier cue's end_time to 1ms before the next cue's start_time (but never to at or below the earlier cue's own start_time). It runs right after AI generation: on the captions VTT once Whisper alignment is done, and on the AD VTT, before the VTT files are uploaded to GCS.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-08 13:23:08 +01:00 · 2026-05-08 13:23:08 +01:00 · c8a610b3f7
commit c8a610b3f7
parent 3371466e10
2 changed files with 21 additions and 0 deletions
--- a/backend/app/lib/vtt.py
+++ b/backend/app/lib/vtt.py
@ -207,6 +207,20 @@ class VTTEditor:

        return len(errors) == 0, errors

+    @staticmethod
+    def fix_overlapping_cues(vtt_content: str) -> str:
+        """Trim end_time of each cue so it does not overlap the next cue's start_time."""
+        cues = VTTParser.parse(vtt_content)
+        for i in range(1, len(cues)):
+            if cues[i].start_time < cues[i - 1].end_time:
+                # Clamp previous cue end to 1ms before next cue start
+                new_end = cues[i].start_time - 0.001
+                # Never let end_time go at or below start_time
+                if new_end <= cues[i - 1].start_time:
+                    new_end = cues[i - 1].start_time + 0.001
+                cues[i - 1].end_time = new_end
+        return VTTParser.build(cues)
+
    @staticmethod
    def get_cue_count(vtt_content: str) -> int:
        """Get the number of cues in VTT content"""
--- a/backend/app/tasks/ingest_and_ai.py
+++ b/backend/app/tasks/ingest_and_ai.py
@ -218,8 +218,15 @@ async def ingest_and_ai_task_impl(job_id: str):

                # Align caption timings with Whisper word-level timestamps (Bug 5)
                captions_vtt = await _align_captions_with_whisper(captions_vtt, temp_path, job_id)
+                # Fix overlapping cues that Gemini occasionally produces
+                captions_vtt = VTTEditor.fix_overlapping_cues(captions_vtt)
                ai_result["captions_vtt"] = captions_vtt

+                # Fix overlapping cues in AD VTT as well
+                ai_result["audio_description_vtt"] = VTTEditor.fix_overlapping_cues(
+                    ai_result["audio_description_vtt"]
+                )
+
                # Upload VTT files to GCS using detected language
                captions_gcs_uri = await upload_vtt_to_gcs(
                    ai_result["captions_vtt"],