fix(glossary,vtt): 4 bugs — locale fallback, ingestion source, cue settings, overlap on save

- glossary_service: _get_translation now handles bare→specific fallback (fr→fr-FR); previously only specific→bare worked, causing zero term matches when the job uses bare locale codes ("fr") but the XLSX has region columns ("fr_fr" → "fr-FR")
- ingest_and_ai: use title + brand_context as the glossary source text; previously an empty brand_context caused the glossary to be skipped entirely during AI ingestion
- routes_jobs.py: apply fix_overlapping_cues before validating PATCH /vtt; this mirrors what AI generation already does and prevents save errors for minor overlaps
- frontend/vtt.ts: preserve raw cue settings (line:0%, align:end, etc.) through the parse→build round-trip; previously settings were parsed into the positionTop flag only and dropped on serialization, losing caption positioning after an edit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-13 16:58:13 +01:00 · 2026-05-13 16:58:13 +01:00 · 16000a8bd9
commit 16000a8bd9
parent 69eff9ca9d
4 changed files with 37 additions and 20 deletions
--- a/backend/app/api/v1/routes_jobs.py
+++ b/backend/app/api/v1/routes_jobs.py
@ -1618,8 +1618,9 @@ async def update_job_vtt_content(

    # Validate and update captions VTT
    if request.captions_vtt:  # treat empty string same as None — nothing to update
-        # Validate VTT format
-        is_valid, errors = VTTEditor.validate_vtt(request.captions_vtt)
+        # Auto-fix minor overlaps before validation (mirrors AI-generation pipeline)
+        captions_vtt_fixed = VTTEditor.fix_overlapping_cues(request.captions_vtt)
+        is_valid, errors = VTTEditor.validate_vtt(captions_vtt_fixed)
        if not is_valid:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
@ -1628,20 +1629,20 @@ async def update_job_vtt_content(

        # Snapshot before overwriting live file
        await vtt_versioning.create_version(
-            db, job_id, target_language, "captions", request.captions_vtt, current_user
+            db, job_id, target_language, "captions", captions_vtt_fixed, current_user
        )

        # Upload updated VTT
        new_captions_uri = await upload_vtt_to_gcs(
-            request.captions_vtt,
+            captions_vtt_fixed,
            f"{job_id}/{target_language}/captions.vtt"
        )
        lang_output["captions_vtt_gcs"] = new_captions_uri

    # Validate and update audio description VTT
    if request.audio_description_vtt:  # treat empty string same as None — nothing to update
-        # Validate VTT format
-        is_valid, errors = VTTEditor.validate_vtt(request.audio_description_vtt)
+        ad_vtt_fixed = VTTEditor.fix_overlapping_cues(request.audio_description_vtt)
+        is_valid, errors = VTTEditor.validate_vtt(ad_vtt_fixed)
        if not is_valid:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
@ -1666,7 +1667,7 @@ async def update_job_vtt_content(
            except Exception as _e:
                logger.warning(f"Could not read old AD VTT for diff: {_e}")

-        new_cues = [c["text"] for c in _parse_ad_cues_for_diff(request.audio_description_vtt)]
+        new_cues = [c["text"] for c in _parse_ad_cues_for_diff(ad_vtt_fixed)]

        # Queue TTS regeneration for any cue whose text changed or that is newly added
        edit_state = lang_output.get("accessible_video_edit_state") or {}
@ -1713,12 +1714,12 @@ async def update_job_vtt_content(

        # Snapshot before overwriting live file
        await vtt_versioning.create_version(
-            db, job_id, target_language, "ad", request.audio_description_vtt, current_user
+            db, job_id, target_language, "ad", ad_vtt_fixed, current_user
        )

        # Upload updated VTT
        new_ad_uri = await upload_vtt_to_gcs(
-            request.audio_description_vtt,
+            ad_vtt_fixed,
            f"{job_id}/{target_language}/ad.vtt"
        )
        lang_output["ad_vtt_gcs"] = new_ad_uri
@ -1731,7 +1732,7 @@ async def update_job_vtt_content(
                generate_descriptive_transcript as _gen_transcript,
            )

-            captions_text = request.captions_vtt
+            captions_text = captions_vtt_fixed if request.captions_vtt else None
            if not captions_text:
                cc_gcs = lang_output.get("captions_vtt_gcs")
                if cc_gcs:
@ -1742,7 +1743,7 @@ async def update_job_vtt_content(
                        gcs_service.executor, _cc_blob.download_as_text
                    )

-            ad_text = request.audio_description_vtt
+            ad_text = ad_vtt_fixed if request.audio_description_vtt else None
            if not ad_text:
                ad_gcs = lang_output.get("ad_vtt_gcs")
                if ad_gcs:
--- a/backend/app/services/glossary_service.py
+++ b/backend/app/services/glossary_service.py
@ -547,18 +547,26 @@ async def _vector_match(


 def _get_translation(translations: dict[str, str], target_locale: str) -> str | None:
-    """Look up a translation with locale-fallback: fr-CA → fr-FR → fr → None."""
+    """Look up a translation with locale-fallback.
+
+    Specific → bare: fr-CA → fr-FR siblings → fr
+    Bare → specific: fr → fr-FR, fr-CA (first match)
+    """
    if not translations or not target_locale:
        return None
    if target_locale in translations:
        return translations[target_locale]
-    # Try parent language
-    parent = target_locale.split("-")[0] if "-" in target_locale else None
-    if parent:
-        # Try sibling locales, e.g. fr-CA not found → try fr-FR
+    if "-" in target_locale:
+        # Specific locale: try sibling regions and bare parent (fr-CA → fr-FR → fr)
+        parent = target_locale.split("-")[0]
        for code, text in translations.items():
            if code.startswith(parent + "-") or code == parent:
                return text
+    else:
+        # Bare code (fr): try any fr-* region variant stored in the glossary
+        for code, text in translations.items():
+            if code == target_locale or code.startswith(target_locale + "-"):
+                return text
    return None


--- a/backend/app/tasks/ingest_and_ai.py
+++ b/backend/app/tasks/ingest_and_ai.py
@ -169,10 +169,12 @@ async def ingest_and_ai_task_impl(job_id: str):
                    user_external_id=_cost_ctx["user_id"],
                    project_id=_cost_ctx["project_id"],
                )
-                # Load glossary for source language — use brand context as vocabulary hint
+                # Load glossary for source language — use title + brand context for term matching
                from ..services.glossary_service import get_glossary_block_for_job
                _source_lang = job_doc.get("source", {}).get("language", "en")
-                _job_for_glossary = {**job_doc, "_glossary_source_text": brand_context or ""}
+                _job_title = job_doc.get("title") or ""
+                _source_for_glossary = " ".join(filter(None, [_job_title, brand_context]))
+                _job_for_glossary = {**job_doc, "_glossary_source_text": _source_for_glossary}
                glossary_block = await get_glossary_block_for_job(_job_for_glossary, _source_lang, db)
                ai_result = await gemini_service.extract_accessibility(
                    temp_path,
--- a/frontend/src/lib/vtt.ts
+++ b/frontend/src/lib/vtt.ts
@ -3,6 +3,8 @@ export interface VTTCue {
  endTime: number;   // seconds
  text: string;
  identifier?: string;
+  /** Raw cue settings string from the VTT timing line (e.g. "line:0% align:start") */
+  settings?: string;
  /** When true, caption should be rendered at the top of the video (line:0% cue setting) */
  positionTop?: boolean;
 }
@ -54,6 +56,7 @@ export class VTTParser {
              endTime,
              text: textLines.join('\n'),
              identifier,
+              settings: cueSettings.trim() || undefined,
              ...(positionTop ? { positionTop: true } : {})
            });
          }
@ -75,10 +78,13 @@ export class VTTParser {
        lines.push(cue.identifier);
      }
      
-      // Add timing line
+      // Add timing line (preserve cue settings like line:0%)
      const startTimestamp = this.formatTimestamp(cue.startTime);
      const endTimestamp = this.formatTimestamp(cue.endTime);
-      lines.push(`${startTimestamp} --> ${endTimestamp}`);
+      const timingLine = cue.settings
+        ? `${startTimestamp} --> ${endTimestamp} ${cue.settings}`
+        : `${startTimestamp} --> ${endTimestamp}`;
+      lines.push(timingLine);
      
      // Add text (can be multi-line)
      lines.push(cue.text);