From 16000a8bd9377e79007209da73b56c25de50bb21 Mon Sep 17 00:00:00 2001 From: Vadym Samoilenko Date: Wed, 13 May 2026 16:58:13 +0100 Subject: [PATCH] =?UTF-8?q?fix(glossary,vtt):=204=20bugs=20=E2=80=94=20loc?= =?UTF-8?q?ale=20fallback,=20ingestion=20source,=20cue=20settings,=20overl?= =?UTF-8?q?ap=20on=20save?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - glossary_service: _get_translation now handles bare→specific fallback (fr→fr-FR); previously only specific→bare worked, causing zero term matches when job uses bare locale codes ("fr") but XLSX has region columns ("fr_fr" → "fr-FR") - ingest_and_ai: use title + brand_context as glossary source text; previously empty brand_context caused glossary to be skipped entirely during AI ingestion - routes_jobs.py: apply fix_overlapping_cues before validating PATCH /vtt; mirrors what AI generation already does — prevents save errors for minor overlaps - frontend/vtt.ts: preserve raw cue settings (line:0%, align:end, etc.) through parse→build round-trip; previously settings were parsed into positionTop flag only and dropped on serialization, losing caption positioning after edit Co-Authored-By: Claude Sonnet 4.6 --- backend/app/api/v1/routes_jobs.py | 23 ++++++++++++----------- backend/app/services/glossary_service.py | 18 +++++++++++++----- backend/app/tasks/ingest_and_ai.py | 6 ++++-- frontend/src/lib/vtt.ts | 10 ++++++++-- 4 files changed, 37 insertions(+), 20 deletions(-) diff --git a/backend/app/api/v1/routes_jobs.py b/backend/app/api/v1/routes_jobs.py index 4eaab46..2576c8b 100644 --- a/backend/app/api/v1/routes_jobs.py +++ b/backend/app/api/v1/routes_jobs.py @@ -1618,8 +1618,9 @@ async def update_job_vtt_content( # Validate and update captions VTT if request.captions_vtt: # treat empty string same as None — nothing to update - # Validate VTT format - is_valid, errors = VTTEditor.validate_vtt(request.captions_vtt) + # Auto-fix minor overlaps before validation (mirrors AI-generation pipeline) + captions_vtt_fixed = VTTEditor.fix_overlapping_cues(request.captions_vtt) + is_valid, errors = VTTEditor.validate_vtt(captions_vtt_fixed) if not is_valid: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, @@ -1628,20 +1629,20 @@ async def update_job_vtt_content( # Snapshot before overwriting live file await vtt_versioning.create_version( - db, job_id, target_language, "captions", request.captions_vtt, current_user + db, job_id, target_language, "captions", captions_vtt_fixed, current_user ) # Upload updated VTT new_captions_uri = await upload_vtt_to_gcs( - request.captions_vtt, + captions_vtt_fixed, f"{job_id}/{target_language}/captions.vtt" ) lang_output["captions_vtt_gcs"] = new_captions_uri # Validate and update audio description VTT if request.audio_description_vtt: # treat empty string same as None — nothing to update - # Validate VTT format - is_valid, errors = VTTEditor.validate_vtt(request.audio_description_vtt) + ad_vtt_fixed = VTTEditor.fix_overlapping_cues(request.audio_description_vtt) + is_valid, errors = VTTEditor.validate_vtt(ad_vtt_fixed) if not is_valid: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, @@ -1666,7 +1667,7 @@ async def update_job_vtt_content( except Exception as _e: logger.warning(f"Could not read old AD VTT for diff: {_e}") - new_cues = [c["text"] for c in _parse_ad_cues_for_diff(request.audio_description_vtt)] + new_cues = [c["text"] for c in _parse_ad_cues_for_diff(ad_vtt_fixed)] # Queue TTS regeneration for any cue whose text changed or that is newly added edit_state = lang_output.get("accessible_video_edit_state") or {} @@ -1713,12 +1714,12 @@ async def update_job_vtt_content( # Snapshot before overwriting live file await vtt_versioning.create_version( - db, job_id, target_language, "ad", request.audio_description_vtt, current_user + db, job_id, target_language, "ad", ad_vtt_fixed, current_user ) # Upload updated VTT new_ad_uri = await upload_vtt_to_gcs( - request.audio_description_vtt, + ad_vtt_fixed, f"{job_id}/{target_language}/ad.vtt" ) lang_output["ad_vtt_gcs"] = new_ad_uri @@ -1731,7 +1732,7 @@ async def update_job_vtt_content( generate_descriptive_transcript as _gen_transcript, ) - captions_text = request.captions_vtt + captions_text = captions_vtt_fixed if request.captions_vtt else None if not captions_text: cc_gcs = lang_output.get("captions_vtt_gcs") if cc_gcs: @@ -1742,7 +1743,7 @@ async def update_job_vtt_content( gcs_service.executor, _cc_blob.download_as_text ) - ad_text = request.audio_description_vtt + ad_text = ad_vtt_fixed if request.audio_description_vtt else None if not ad_text: ad_gcs = lang_output.get("ad_vtt_gcs") if ad_gcs: diff --git a/backend/app/services/glossary_service.py b/backend/app/services/glossary_service.py index e079f80..566f123 100644 --- a/backend/app/services/glossary_service.py +++ b/backend/app/services/glossary_service.py @@ -547,18 +547,26 @@ async def _vector_match( def _get_translation(translations: dict[str, str], target_locale: str) -> str | None: - """Look up a translation with locale-fallback: fr-CA → fr-FR → fr → None.""" + """Look up a translation with locale-fallback. + + Specific → bare: fr-CA → fr-FR siblings → fr + Bare → specific: fr → fr-FR, fr-CA (first match) + """ if not translations or not target_locale: return None if target_locale in translations: return translations[target_locale] - # Try parent language - parent = target_locale.split("-")[0] if "-" in target_locale else None - if parent: - # Try sibling locales, e.g. fr-CA not found → try fr-FR + if "-" in target_locale: + # Specific locale: try sibling regions and bare parent (fr-CA → fr-FR → fr) + parent = target_locale.split("-")[0] for code, text in translations.items(): if code.startswith(parent + "-") or code == parent: return text + else: + # Bare code (fr): try any fr-* region variant stored in the glossary + for code, text in translations.items(): + if code == target_locale or code.startswith(target_locale + "-"): + return text return None diff --git a/backend/app/tasks/ingest_and_ai.py b/backend/app/tasks/ingest_and_ai.py index 1c7fd05..c4b5f7e 100644 --- a/backend/app/tasks/ingest_and_ai.py +++ b/backend/app/tasks/ingest_and_ai.py @@ -169,10 +169,12 @@ async def ingest_and_ai_task_impl(job_id: str): user_external_id=_cost_ctx["user_id"], project_id=_cost_ctx["project_id"], ) - # Load glossary for source language — use brand context as vocabulary hint + # Load glossary for source language — use title + brand context for term matching from ..services.glossary_service import get_glossary_block_for_job _source_lang = job_doc.get("source", {}).get("language", "en") - _job_for_glossary = {**job_doc, "_glossary_source_text": brand_context or ""} + _job_title = job_doc.get("title") or "" + _source_for_glossary = " ".join(filter(None, [_job_title, brand_context])) + _job_for_glossary = {**job_doc, "_glossary_source_text": _source_for_glossary} glossary_block = await get_glossary_block_for_job(_job_for_glossary, _source_lang, db) ai_result = await gemini_service.extract_accessibility( temp_path, diff --git a/frontend/src/lib/vtt.ts b/frontend/src/lib/vtt.ts index dfcb43a..156305b 100644 --- a/frontend/src/lib/vtt.ts +++ b/frontend/src/lib/vtt.ts @@ -3,6 +3,8 @@ export interface VTTCue { endTime: number; // seconds text: string; identifier?: string; + /** Raw cue settings string from the VTT timing line (e.g. "line:0% align:start") */ + settings?: string; /** When true, caption should be rendered at the top of the video (line:0% cue setting) */ positionTop?: boolean; } @@ -54,6 +56,7 @@ export class VTTParser { endTime, text: textLines.join('\n'), identifier, + settings: cueSettings.trim() || undefined, ...(positionTop ? { positionTop: true } : {}) }); } @@ -75,10 +78,13 @@ export class VTTParser { lines.push(cue.identifier); } - // Add timing line + // Add timing line (preserve cue settings like line:0%) const startTimestamp = this.formatTimestamp(cue.startTime); const endTimestamp = this.formatTimestamp(cue.endTime); - lines.push(`${startTimestamp} --> ${endTimestamp}`); + const timingLine = cue.settings + ? `${startTimestamp} --> ${endTimestamp} ${cue.settings}` + : `${startTimestamp} --> ${endTimestamp}`; + lines.push(timingLine); // Add text (can be multi-line) lines.push(cue.text);