fix(glossary,vtt): 4 bugs — locale fallback, ingestion source, cue settings, overlap on save

- glossary_service: _get_translation now handles bare→specific fallback (fr→fr-FR);
  previously only specific→bare worked, causing zero term matches when job uses
  bare locale codes ("fr") but XLSX has region columns ("fr_fr" → "fr-FR")
- ingest_and_ai: use title + brand_context as glossary source text; previously
  empty brand_context caused glossary to be skipped entirely during AI ingestion
- routes_jobs.py: apply fix_overlapping_cues before validating PATCH /vtt;
  mirrors what AI generation already does — prevents save errors for minor overlaps
- frontend/vtt.ts: preserve raw cue settings (line:0%, align:end, etc.) through
  parse→build round-trip; previously settings were parsed into positionTop flag
  only and dropped on serialization, losing caption positioning after edit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-05-13 16:58:13 +01:00
parent 69eff9ca9d
commit 16000a8bd9
4 changed files with 37 additions and 20 deletions

View file

@ -1618,8 +1618,9 @@ async def update_job_vtt_content(
# Validate and update captions VTT
if request.captions_vtt: # treat empty string same as None — nothing to update
# Validate VTT format
is_valid, errors = VTTEditor.validate_vtt(request.captions_vtt)
# Auto-fix minor overlaps before validation (mirrors AI-generation pipeline)
captions_vtt_fixed = VTTEditor.fix_overlapping_cues(request.captions_vtt)
is_valid, errors = VTTEditor.validate_vtt(captions_vtt_fixed)
if not is_valid:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
@ -1628,20 +1629,20 @@ async def update_job_vtt_content(
# Snapshot before overwriting live file
await vtt_versioning.create_version(
db, job_id, target_language, "captions", request.captions_vtt, current_user
db, job_id, target_language, "captions", captions_vtt_fixed, current_user
)
# Upload updated VTT
new_captions_uri = await upload_vtt_to_gcs(
request.captions_vtt,
captions_vtt_fixed,
f"{job_id}/{target_language}/captions.vtt"
)
lang_output["captions_vtt_gcs"] = new_captions_uri
# Validate and update audio description VTT
if request.audio_description_vtt: # treat empty string same as None — nothing to update
# Validate VTT format
is_valid, errors = VTTEditor.validate_vtt(request.audio_description_vtt)
ad_vtt_fixed = VTTEditor.fix_overlapping_cues(request.audio_description_vtt)
is_valid, errors = VTTEditor.validate_vtt(ad_vtt_fixed)
if not is_valid:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
@ -1666,7 +1667,7 @@ async def update_job_vtt_content(
except Exception as _e:
logger.warning(f"Could not read old AD VTT for diff: {_e}")
new_cues = [c["text"] for c in _parse_ad_cues_for_diff(request.audio_description_vtt)]
new_cues = [c["text"] for c in _parse_ad_cues_for_diff(ad_vtt_fixed)]
# Queue TTS regeneration for any cue whose text changed or that is newly added
edit_state = lang_output.get("accessible_video_edit_state") or {}
@ -1713,12 +1714,12 @@ async def update_job_vtt_content(
# Snapshot before overwriting live file
await vtt_versioning.create_version(
db, job_id, target_language, "ad", request.audio_description_vtt, current_user
db, job_id, target_language, "ad", ad_vtt_fixed, current_user
)
# Upload updated VTT
new_ad_uri = await upload_vtt_to_gcs(
request.audio_description_vtt,
ad_vtt_fixed,
f"{job_id}/{target_language}/ad.vtt"
)
lang_output["ad_vtt_gcs"] = new_ad_uri
@ -1731,7 +1732,7 @@ async def update_job_vtt_content(
generate_descriptive_transcript as _gen_transcript,
)
captions_text = request.captions_vtt
captions_text = captions_vtt_fixed if request.captions_vtt else None
if not captions_text:
cc_gcs = lang_output.get("captions_vtt_gcs")
if cc_gcs:
@ -1742,7 +1743,7 @@ async def update_job_vtt_content(
gcs_service.executor, _cc_blob.download_as_text
)
ad_text = request.audio_description_vtt
ad_text = ad_vtt_fixed if request.audio_description_vtt else None
if not ad_text:
ad_gcs = lang_output.get("ad_vtt_gcs")
if ad_gcs:

View file

@ -547,18 +547,26 @@ async def _vector_match(
def _get_translation(translations: dict[str, str], target_locale: str) -> str | None:
"""Look up a translation with locale-fallback: fr-CA → fr-FR → fr → None."""
"""Look up a translation with locale-fallback.
Specific → bare: fr-CA → fr-FR siblings → fr
Bare → specific: fr → fr-FR, fr-CA (first match)
"""
if not translations or not target_locale:
return None
if target_locale in translations:
return translations[target_locale]
# Try parent language
parent = target_locale.split("-")[0] if "-" in target_locale else None
if parent:
# Try sibling locales, e.g. fr-CA not found → try fr-FR
if "-" in target_locale:
# Specific locale: try sibling regions and bare parent (fr-CA → fr-FR → fr)
parent = target_locale.split("-")[0]
for code, text in translations.items():
if code.startswith(parent + "-") or code == parent:
return text
else:
# Bare code (fr): try any fr-* region variant stored in the glossary
for code, text in translations.items():
if code == target_locale or code.startswith(target_locale + "-"):
return text
return None

View file

@ -169,10 +169,12 @@ async def ingest_and_ai_task_impl(job_id: str):
user_external_id=_cost_ctx["user_id"],
project_id=_cost_ctx["project_id"],
)
# Load glossary for source language — use brand context as vocabulary hint
# Load glossary for source language — use title + brand context for term matching
from ..services.glossary_service import get_glossary_block_for_job
_source_lang = job_doc.get("source", {}).get("language", "en")
_job_for_glossary = {**job_doc, "_glossary_source_text": brand_context or ""}
_job_title = job_doc.get("title") or ""
_source_for_glossary = " ".join(filter(None, [_job_title, brand_context]))
_job_for_glossary = {**job_doc, "_glossary_source_text": _source_for_glossary}
glossary_block = await get_glossary_block_for_job(_job_for_glossary, _source_lang, db)
ai_result = await gemini_service.extract_accessibility(
temp_path,

View file

@ -3,6 +3,8 @@ export interface VTTCue {
endTime: number; // seconds
text: string;
identifier?: string;
/** Raw cue settings string from the VTT timing line (e.g. "line:0% align:start") */
settings?: string;
/** When true, caption should be rendered at the top of the video (line:0% cue setting) */
positionTop?: boolean;
}
@ -54,6 +56,7 @@ export class VTTParser {
endTime,
text: textLines.join('\n'),
identifier,
settings: cueSettings.trim() || undefined,
...(positionTop ? { positionTop: true } : {})
});
}
@ -75,10 +78,13 @@ export class VTTParser {
lines.push(cue.identifier);
}
// Add timing line
// Add timing line (preserve cue settings like line:0%)
const startTimestamp = this.formatTimestamp(cue.startTime);
const endTimestamp = this.formatTimestamp(cue.endTime);
lines.push(`${startTimestamp} --> ${endTimestamp}`);
const timingLine = cue.settings
? `${startTimestamp} --> ${endTimestamp} ${cue.settings}`
: `${startTimestamp} --> ${endTimestamp}`;
lines.push(timingLine);
// Add text (can be multi-line)
lines.push(cue.text);