fix(glossary,vtt): 4 bugs — locale fallback, ingestion source, cue settings, overlap on save
- glossary_service: _get_translation now handles bare→specific fallback (fr→fr-FR);
previously only specific→bare worked, causing zero term matches when a job uses
bare locale codes ("fr") but the XLSX has region columns ("fr_fr" → "fr-FR")
- ingest_and_ai: use title + brand_context as the glossary source text; previously,
an empty brand_context caused the glossary to be skipped entirely during AI ingestion
- routes_jobs.py: apply fix_overlapping_cues before validating PATCH /vtt;
mirrors what AI generation already does — prevents save errors for minor overlaps
- frontend/vtt.ts: preserve raw cue settings (line:0%, align:end, etc.) through the
parse→build round-trip; previously, settings were parsed only into the positionTop
flag and dropped on serialization, so caption positioning was lost after an edit
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
69eff9ca9d
commit
16000a8bd9
4 changed files with 37 additions and 20 deletions
|
|
@ -1618,8 +1618,9 @@ async def update_job_vtt_content(
|
|||
|
||||
# Validate and update captions VTT
|
||||
if request.captions_vtt: # treat empty string same as None — nothing to update
|
||||
# Validate VTT format
|
||||
is_valid, errors = VTTEditor.validate_vtt(request.captions_vtt)
|
||||
# Auto-fix minor overlaps before validation (mirrors AI-generation pipeline)
|
||||
captions_vtt_fixed = VTTEditor.fix_overlapping_cues(request.captions_vtt)
|
||||
is_valid, errors = VTTEditor.validate_vtt(captions_vtt_fixed)
|
||||
if not is_valid:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
|
|
@ -1628,20 +1629,20 @@ async def update_job_vtt_content(
|
|||
|
||||
# Snapshot before overwriting live file
|
||||
await vtt_versioning.create_version(
|
||||
db, job_id, target_language, "captions", request.captions_vtt, current_user
|
||||
db, job_id, target_language, "captions", captions_vtt_fixed, current_user
|
||||
)
|
||||
|
||||
# Upload updated VTT
|
||||
new_captions_uri = await upload_vtt_to_gcs(
|
||||
request.captions_vtt,
|
||||
captions_vtt_fixed,
|
||||
f"{job_id}/{target_language}/captions.vtt"
|
||||
)
|
||||
lang_output["captions_vtt_gcs"] = new_captions_uri
|
||||
|
||||
# Validate and update audio description VTT
|
||||
if request.audio_description_vtt: # treat empty string same as None — nothing to update
|
||||
# Validate VTT format
|
||||
is_valid, errors = VTTEditor.validate_vtt(request.audio_description_vtt)
|
||||
ad_vtt_fixed = VTTEditor.fix_overlapping_cues(request.audio_description_vtt)
|
||||
is_valid, errors = VTTEditor.validate_vtt(ad_vtt_fixed)
|
||||
if not is_valid:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
|
|
@ -1666,7 +1667,7 @@ async def update_job_vtt_content(
|
|||
except Exception as _e:
|
||||
logger.warning(f"Could not read old AD VTT for diff: {_e}")
|
||||
|
||||
new_cues = [c["text"] for c in _parse_ad_cues_for_diff(request.audio_description_vtt)]
|
||||
new_cues = [c["text"] for c in _parse_ad_cues_for_diff(ad_vtt_fixed)]
|
||||
|
||||
# Queue TTS regeneration for any cue whose text changed or that is newly added
|
||||
edit_state = lang_output.get("accessible_video_edit_state") or {}
|
||||
|
|
@ -1713,12 +1714,12 @@ async def update_job_vtt_content(
|
|||
|
||||
# Snapshot before overwriting live file
|
||||
await vtt_versioning.create_version(
|
||||
db, job_id, target_language, "ad", request.audio_description_vtt, current_user
|
||||
db, job_id, target_language, "ad", ad_vtt_fixed, current_user
|
||||
)
|
||||
|
||||
# Upload updated VTT
|
||||
new_ad_uri = await upload_vtt_to_gcs(
|
||||
request.audio_description_vtt,
|
||||
ad_vtt_fixed,
|
||||
f"{job_id}/{target_language}/ad.vtt"
|
||||
)
|
||||
lang_output["ad_vtt_gcs"] = new_ad_uri
|
||||
|
|
@ -1731,7 +1732,7 @@ async def update_job_vtt_content(
|
|||
generate_descriptive_transcript as _gen_transcript,
|
||||
)
|
||||
|
||||
captions_text = request.captions_vtt
|
||||
captions_text = captions_vtt_fixed if request.captions_vtt else None
|
||||
if not captions_text:
|
||||
cc_gcs = lang_output.get("captions_vtt_gcs")
|
||||
if cc_gcs:
|
||||
|
|
@ -1742,7 +1743,7 @@ async def update_job_vtt_content(
|
|||
gcs_service.executor, _cc_blob.download_as_text
|
||||
)
|
||||
|
||||
ad_text = request.audio_description_vtt
|
||||
ad_text = ad_vtt_fixed if request.audio_description_vtt else None
|
||||
if not ad_text:
|
||||
ad_gcs = lang_output.get("ad_vtt_gcs")
|
||||
if ad_gcs:
|
||||
|
|
|
|||
|
|
@ -547,18 +547,26 @@ async def _vector_match(
|
|||
|
||||
|
||||
def _get_translation(translations: dict[str, str], target_locale: str) -> str | None:
|
||||
"""Look up a translation with locale-fallback: fr-CA → fr-FR → fr → None."""
|
||||
"""Look up a translation with locale-fallback.
|
||||
|
||||
Specific → bare: fr-CA → fr-FR siblings → fr
|
||||
Bare → specific: fr → fr-FR, fr-CA (first match)
|
||||
"""
|
||||
if not translations or not target_locale:
|
||||
return None
|
||||
if target_locale in translations:
|
||||
return translations[target_locale]
|
||||
# Try parent language
|
||||
parent = target_locale.split("-")[0] if "-" in target_locale else None
|
||||
if parent:
|
||||
# Try sibling locales, e.g. fr-CA not found → try fr-FR
|
||||
if "-" in target_locale:
|
||||
# Specific locale: try sibling regions and bare parent (fr-CA → fr-FR → fr)
|
||||
parent = target_locale.split("-")[0]
|
||||
for code, text in translations.items():
|
||||
if code.startswith(parent + "-") or code == parent:
|
||||
return text
|
||||
else:
|
||||
# Bare code (fr): try any fr-* region variant stored in the glossary
|
||||
for code, text in translations.items():
|
||||
if code == target_locale or code.startswith(target_locale + "-"):
|
||||
return text
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -169,10 +169,12 @@ async def ingest_and_ai_task_impl(job_id: str):
|
|||
user_external_id=_cost_ctx["user_id"],
|
||||
project_id=_cost_ctx["project_id"],
|
||||
)
|
||||
# Load glossary for source language — use brand context as vocabulary hint
|
||||
# Load glossary for source language — use title + brand context for term matching
|
||||
from ..services.glossary_service import get_glossary_block_for_job
|
||||
_source_lang = job_doc.get("source", {}).get("language", "en")
|
||||
_job_for_glossary = {**job_doc, "_glossary_source_text": brand_context or ""}
|
||||
_job_title = job_doc.get("title") or ""
|
||||
_source_for_glossary = " ".join(filter(None, [_job_title, brand_context]))
|
||||
_job_for_glossary = {**job_doc, "_glossary_source_text": _source_for_glossary}
|
||||
glossary_block = await get_glossary_block_for_job(_job_for_glossary, _source_lang, db)
|
||||
ai_result = await gemini_service.extract_accessibility(
|
||||
temp_path,
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ export interface VTTCue {
|
|||
endTime: number; // seconds
|
||||
text: string;
|
||||
identifier?: string;
|
||||
/** Raw cue settings string from the VTT timing line (e.g. "line:0% align:start") */
|
||||
settings?: string;
|
||||
/** When true, caption should be rendered at the top of the video (line:0% cue setting) */
|
||||
positionTop?: boolean;
|
||||
}
|
||||
|
|
@ -54,6 +56,7 @@ export class VTTParser {
|
|||
endTime,
|
||||
text: textLines.join('\n'),
|
||||
identifier,
|
||||
settings: cueSettings.trim() || undefined,
|
||||
...(positionTop ? { positionTop: true } : {})
|
||||
});
|
||||
}
|
||||
|
|
@ -75,10 +78,13 @@ export class VTTParser {
|
|||
lines.push(cue.identifier);
|
||||
}
|
||||
|
||||
// Add timing line
|
||||
// Add timing line (preserve cue settings like line:0%)
|
||||
const startTimestamp = this.formatTimestamp(cue.startTime);
|
||||
const endTimestamp = this.formatTimestamp(cue.endTime);
|
||||
lines.push(`${startTimestamp} --> ${endTimestamp}`);
|
||||
const timingLine = cue.settings
|
||||
? `${startTimestamp} --> ${endTimestamp} ${cue.settings}`
|
||||
: `${startTimestamp} --> ${endTimestamp}`;
|
||||
lines.push(timingLine);
|
||||
|
||||
// Add text (can be multi-line)
|
||||
lines.push(cue.text);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue