diff --git a/docs/superpowers/plans/2026-05-15-srt-qc.md b/docs/superpowers/plans/2026-05-15-srt-qc.md new file mode 100644 index 0000000..451c40b --- /dev/null +++ b/docs/superpowers/plans/2026-05-15-srt-qc.md @@ -0,0 +1,1568 @@ +# SRT Subtitle QC Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add SRT subtitle QC to the existing Video QC pipeline — fuzzy filename pairing of SRTs to videos, plus three new checks (`srt_structure`, `srt_timing`, `srt_language`) that run alongside `visual_quality`, `censorship`, `price_currency`, etc. + +**Architecture:** New `modules/video_qc/utils/srt_pairing.py` holds pure-function pairing helpers. `VideoQCExecutor` gains an `srt_path` constructor parameter and three new check methods (deterministic structure + timing, text-only Gemini language detection). `BatchVideoQCExecutor` runs `pair_batch(videos, srts)` at the top of `execute()` and threads the resulting `srt_path` into each per-video executor. `routes.py` accepts `.srt` uploads alongside `.mp4`, surfaces a pre-flight pairing summary in the configure view, and the unpaired SRTs are listed in a collapsible section. Unified Video QC report — three new cards appear inline, with `skipped` status when no SRT was paired. + +**Tech Stack:** Python 3.13, Flask, `srt` (PyPI library — pure Python SRT parser), existing `LLMConfig.call_llm_api` (Gemini 2.5 Flash, text-only — much cheaper than video calls), existing `get_video_metadata` for duration. + +**Source spec:** `docs/superpowers/specs/2026-05-15-srt-qc-design.md` + +**Execution prerequisites:** Be on branch `develop`. Plan 1 (`2026-05-15-video-qc-tuning.md`) does not need to be implemented first, but if both are being executed in the same window, do Plan 1 first to avoid trivial merge conflicts in `executor.py` and `profiles.yaml`. + +**Test infrastructure:** The codebase has no pytest setup. Pure-function helpers in this plan get inline Python REPL verification (using the asset folder `testing_15may/srt/`); LLM-integrated and Flask-wired code gets manual smoke tests through the web UI. + +--- + +## Task 1: Add the `srt` library to requirements.txt + +**Files:** +- Modify: `requirements.txt` + +- [ ] **Step 1: Append the library** + +Open `requirements.txt`. Find the existing block (e.g. video processing section around moviepy/scenedetect) and add this line at the end of the relevant section (or at the very end of the file): + +``` +# SRT subtitle parsing (Video QC — SRT checks) +srt==3.5.3 +``` + +- [ ] **Step 2: Install into the venv** + +```bash +cd /Users/nickviljoen/Desktop/HM_QC_Bitbucket/hm_ai_qc_report_tool +source venv/bin/activate +pip install srt==3.5.3 +python -c "import srt; print(srt.__version__)" +``` + +Expected output: `3.5.3`. + +- [ ] **Step 3: Commit** + +```bash +git add requirements.txt +git commit -m "SRT QC: add srt library to requirements + +Pure-Python SRT parser used by the upcoming srt_structure and srt_timing +checks. Pinned to 3.5.3." +``` + +--- + +## Task 2: Create `srt_pairing.py` with `normalise_slug` + `canonical_locale` + +Pure functions, no Flask/SQLAlchemy. The whole module is built incrementally over Tasks 2–4. + +**Files:** +- Create: `modules/video_qc/utils/srt_pairing.py` + +- [ ] **Step 1: Create the module with the two simplest helpers** + +```python +""" +SRT ↔ Video filename pairing helpers. + +Pure functions. Used by BatchVideoQCExecutor at pre-flight to pair each +SRT file to its video, even when filenames diverge in style (e.g. +`CFUL262B01_PP_RIO_INTRO_15C.srt_8852296_de-AT.srt` should pair with +`7147775_AT-de_CFUL262B01_PP_RIO_INTRO_15C_4x5_SoMe_MASTER.mp4`). + +Token parsing extracts campaign_code, clip_slug, and locale from each +filename. score_pair() combines them into a 0.0–1.0 score; pair_batch() +greedily assigns SRTs to videos above a 0.7 threshold. + +Verified at REPL against the test folder testing_15may/srt/. +""" +import os +import re +from typing import Iterable + +# Campaign-code regex — see core/utils/campaign_code.py for the canonical +# version. We re-declare here to keep this module dependency-free, but the +# patterns must stay in sync. Two formats supported: +# • Legacy: 4 digits + optional uppercase letter (1013A, 4116) +# • New: 11-char alphanumeric with year at positions 5-6 (CFUL263C01D) +_CAMPAIGN_RE = re.compile( + r"\b(" + r"\d{4}[A-Z]?" # legacy + r"|[A-Z]{4}\d{3}[A-Z]\d{2}[A-Z]" # new + r")\b" +) + + +def normalise_slug(s: str) -> str: + """Lowercase and strip every non-alphanumeric character. + + `PP_RIO_INTRO_15C` -> `ppriointro15c` + `RIO_INTRO15C` -> `riointro15c` + `RIO-INTRO 15c` -> `riointro15c` + + The slug pair (video_slug, srt_slug) is then compared by substring + containment in score_pair(), so a video with a `PP_` prefix that the + SRT lacks will still pair: `ppriointro15c` contains `riointro15c` + (no — actually `priointro15c` is the substring; `ppriointro15c` and + `riointro15c` overlap but neither is a substring of the other). The + real test data shows this works: SRT `CFUL262B01_PP_RIO_INTRO_15C…` + pairs with video containing `PP_RIO_INTRO_15C` because both end with + `ppriointro15c` — they share that suffix. The substring check in + score_pair() is implemented carefully to allow this. + """ + if not s: + return '' + return re.sub(r"[^a-z0-9]", "", s.lower()) + + +def canonical_locale(s: str) -> str | None: + """Canonicalise locale strings to 'lang-MARKET' (lowercase-Uppercase). + + Accepts: 'AT-de', 'de-AT', 'de_AT', 'AT_de', 'de-at', etc. + Returns 'de-AT' for any of the above. None when input doesn't look + like a 2-2 locale pair. + + Heuristic: of the two halves, the all-uppercase shape is the market. + If both/neither are uppercase, treat the first as market (H&M + convention for video filenames is Market_lang). + """ + if not s: + return None + m = re.match(r"^([A-Za-z]{2})[-_]([A-Za-z]{2})$", s.strip()) + if not m: + return None + a, b = m.group(1), m.group(2) + if a.isupper() and b.islower(): + market, lang = a.upper(), b.lower() + elif b.isupper() and a.islower(): + market, lang = b.upper(), a.lower() + else: + # Ambiguous case shape — assume Market_lang (H&M video convention). + market, lang = a.upper(), b.lower() + return f"{lang}-{market}" +``` + +- [ ] **Step 2: Verify at REPL** + +```bash +python -c " +from modules.video_qc.utils.srt_pairing import normalise_slug, canonical_locale +print(normalise_slug('PP_RIO_INTRO_15C')) # -> ppriointro15c +print(normalise_slug('RIO_INTRO15C')) # -> riointro15c +print(normalise_slug('RIO-INTRO 15c')) # -> riointro15c +print(normalise_slug('')) # -> (empty) +print('---') +print(canonical_locale('AT-de')) # -> de-AT +print(canonical_locale('de-AT')) # -> de-AT +print(canonical_locale('de_AT')) # -> de-AT +print(canonical_locale('CH-en')) # -> en-CH +print(canonical_locale('en-CH')) # -> en-CH +print(canonical_locale('xx')) # -> None +print(canonical_locale('')) # -> None +" +``` + +Expected to match the inline comments. + +- [ ] **Step 3: Commit** + +```bash +git add modules/video_qc/utils/srt_pairing.py +git commit -m "SRT QC: scaffold srt_pairing module with normalise_slug + canonical_locale + +Pure-function helpers, verified at REPL. canonical_locale handles the +de-AT ↔ AT-de order flip between SRT and video filenames; normalise_slug +strips non-alphanumerics so RIO_INTRO_15C ≈ RIO_INTRO15C." +``` + +--- + +## Task 3: Add `parse_video_tokens` and `parse_srt_tokens` to `srt_pairing.py` + +**Files:** +- Modify: `modules/video_qc/utils/srt_pairing.py` + +- [ ] **Step 1: Append the parsers** + +Append at the end of `srt_pairing.py`: + +```python +def parse_video_tokens(filename: str) -> dict: + """Extract {campaign_code, clip_slug, locale} from a video filename. + + Video filename forms (H&M conventions): + • `7147775_AT-de_CFUL262B01_PP_RIO_INTRO_15C_4x5_SoMe_MASTER.mp4` + • `6898354_1013A_SPRING_W_10A_TT_9x16_TK_Stories_Vogue_ES-es.mp4` + + clip_slug = everything between (locale OR campaign_code) and any + aspect-ratio token (1x1 / 4x5 / 9x16) or the file extension. We + normalise it via normalise_slug() before storage. + """ + base = os.path.splitext(os.path.basename(filename))[0] + parts = base.split('_') + + # Locale: scan for a token that canonicalises + locale = None + for p in parts: + cl = canonical_locale(p) + if cl: + locale = cl + break + + # Campaign code: scan tokens, then full string fallback + campaign_code = None + for p in parts: + if _CAMPAIGN_RE.fullmatch(p): + campaign_code = p + break + if not campaign_code: + m = _CAMPAIGN_RE.search(base) + if m: + campaign_code = m.group(1) + + # Clip slug: drop leading numeric ID, leading locale, leading code, + # trailing aspect-ratio + platform + locale tokens. What remains is + # the slug. We do a quick filter pass. + slug_parts = [] + aspect_pat = re.compile(r"^\d+x\d+$", re.IGNORECASE) + for p in parts: + if p.isdigit(): + continue + if canonical_locale(p): + continue + if _CAMPAIGN_RE.fullmatch(p): + continue + if aspect_pat.match(p): + continue + slug_parts.append(p) + + raw_slug = '_'.join(slug_parts) + return { + 'campaign_code': campaign_code, + 'clip_slug': normalise_slug(raw_slug), + 'locale': locale, + '_raw_slug_parts': slug_parts, # for debug + } + + +def parse_srt_tokens(filename: str) -> dict: + """Extract {campaign_code, clip_slug, locale} from an SRT filename. + + Two known forms (both seen in testing_15may/srt/): + A. `CFUL262B01_PP_RIO_INTRO_15C.srt_8852296_de-AT.srt` + — campaign code + clip slug + locale all present, with an + internal `.srt__` artifact from the upstream tool. + B. `RIO_INTRO6B_en-CH.srt` + — no campaign code; clip slug and locale only. + + We strip ALL `.srt` occurrences from the base, drop leading numeric + IDs, and run the same campaign/locale/slug extraction as parse_video_tokens. + """ + base = os.path.basename(filename) + # Strip every .srt occurrence (handles `.srt__.srt` form) + base = base.replace('.srt', '') + parts = base.split('_') + + locale = None + for p in parts: + cl = canonical_locale(p) + if cl: + locale = cl + break + + campaign_code = None + for p in parts: + if _CAMPAIGN_RE.fullmatch(p): + campaign_code = p + break + if not campaign_code: + m = _CAMPAIGN_RE.search(base) + if m: + campaign_code = m.group(1) + + slug_parts = [] + for p in parts: + if p.isdigit(): + continue + if canonical_locale(p): + continue + if _CAMPAIGN_RE.fullmatch(p): + continue + if not p: + continue + slug_parts.append(p) + + raw_slug = '_'.join(slug_parts) + return { + 'campaign_code': campaign_code, + 'clip_slug': normalise_slug(raw_slug), + 'locale': locale, + '_raw_slug_parts': slug_parts, + } +``` + +- [ ] **Step 2: Verify at REPL against the test folder** + +```bash +python -c " +from modules.video_qc.utils.srt_pairing import parse_video_tokens, parse_srt_tokens +videos = [ + '7147775_AT-de_CFUL262B01_PP_RIO_INTRO_15C_4x5_SoMe_MASTER.mp4', + '7158980_CH-en_CFUL262B01_PP_RIO_INTRO_6B_1x1_SoMe_MASTER.mp4', +] +srts = [ + 'CFUL262B01_PP_RIO_INTRO_15C.srt_8852357_de-AT.srt', + 'RIO_INTRO6B_en-CH.srt', +] +print('VIDEOS') +for v in videos: + t = parse_video_tokens(v) + print(f' {v[:50]:50s} -> code={t[\"campaign_code\"]} loc={t[\"locale\"]} slug={t[\"clip_slug\"]}') +print('SRTS') +for s in srts: + t = parse_srt_tokens(s) + print(f' {s[:50]:50s} -> code={t[\"campaign_code\"]} loc={t[\"locale\"]} slug={t[\"clip_slug\"]}') +" +``` + +Expected output: +``` +VIDEOS + 7147775_AT-de_CFUL262B01_PP_RIO_INTRO_15C_4x5_SoM... -> code=CFUL262B01 loc=de-AT slug=ppriointro15csomemaster + 7158980_CH-en_CFUL262B01_PP_RIO_INTRO_6B_1x1_SoMe... -> code=CFUL262B01 loc=en-CH slug=ppriointro6bsomemaster +SRTS + CFUL262B01_PP_RIO_INTRO_15C.srt_8852357_de-AT.srt -> code=CFUL262B01 loc=de-AT slug=ppriointro15c + RIO_INTRO6B_en-CH.srt -> code=None loc=en-CH slug=riointro6b +``` + +The pair-able slug fragments: +- Video1 slug `ppriointro15csomemaster` contains SRT1 slug `ppriointro15c` → substring match ✓ (handled in Task 4's `score_pair`). +- Video2 slug `ppriointro6bsomemaster` contains SRT2 slug `riointro6b`? Yes — `priointro6b` is in the video's slug, but we need `riointro6b` exactly. Check: `ppriointro6bsomemaster`.find(`riointro6b`) → 1 (matches starting at index 1). ✓ + +If the output doesn't show the expected `code` / `loc` / `slug`, edit the parser and re-run the REPL command. + +- [ ] **Step 3: Commit** + +```bash +git add modules/video_qc/utils/srt_pairing.py +git commit -m "SRT QC: add parse_video_tokens + parse_srt_tokens + +Extract campaign code, clip slug, and locale from both video and SRT +filenames. Handles the two SRT styles seen in testing_15may/srt/ +(campaign-code-prefixed CFUL... form, and abbreviated RIO_INTRO6B form). +Verified at REPL against test data." +``` + +--- + +## Task 4: Add `score_pair` and `pair_batch` to `srt_pairing.py` + +**Files:** +- Modify: `modules/video_qc/utils/srt_pairing.py` + +- [ ] **Step 1: Append the scoring + batching helpers** + +```python +PAIR_THRESHOLD = 0.7 + + +def score_pair(video_tokens: dict, srt_tokens: dict) -> float: + """Score how confidently this SRT belongs to this video. 0.0..1.0. + + Weights (additive, capped at 1.0): + • Locale match (both present, equal after canonicalisation): 0.5 + • Campaign code match (both present, equal): 0.3 + • Clip slug match (one is substring of the other, after + normalise_slug — see parse_*_tokens): 0.4 + + Hard zero rules: + • Both locales present AND differ -> 0.0 + • Both slugs present AND neither is substring of the other -> 0.0 + """ + v_loc = video_tokens.get('locale') + s_loc = srt_tokens.get('locale') + v_code = video_tokens.get('campaign_code') + s_code = srt_tokens.get('campaign_code') + v_slug = video_tokens.get('clip_slug') or '' + s_slug = srt_tokens.get('clip_slug') or '' + + # Hard reject: locales both present and divergent + if v_loc and s_loc and v_loc != s_loc: + return 0.0 + # Hard reject: slugs both present and neither contains the other + if v_slug and s_slug and v_slug not in s_slug and s_slug not in v_slug: + return 0.0 + + score = 0.0 + if v_loc and s_loc and v_loc == s_loc: + score += 0.5 + if v_code and s_code and v_code == s_code: + score += 0.3 + if v_slug and s_slug and (v_slug in s_slug or s_slug in v_slug): + score += 0.4 + + return min(score, 1.0) + + +def pair_batch( + video_paths: Iterable[str], + srt_paths: Iterable[str], +) -> tuple[dict, list[str], list[str]]: + """Pair each SRT to at most one video (and vice versa). Greedy on the + highest-scoring pairs above PAIR_THRESHOLD. + + Returns: + pair_map: dict[video_path, srt_path | None] for every video + (None if no SRT paired) + unpaired_srts: list[str] of SRTs left over + unpaired_videos: list[str] of videos with no SRT + """ + video_paths = list(video_paths) + srt_paths = list(srt_paths) + + # Compute all (video, srt, score) triples above threshold + candidates = [] + v_tokens = {v: parse_video_tokens(v) for v in video_paths} + s_tokens = {s: parse_srt_tokens(s) for s in srt_paths} + for v in video_paths: + for s in srt_paths: + score = score_pair(v_tokens[v], s_tokens[s]) + if score >= PAIR_THRESHOLD: + candidates.append((score, v, s)) + + # Greedy: highest score first; tie-break by filename for determinism + candidates.sort(key=lambda t: (-t[0], os.path.basename(t[1]), os.path.basename(t[2]))) + + used_videos: set = set() + used_srts: set = set() + pair_map: dict = {v: None for v in video_paths} + for score, v, s in candidates: + if v in used_videos or s in used_srts: + continue + pair_map[v] = s + used_videos.add(v) + used_srts.add(s) + + unpaired_srts = [s for s in srt_paths if s not in used_srts] + unpaired_videos = [v for v in video_paths if v not in used_videos] + return pair_map, unpaired_srts, unpaired_videos +``` + +- [ ] **Step 2: Verify against the full test folder** + +```bash +python -c " +import os +from modules.video_qc.utils.srt_pairing import pair_batch +root = '/Users/nickviljoen/Desktop/HM_QC_Bitbucket/testing_15may/srt' +files = os.listdir(root) +videos = sorted(os.path.join(root, f) for f in files if f.endswith('.mp4')) +srts = sorted(os.path.join(root, f) for f in files if f.endswith('.srt')) +print(f'{len(videos)} videos, {len(srts)} srts') +pair_map, unpaired_srts, unpaired_videos = pair_batch(videos, srts) +print('PAIRS:') +for v, s in pair_map.items(): + print(f' V: {os.path.basename(v)}') + print(f' S: {os.path.basename(s) if s else \"(none)\"}') + print() +print(f'unpaired SRTs: {[os.path.basename(s) for s in unpaired_srts]}') +print(f'unpaired videos: {[os.path.basename(v) for v in unpaired_videos]}') +" +``` + +Expected: all 6 videos pair with 1 SRT each. `unpaired_srts` and `unpaired_videos` both empty. + +If pairs are wrong or empty, inspect token output from Task 3 — the most common failure mode is that one of the slugs lacks the substring overlap (e.g. `riointro6b` vs `riointro15c` — neither is a substring of the other, score = 0). Re-check the videos in question. + +- [ ] **Step 3: Commit** + +```bash +git add modules/video_qc/utils/srt_pairing.py +git commit -m "SRT QC: add score_pair + pair_batch + +score_pair: additive locale (0.5) + campaign code (0.3) + clip-slug +substring (0.4), capped at 1.0, with hard-reject on divergent locales +or non-overlapping slugs. pair_batch: greedy highest-first assignment +above 0.7 threshold; one SRT per video. + +Verified pairs all 6 videos in testing_15may/srt/ to their SRTs." +``` + +--- + +## Task 5: Add `srt_path` parameter to `VideoQCExecutor.__init__` + +**Files:** +- Modify: `modules/video_qc/executor.py` + +- [ ] **Step 1: Extend the constructor** + +In `VideoQCExecutor.__init__` (around line 50), change the signature and body. Replace: + +```python + def __init__(self, session_id: str, file_path: str, job_number: str = None, + llm_provider: str = 'google', llm_model: str = 'gemini-2.5-flash', + user: str = None, campaign_id: str = None, batch_id: str = None, + pricing_reference_id: int = None): + self.session_id = session_id + self.file_path = file_path + self.job_number = job_number + self.llm_provider = llm_provider + self.llm_model = llm_model + self.user = user + self.campaign_id = campaign_id + self.batch_id = batch_id + self.pricing_reference_id = pricing_reference_id + self.progress = UnifiedProgressTracker(session_id) + self.results = {} + self.campaign_context = {} + self.pricing_reference = None # loaded via _load_pricing_reference +``` + +with: + +```python + def __init__(self, session_id: str, file_path: str, job_number: str = None, + llm_provider: str = 'google', llm_model: str = 'gemini-2.5-flash', + user: str = None, campaign_id: str = None, batch_id: str = None, + pricing_reference_id: int = None, srt_path: str = None): + self.session_id = session_id + self.file_path = file_path + self.job_number = job_number + self.llm_provider = llm_provider + self.llm_model = llm_model + self.user = user + self.campaign_id = campaign_id + self.batch_id = batch_id + self.pricing_reference_id = pricing_reference_id + self.srt_path = srt_path + self.progress = UnifiedProgressTracker(session_id) + self.results = {} + self.campaign_context = {} + self.pricing_reference = None # loaded via _load_pricing_reference +``` + +- [ ] **Step 2: Commit (no behaviour change yet)** + +```bash +git add modules/video_qc/executor.py +git commit -m "Video QC: thread srt_path through executor constructor + +Defaults to None — existing single-file flow unchanged. Used by the +upcoming SRT checks and by BatchVideoQCExecutor's pair_batch step." +``` + +--- + +## Task 6: Implement `_run_srt_structure_check` + +Deterministic. No LLM. Uses the `srt` library to parse. + +**Files:** +- Modify: `modules/video_qc/executor.py` + +- [ ] **Step 1: Add the method to `VideoQCExecutor`** + +Insert after the existing `_run_price_check` (and after any methods added by Plan 1 if that has been applied): + +```python + def _read_srt_text(self) -> tuple[str, dict]: + """Read the paired SRT file. Returns (text, encoding_info). + + encoding_info: {encoding: str, fallback_used: bool, replacement_chars: int} + Raises OSError if the file can't be read. + """ + with open(self.srt_path, 'rb') as f: + raw = f.read() + info = {'encoding': 'utf-8', 'fallback_used': False, 'replacement_chars': 0} + try: + text = raw.decode('utf-8') + except UnicodeDecodeError: + try: + import chardet + detected = chardet.detect(raw) + enc = detected.get('encoding') or 'latin-1' + text = raw.decode(enc, errors='replace') + info['encoding'] = enc + info['fallback_used'] = True + except ImportError: + text = raw.decode('latin-1', errors='replace') + info['encoding'] = 'latin-1' + info['fallback_used'] = True + info['replacement_chars'] = text.count('�') + return text, info + + def _run_srt_structure_check(self) -> Dict[str, Any]: + """Validate the paired SRT parses, is well-encoded, and has sane cues.""" + import srt as srt_lib + weight = 15 + try: + text, enc_info = self._read_srt_text() + except OSError as e: + return { + 'check_name': 'srt_structure', 'score': 0.0, 'status': 'failed', + 'message': f'Could not read SRT file: {e}', + 'details': {'error': str(e)}, 'weight': weight, + } + + warnings: list[str] = [] + if enc_info['fallback_used']: + warnings.append( + f"SRT decoded as {enc_info['encoding']} (not UTF-8). " + "Upstream tool may be emitting legacy encoding." + ) + + if enc_info['replacement_chars'] > 0: + return { + 'check_name': 'srt_structure', 'score': 0.0, 'status': 'failed', + 'message': ( + f"SRT contains {enc_info['replacement_chars']} Unicode " + f"replacement char(s) — encoding loss." + ), + 'details': {'encoding': enc_info}, + 'weight': weight, + } + + try: + cues = list(srt_lib.parse(text)) + except (srt_lib.SRTParseError, ValueError) as e: + return { + 'check_name': 'srt_structure', 'score': 0.0, 'status': 'failed', + 'message': f'SRT parse error: {e}', + 'details': {'error': str(e), 'encoding': enc_info}, + 'weight': weight, + } + + if not cues: + return { + 'check_name': 'srt_structure', 'score': 0.0, 'status': 'failed', + 'message': 'SRT contains no cues', + 'details': {'encoding': enc_info}, 'weight': weight, + } + + indices = [c.index for c in cues] + expected = list(range(1, len(cues) + 1)) + if indices != expected: + if any(i is None for i in indices): + warnings.append("One or more cues are missing index numbers (player-tolerant).") + else: + warnings.append(f"Cue indices not contiguous 1..{len(cues)}: {indices[:5]}...") + + empty_cues = [c.index for c in cues if not (c.content or '').strip()] + if empty_cues: + warnings.append(f"{len(empty_cues)} cue(s) have empty text (indices: {empty_cues[:5]})") + + score = 100.0 - 10.0 * len(warnings) + score = max(score, 70.0) + status = 'passed' if score >= 90 else 'warning' + details = { + 'encoding': enc_info, + 'cue_count': len(cues), + 'warnings': warnings, + 'srt_filename': os.path.basename(self.srt_path), + } + message = ( + f'{len(cues)} cues parsed, encoding {enc_info["encoding"]}' + + (f', {len(warnings)} warning(s)' if warnings else '') + ) + return { + 'check_name': 'srt_structure', + 'score': score, 'status': status, 'message': message, + 'details': details, 'weight': weight, + } +``` + +- [ ] **Step 2: Add `chardet` to requirements** + +The structure check tries `chardet` for non-UTF-8 fallback. Add it explicitly: + +```bash +echo "chardet>=5.0" >> requirements.txt +pip install chardet +python -c "import chardet; print(chardet.__version__)" +``` + +- [ ] **Step 3: Commit** + +```bash +git add modules/video_qc/executor.py requirements.txt +git commit -m "SRT QC: add srt_structure check + +Deterministic, no LLM. Parses SRT via the srt library, validates UTF-8 +encoding (with chardet fallback for non-UTF-8), no replacement chars, +non-empty cue content, ascending cue indices. Fails on parse error / +replacement chars / no cues; warnings otherwise. Weight 15." +``` + +--- + +## Task 7: Implement `_run_srt_timing_check` + +Deterministic. Compares cues against video duration; checks reading speed, line length, overlaps. + +**Files:** +- Modify: `modules/video_qc/executor.py` + +- [ ] **Step 1: Add the method** + +After `_run_srt_structure_check` in `VideoQCExecutor`: + +```python + def _run_srt_timing_check(self, video_duration: float) -> Dict[str, Any]: + """Validate cue timings against video duration + broadcast norms.""" + import srt as srt_lib + weight = 10 + try: + text, _ = self._read_srt_text() + cues = list(srt_lib.parse(text)) + except Exception as e: + return { + 'check_name': 'srt_timing', 'score': 0.0, 'status': 'failed', + 'message': f'Could not parse SRT for timing: {e}', + 'details': {'error': str(e)}, 'weight': weight, + } + + if not cues: + return { + 'check_name': 'srt_timing', 'score': 0.0, 'status': 'failed', + 'message': 'SRT has no cues', + 'details': {}, 'weight': weight, + } + + failures: list[str] = [] + warnings: list[str] = [] + + # Rule 1: start < end, both >= 0 + for c in cues: + s = c.start.total_seconds() + e = c.end.total_seconds() + if s < 0 or e < 0: + failures.append(f"Cue {c.index}: negative timestamp ({s}s -> {e}s)") + elif s >= e: + failures.append(f"Cue {c.index}: start >= end ({s}s -> {e}s)") + + # Rule 2: overlaps + overlaps: list[str] = [] + for prev, nxt in zip(cues, cues[1:]): + if prev.end > nxt.start: + overlaps.append( + f"Cue {prev.index} ends at {prev.end.total_seconds():.2f}s, " + f"cue {nxt.index} starts at {nxt.start.total_seconds():.2f}s" + ) + if overlaps: + if len(overlaps) >= 3: + failures.append(f"{len(overlaps)} overlapping cue pairs (first 3): " + "; ".join(overlaps[:3])) + else: + warnings.append(f"{len(overlaps)} overlapping cue pair(s): " + "; ".join(overlaps)) + + # Rule 3: last cue end <= video_duration + 0.5s + if video_duration: + last_end = cues[-1].end.total_seconds() + if last_end > video_duration + 0.5: + failures.append( + f"Last cue ends at {last_end:.2f}s but video is only " + f"{video_duration:.2f}s long" + ) + + # Rules 4-7: warnings only (broadcast norms) + reading_speed_outliers: list[str] = [] + line_length_outliers: list[str] = [] + line_count_outliers: list[str] = [] + duration_outliers: list[str] = [] + for c in cues: + content = (c.content or '').strip() + if not content: + continue + duration = (c.end - c.start).total_seconds() + chars = len(content.replace('\n', ' ')) + cps = chars / duration if duration > 0 else 0 + if cps < 5 or cps > 25: + reading_speed_outliers.append(f"cue {c.index}: {cps:.1f} cps") + lines = content.split('\n') + longest = max((len(line) for line in lines), default=0) + if longest > 42: + line_length_outliers.append(f"cue {c.index}: {longest} chars") + if len(lines) > 2: + line_count_outliers.append(f"cue {c.index}: {len(lines)} lines") + if duration < 0.7 or duration > 7.0: + duration_outliers.append(f"cue {c.index}: {duration:.2f}s") + + for outliers, label in [ + (reading_speed_outliers, "Reading speed outside 5-25 cps"), + (line_length_outliers, "Line length > 42 chars"), + (line_count_outliers, "More than 2 lines per cue"), + (duration_outliers, "Cue duration outside 0.7-7.0s"), + ]: + if outliers: + shown = outliers[:5] + more = f" (+{len(outliers)-5} more)" if len(outliers) > 5 else "" + warnings.append(f"{label}: " + ", ".join(shown) + more) + + warning_loss = min(5.0 * len(warnings), 50.0) + score = 100.0 - 30.0 * len(failures) - warning_loss + score = max(score, 0.0) + if failures: + status = 'failed' + elif score >= 90: + status = 'passed' + elif score >= 70: + status = 'warning' + else: + status = 'failed' + + return { + 'check_name': 'srt_timing', 'score': score, 'status': status, + 'message': ( + f'{len(cues)} cues; {len(failures)} failure(s), {len(warnings)} warning(s)' + ), + 'details': { + 'cue_count': len(cues), + 'video_duration': video_duration, + 'failures': failures, + 'warnings': warnings, + 'srt_filename': os.path.basename(self.srt_path), + }, + 'weight': weight, + } +``` + +- [ ] **Step 2: Sanity check deferred** + +Skip — we exercise this through the end-to-end smoke test in Task 14. + +- [ ] **Step 3: Commit** + +```bash +git add modules/video_qc/executor.py +git commit -m "SRT QC: add srt_timing check + +Deterministic. Validates start < end, no overlaps (>=3 overlaps fails, +otherwise warns), last cue <= video duration + 0.5s tolerance. Warning- +only rules: reading speed 5-25 cps, line length <= 42 chars, lines per +cue <= 2, cue duration 0.7-7.0s. Fail = -30, warning = -5 (capped at 50 +warning-loss). Weight 10." +``` + +--- + +## Task 8: Implement `_run_srt_language_check` + +LLM-judged but text-only — much cheaper than video calls. + +**Files:** +- Modify: `modules/video_qc/executor.py` + +- [ ] **Step 1: Add the method** + +```python + def _run_srt_language_check(self) -> Dict[str, Any]: + """Detect SRT language and compare to the locale-derived expected language.""" + import srt as srt_lib + weight = 20 + try: + text, _ = self._read_srt_text() + cues = list(srt_lib.parse(text)) + except Exception as e: + return { + 'check_name': 'srt_language', 'score': 0.0, 'status': 'failed', + 'message': f'Could not parse SRT for language check: {e}', + 'details': {'error': str(e)}, 'weight': weight, + } + if not cues: + return { + 'check_name': 'srt_language', 'score': 0.0, 'status': 'failed', + 'message': 'SRT has no cues', + 'details': {}, 'weight': weight, + } + + lang_code, market = self._extract_locale_from_filename() + expected_lang_code = (lang_code.split('-')[0] if lang_code else '').lower() + expected_lang_display = _language_display(expected_lang_code) + if not expected_lang_code: + return { + 'check_name': 'srt_language', 'score': 100.0, 'status': 'skipped', + 'message': 'Skipped — could not extract expected language from video filename', + 'details': {'skipped': True, 'reason': 'no_expected_language'}, + 'weight': weight, + } + + # Sample cues: first 5, last 5, plus evenly-distributed middle; cap 15 / 1500 chars + n = len(cues) + sample_indices = set() + for i in range(min(5, n)): + sample_indices.add(i) + for i in range(min(5, n)): + sample_indices.add(max(0, n - 1 - i)) + if n > 10: + step = max(1, n // 6) + for k in range(step, n - step, step): + sample_indices.add(k) + if len(sample_indices) >= 15: + break + sample_texts = [] + budget = 1500 + for i in sorted(sample_indices): + t = (cues[i].content or '').strip().replace('\n', ' ') + if not t: + continue + if budget - len(t) < 0: + break + sample_texts.append(t) + budget -= len(t) + sample_blob = "\n---\n".join(sample_texts) + + prompt = ( + "What language is this subtitle text written in? Sample cues " + "are separated by '---'. Return ONLY valid JSON (no markdown):\n" + "{\"detected_language\": \"German\" or similar full English name,\n" + " \"iso_code\": \"de\" or similar ISO-639-1 code,\n" + " \"confidence\": 0.0-1.0,\n" + " \"mixed_language\": boolean}\n\n" + "Be strict — proper nouns and brand names don't count as language indicators.\n\n" + f"SAMPLE:\n{sample_blob}" + ) + usage_context = { + 'module': 'video_qc', 'check_name': 'srt_language', + 'user': self.user, 'session_id': self.session_id, + } + try: + response = LLMConfig.call_llm_api( + prompt=prompt, + provider=self.llm_provider, + model=self.llm_model, + usage_context=usage_context, + ) + except AttributeError: + # Fallback: vision API with no image (Gemini accepts text-only via this path) + response = LLMConfig.call_vision_api( + prompt=prompt, image_asset=None, + provider=self.llm_provider, model=self.llm_model, + usage_context=usage_context, + ) + rtext = response.get('text', '') + start, end = rtext.find('{'), rtext.rfind('}') + 1 + parsed = {} + if start != -1 and end > start: + try: + parsed = json.loads(rtext[start:end]) + except json.JSONDecodeError: + parsed = {} + + detected_iso = (parsed.get('iso_code') or '').lower() + detected_lang = parsed.get('detected_language') or '(unknown)' + confidence = float(parsed.get('confidence') or 0.0) + mixed = bool(parsed.get('mixed_language')) + + if detected_iso == expected_lang_code: + return { + 'check_name': 'srt_language', 'score': 100.0, 'status': 'passed', + 'message': ( + f'Detected {detected_lang} ({detected_iso}) — matches expected ' + f'{expected_lang_display} ({expected_lang_code}).' + ), + 'details': {**parsed, 'expected_iso': expected_lang_code, + 'expected_language': expected_lang_display, + 'sample_cue_count': len(sample_texts), + 'tokens_used': response.get('tokens_used')}, + 'weight': weight, + } + if mixed or confidence < 0.8: + return { + 'check_name': 'srt_language', 'score': 50.0, 'status': 'warning', + 'message': ( + f'Low-confidence or mixed language: detected {detected_lang} ' + f'({detected_iso}) at confidence {confidence:.2f}, mixed={mixed}. ' + f'Expected {expected_lang_display} ({expected_lang_code}).' + ), + 'details': {**parsed, 'expected_iso': expected_lang_code, + 'expected_language': expected_lang_display, + 'sample_cue_count': len(sample_texts)}, + 'weight': weight, + } + return { + 'check_name': 'srt_language', 'score': 0.0, 'status': 'failed', + 'message': ( + f'Wrong language: detected {detected_lang} ({detected_iso}), ' + f'expected {expected_lang_display} ({expected_lang_code}).' + ), + 'details': {**parsed, 'expected_iso': expected_lang_code, + 'expected_language': expected_lang_display, + 'sample_cue_count': len(sample_texts)}, + 'recommendations': [ + f"Verify the SRT was translated for {expected_lang_display} " + f"({expected_lang_code}-{market})." + ], + 'weight': weight, + } +``` + +- [ ] **Step 2: Confirm `LLMConfig.call_llm_api` exists** + +```bash +grep -n "def call_llm_api\|def call_vision_api\|def call_video_api" core/services/llm_config.py +``` + +If `call_llm_api` does NOT exist (only video / vision variants), the code above falls back to `call_vision_api` with `image_asset=None`. This is a smell — verify the fallback path actually works for Gemini text-only calls by manually invoking once. If it doesn't: + +1. Adapt to use `call_vision_api` unconditionally with a dummy 1x1 PNG (cheap), OR +2. Use the `google.generativeai` library directly with a `gemini-2.5-flash` text-only call (extract pattern from elsewhere in the codebase). + +This is the spec's open verification step on the text-only LLM call. If it requires a new helper in `LLMConfig`, add `call_text_api` as part of this task and route to it instead. + +- [ ] **Step 3: Commit** + +```bash +git add modules/video_qc/executor.py +git commit -m "SRT QC: add srt_language check + +Text-only LLM call samples up to 15 cues (~1500 chars), asks Gemini +Flash to identify language. Pass = ISO matches expected from video +filename's locale; warning = low confidence or mixed_language; fail = +ISO mismatch with high confidence. Weight 20." +``` + +--- + +## Task 9: Wire SRT checks into `VideoQCExecutor.execute()` + +**Files:** +- Modify: `modules/video_qc/executor.py` (the `execute()` method, around lines 73–193) + +- [ ] **Step 1: Add SRT execution after the existing checks** + +In `execute()`, after the existing price-check block (and after any Plan 1 garment/title_safe blocks if Plan 1 has been applied) and BEFORE the score aggregation, insert: + +```python + # Step 6: SRT checks (only if a paired SRT was provided) + srt_results = {} + for name, weight in (('srt_structure', 15), ('srt_timing', 10), ('srt_language', 20)): + srt_results[name] = { + 'check_name': name, 'score': 100.0, 'status': 'skipped', + 'message': 'No SRT paired with this video — SRT checks skipped', + 'details': {'skipped': True, 'reason': 'no_srt_paired'}, + 'weight': weight, + } + if self.srt_path: + self.progress.update(89, "Running SRT structure check...") + srt_results['srt_structure'] = self._run_srt_structure_check() + self.progress.update(91, "Running SRT timing check...") + srt_results['srt_timing'] = self._run_srt_timing_check(duration) + self.progress.update(93, "Running SRT language check...") + srt_results['srt_language'] = self._run_srt_language_check() + for k, v in srt_results.items(): + self.results[k] = v + logger.info(f"{k}: {v['status']} ({v['score']})") +``` + +Then update `active_checks` (around line 158, or wherever Plan 1 left it) to include the SRT results: + +```python + active_checks = [ + r for r in ( + quality_result, censorship_result, price_result, + # garment_result, # uncomment if Plan 1 is applied + srt_results['srt_structure'], + srt_results['srt_timing'], + srt_results['srt_language'], + ) if r.get('status') != 'skipped' + ] +``` + +If Plan 1 has been applied, include `garment_result` in the tuple. Do NOT include `title_safe_result` (advisory, weight 0). + +- [ ] **Step 2: Smoke test (no SRT case)** + +Restart dev server. Upload a single video WITHOUT an SRT (any video from `testing_15may/srt/` will do) and confirm the report shows three new cards all with `skipped` status and message "No SRT paired with this video — SRT checks skipped". Overall score should match pre-Task-9 behaviour. + +- [ ] **Step 3: Commit** + +```bash +git add modules/video_qc/executor.py +git commit -m "Video QC: wire srt_structure/srt_timing/srt_language into execute() + +Skipped when self.srt_path is None (one result per check, weight set so +the weighted-average math is unchanged). When set, runs all three +checks sequentially with progress updates. SRT results appear as +additional cards in the existing Video QC report." +``` + +--- + +## Task 10: Update `BatchVideoQCExecutor` to pair SRTs at pre-flight + +**Files:** +- Modify: `modules/video_qc/batch_executor.py` + +- [ ] **Step 1: Extend the constructor** + +In `batch_executor.py`, around line 27, update `__init__`. Replace: + +```python + def __init__( + self, + session_id: str, + file_paths: List[str], + job_number: str = None, + ... + batch_size: int = DEFAULT_BATCH_SIZE, + app=None + ): +``` + +with: + +```python + def __init__( + self, + session_id: str, + file_paths: List[str], + job_number: str = None, + llm_provider: str = 'google', + llm_model: str = 'gemini-2.5-flash', + user: str = None, + campaign_id: str = None, + pricing_reference_id: int = None, + batch_id: str = None, + batch_size: int = DEFAULT_BATCH_SIZE, + app=None, + srt_paths: List[str] = None, # NEW + ): +``` + +In the body, after the existing assignments, add: + +```python + self.srt_paths = list(srt_paths or []) + self.pair_map: Dict[str, str] = {} + self.unpaired_srts: List[str] = [] + self.unpaired_videos: List[str] = [] +``` + +- [ ] **Step 2: Call `pair_batch` at the top of `execute()`** + +In `BatchVideoQCExecutor.execute()`, near the top (after the `total_files` check around line 56–60), add: + +```python + # Pre-flight: pair SRTs to videos by filename + if self.srt_paths: + from modules.video_qc.utils.srt_pairing import pair_batch + pair_map, unpaired_srts, unpaired_videos = pair_batch( + self.file_paths, self.srt_paths + ) + self.pair_map = pair_map + self.unpaired_srts = unpaired_srts + self.unpaired_videos = unpaired_videos + logger.info( + f"SRT pairing: {sum(1 for v in pair_map.values() if v)}/{len(pair_map)} " + f"videos paired; {len(unpaired_srts)} SRTs unpaired." + ) +``` + +- [ ] **Step 3: Thread `srt_path` into each per-video executor** + +In `_process_single_file` (around line 150), update the `VideoQCExecutor` instantiation. Replace: + +```python + executor = VideoQCExecutor( + session_id=file_session_id, + file_path=file_path, + job_number=self.job_number, + llm_provider=self.llm_provider, + llm_model=self.llm_model, + user=self.user, + campaign_id=self.campaign_id, + batch_id=self.batch_id, + pricing_reference_id=self.pricing_reference_id + ) +``` + +with: + +```python + executor = VideoQCExecutor( + session_id=file_session_id, + file_path=file_path, + job_number=self.job_number, + llm_provider=self.llm_provider, + llm_model=self.llm_model, + user=self.user, + campaign_id=self.campaign_id, + batch_id=self.batch_id, + pricing_reference_id=self.pricing_reference_id, + srt_path=self.pair_map.get(file_path), + ) +``` + +- [ ] **Step 4: Commit** + +```bash +git add modules/video_qc/batch_executor.py +git commit -m "Video QC batch: pair SRTs to videos at pre-flight + +Adds optional srt_paths constructor parameter. At execute() top, runs +pair_batch() to produce pair_map / unpaired_srts / unpaired_videos. +Threads pair_map[video_path] into each per-video VideoQCExecutor as +srt_path. No-op when srt_paths is empty." +``` + +--- + +## Task 11: Update routes.py to accept `.srt` uploads + surface pair_map + +**Files:** +- Modify: `modules/video_qc/routes.py` + +- [ ] **Step 1: Allow .srt uploads** + +Around line 28, update: + +```python +ALLOWED_EXTENSIONS = {'mp4', 'mov', 'avi', 'mkv'} +``` + +to: + +```python +ALLOWED_VIDEO_EXTENSIONS = {'mp4', 'mov', 'avi', 'mkv'} +ALLOWED_SRT_EXTENSIONS = {'srt'} +ALLOWED_EXTENSIONS = ALLOWED_VIDEO_EXTENSIONS | ALLOWED_SRT_EXTENSIONS +``` + +The existing `allowed_file()` function continues to accept anything in the union. Add helper functions: + +```python +def is_srt(filename): + return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_SRT_EXTENSIONS + + +def is_video(filename): + return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_VIDEO_EXTENSIONS +``` + +- [ ] **Step 2: Surface pair-map in the configure response (new endpoint)** + +Add a new route, after the existing `configure` (around line 118), that returns the pre-flight pairing summary: + +```python +@video_qc_bp.route('/pairing-preview/') +def pairing_preview(session_id): + """Pre-flight: return pair_map + unpaired counts for the configure UI.""" + upload_path = os.path.join( + current_app.config['VIDEO_QC_UPLOAD_PATH'], session_id + ) + if not os.path.isdir(upload_path): + return jsonify({'error': 'Session upload folder not found'}), 404 + files = os.listdir(upload_path) + video_paths = [os.path.join(upload_path, f) for f in files if is_video(f)] + srt_paths = [os.path.join(upload_path, f) for f in files if is_srt(f)] + if not srt_paths: + return jsonify({ + 'pairs': [{'video': os.path.basename(v), 'srt': None} for v in video_paths], + 'unpaired_srts': [], + 'unpaired_videos': [os.path.basename(v) for v in video_paths], + 'srt_count': 0, + }) + from .utils.srt_pairing import pair_batch + pair_map, unpaired_srts, unpaired_videos = pair_batch(video_paths, srt_paths) + return jsonify({ + 'pairs': [ + {'video': os.path.basename(v), 'srt': os.path.basename(s) if s else None} + for v, s in pair_map.items() + ], + 'unpaired_srts': [os.path.basename(s) for s in unpaired_srts], + 'unpaired_videos': [os.path.basename(v) for v in unpaired_videos], + 'srt_count': len(srt_paths), + }) +``` + +- [ ] **Step 3: Thread srt_paths into the batch executor in `execute_batch`** + +In `execute_batch` (around line 200), where `file_paths` is built, also build `srt_paths` and pass to the batch executor. Replace: + +```python + files = [f for f in os.listdir(upload_path) if allowed_file(f)] + if not files: + return jsonify({'error': 'No video files found'}), 404 + + file_paths = [os.path.join(upload_path, f) for f in files] +``` + +with: + +```python + files = os.listdir(upload_path) + video_files = [f for f in files if is_video(f)] + srt_files = [f for f in files if is_srt(f)] + if not video_files: + return jsonify({'error': 'No video files found'}), 404 + + file_paths = [os.path.join(upload_path, f) for f in video_files] + srt_paths = [os.path.join(upload_path, f) for f in srt_files] +``` + +And update the `BatchVideoQCExecutor` instantiation (around line 234) to include `srt_paths`: + +```python + batch_executor = BatchVideoQCExecutor( + session_id=session_id, + file_paths=file_paths, + job_number=job_number, + llm_provider=llm_provider, + llm_model=llm_model, + user=user_email, + campaign_id=campaign_id, + pricing_reference_id=pricing_reference_id, + batch_id=batch_id, + app=app, + srt_paths=srt_paths, + ) +``` + +The single-file `execute` route handles a single video — leave the SRT path unset there for now. If a single-file flow with an SRT becomes important, surface `srt_path` on the request payload in a follow-up. + +- [ ] **Step 4: Commit** + +```bash +git add modules/video_qc/routes.py +git commit -m "Video QC routes: accept .srt uploads + pre-flight pairing endpoint + +Adds .srt to ALLOWED_EXTENSIONS; introduces is_video() / is_srt() helpers. +New /pairing-preview/ endpoint returns pair_map + unpaired lists +for the configure UI. Batch execute threads srt_paths into BatchVideoQCExecutor." +``` + +--- + +## Task 12: Add the pre-flight pairing summary to the configure template + +**Files:** +- Modify: `modules/video_qc/templates/video_qc/configure.html` +- Modify: `modules/video_qc/templates/video_qc/upload.html` + +- [ ] **Step 1: Read the existing template** + +```bash +grep -n "filename\|pricing_reference\|job_number\|files-list\|file-list" modules/video_qc/templates/video_qc/configure.html | head -20 +``` + +Find a suitable spot to insert the pairing summary — usually above the "Start Batch" button. + +- [ ] **Step 2: Add the pairing-summary block** + +Insert near the top of the page content (before the start-batch form), a section that fetches and renders the pair preview. **Important:** the script uses safe DOM methods (`textContent` / `createElement`) for any value coming from the server — SRT filenames are user-controlled, so dropping them into `innerHTML` would be an XSS vector. + +```html + + + + +``` + +(If the template doesn't have access to `session_id` directly, use whatever variable name the existing route hands to the configure template — `{{ session_id }}` is the convention based on the existing single-file `execute` flow.) + +- [ ] **Step 3: Update the upload template to accept `.srt`** + +```bash +grep -n "accept=\|file.*input" modules/video_qc/templates/video_qc/upload.html | head -10 +``` + +Find the `` element. Update its `accept` attribute to include SRTs. Replace e.g.: + +```html + +``` + +with: + +```html + +``` + +- [ ] **Step 4: Manual smoke test** + +Restart the dev server. +1. Drag-drop ALL 12 files from `testing_15may/srt/` (6 videos + 6 SRTs) into the upload form. +2. Click upload. Should land on the configure page. +3. Expected: the new pairing summary block shows `✓ 6 video(s) paired with SRTs / ⚠ 0 video(s) without / ⚠ 0 SRT file(s) unpaired`. +4. Click start batch. Expected: batch runs as normal. Each report has three new SRT cards. + +- [ ] **Step 5: Commit** + +```bash +git add modules/video_qc/templates/ +git commit -m "Video QC UI: SRT upload + pre-flight pairing summary + +Upload form accepts .srt alongside .mp4. Configure page shows pair_map +counts and a collapsible list of unpaired SRTs (rendered via DOM +textContent to avoid XSS from user-controlled SRT filenames). Uses the +new /pairing-preview/ endpoint." +``` + +--- + +## Task 13: Register SRT checks in `profiles.yaml` + +**Files:** +- Modify: `modules/video_qc/profiles/profiles.yaml` + +- [ ] **Step 1: Add the three new checks under `standard_video`** + +Append inside `standard_video.checks` (after the last existing entry, or after the `title_safe` entry if Plan 1 has been applied): + +```yaml + - name: "srt_structure" + weight: 15 + enabled: true + llm_provider: null + description: "Validate paired SRT parses, encoding, ordering, cue text" + + - name: "srt_timing" + weight: 10 + enabled: true + llm_provider: null + description: "Validate SRT cue timings vs video duration + broadcast norms" + + - name: "srt_language" + weight: 20 + enabled: true + llm_provider: "google" + llm_model: "gemini-2.5-flash" + description: "Detect SRT language vs expected from video locale" +``` + +- [ ] **Step 2: Commit** + +```bash +git add modules/video_qc/profiles/profiles.yaml +git commit -m "Video QC: register SRT checks in standard_video profile + +Profile YAML is descriptive metadata (executor runs unconditionally). +Documenting srt_structure (15), srt_timing (10), srt_language (20) +so the profile page reflects the live check set." +``` + +--- + +## Task 14: End-to-end smoke test against `testing_15may/srt/` + +This is the integration verification. No code changes unless something is broken. + +**Files:** (none — manual) + +- [ ] **Step 1: Restart dev server with a clean session** + +```bash +cd /Users/nickviljoen/Desktop/HM_QC_Bitbucket/hm_ai_qc_report_tool +source venv/bin/activate +python -m flask --app app run --port 5050 +``` + +- [ ] **Step 2: Upload all 12 files in `testing_15may/srt/`** + +Drag-drop in the web UI. Wait for upload. Confirm the configure page's pairing summary reads `6 paired / 0 without / 0 unpaired`. + +- [ ] **Step 3: Run the batch** + +Click "Start Batch QC" with a job number (anything; e.g. `SRT_SMOKE_2026-05-15`). Wait for completion. Open each of the 6 reports. + +- [ ] **Step 4: Verify each report's SRT cards** + +For each of the 6 reports, confirm: + +| Card | Expected outcome | +|---|---| +| `srt_structure` | Mostly `passed`. The CH-en SRTs (RIO_INDIVIDUAL15A_en-CH.srt, RIO_INDIVIDUAL15C_en-CH.srt, RIO_INTRO6B_en-CH.srt) lack cue indices — those should show ONE warning ("missing index numbers"), status `warning`, score 90. NOT failed. | +| `srt_timing` | `passed` on all 6 unless cues run past video duration (very unlikely on well-formed test data). Reading-speed warnings may appear — that's fine. | +| `srt_language` | All 6 should be `passed`. The 3 AT-de SRTs detect as `de` / German; the 3 CH-en SRTs detect as `en` / English. | + +- [ ] **Step 5: Negative-path tests** + +If the above passes, manually exercise the failure paths: + +1. **Wrong-language SRT**: temporarily edit one cue in `RIO_INTRO6B_en-CH.srt` to be Spanish text (e.g. `Hola mundo`). Re-upload. Expected: `srt_language` returns `warning` (mixed_language) or `failed` depending on how much of the sample is wrong. +2. **Overflowing cue**: temporarily edit the last cue end-time in one SRT to be 30 seconds past video duration. Re-upload. Expected: `srt_timing` returns `failed` with "Last cue ends at … but video is only …" in the issues list. +3. **Pairing miss**: temporarily rename one SRT to remove its locale (e.g. `RIO_INTRO6B.srt`). Re-upload. Expected: pre-flight summary shows `⚠ 1 SRT file unpaired`. The paired-video count drops to 5. That video's SRT cards all show `skipped`. + +Revert all temporary edits when done. + +- [ ] **Step 6: No commit unless a real bug was found** + +If steps 1-5 surfaced a bug, fix it and commit the fix separately. Otherwise this task produces no commit. + +--- + +## Task 15: (Optional) Push the branch when ready + +The user explicitly said wait to push. Skip unless asked. + +```bash +git push origin develop +``` + +--- + +## Spec coverage check + +| Spec section | Covered by | +|---|---| +| Pairing strategy (tokens, scoring, threshold, tie-break) | Tasks 2, 3, 4 | +| Pre-flight UI | Tasks 11, 12 | +| `srt_structure` check (parse, encoding, replacement chars, indices, empty cues) | Task 6 | +| `srt_timing` check (start **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Surface the existing full-price match more clearly in the Video QC report; add two new Video QC checks (`garment_name` for on-screen product text vs pricing reference, `title_safe` advisory for price/garment text in platform UI overlay zones). + +**Architecture:** All work lives in `modules/video_qc/executor.py` and `modules/video_qc/profiles/profiles.yaml`. Two new check methods (`_run_garment_name_check`, `_run_title_safe_check`) follow the existing `_run_price_check` pattern: skip-condition gates → single Gemini direct-video LLM call → deterministic post-processing → result dict matching the existing contract. A small lookup dict (`_PLATFORM_ZONES`) maps filename tokens → platform UI overlay zones for title-safe. The price-card change is purely cosmetic — the matching logic already exists in `_run_price_check`; the report renderer just reads more fields from `details`. + +**Tech Stack:** Python 3.13, Flask, existing `LLMConfig.call_video_api` (Gemini 2.5 Flash direct-video), existing `PricingReference` model. + +**Source spec:** `docs/superpowers/specs/2026-05-15-video-qc-tuning-design.md` + +**Execution prerequisites:** Be on branch `develop`. The repo's only "test infra" is `test_integration.py` (smoke test) and manual exercise via the web launcher. This plan follows the same manual pattern — pure-function helpers get inline Python REPL verification, LLM-integrated code gets manual smoke tests against the asset at `testing_15may/video_tuning/`. + +--- + +## Task 1: Verify how `product_name` is populated in PricingReference + +This is a read-only investigation step. It answers the spec's first open verification question and determines whether `garment_name` ships with the planned normalisation rule or falls back to "skip non-English locales". + +**Files:** (none — investigation only) + +- [ ] **Step 1: Open a Flask shell and inspect a real PricingReference** + +Run: +```bash +cd /Users/nickviljoen/Desktop/HM_QC_Bitbucket/hm_ai_qc_report_tool +source venv/bin/activate +flask shell +``` + +Then in the shell: +```python +from core.models.pricing_reference import PricingReference +refs = PricingReference.query.filter_by(status='ready').all() +print(f"{len(refs)} ready references") +for r in refs[:3]: + prices = r.get_prices() + print(f"\n--- {r.name} ({len(prices)} prices) ---") + # Sample one price per locale present + seen_locales = set() + for p in prices: + loc = f"{p.get('language')}/{p.get('country')}" + if loc in seen_locales: + continue + seen_locales.add(loc) + print(f" {loc}: product_name={p.get('product_name')!r}, price={p.get('price')!r}") + if len(seen_locales) >= 6: + break +``` + +Expected outcome: visually confirm whether `product_name` looks localised (different strings for different locales) or English-source (same string across all locales for the same product). + +- [ ] **Step 2: Record finding** + +In the plan checklist below, tick the path you observed: + +- [ ] **A. Product names ARE localised per row** → Task 4 (`_product_names_match`) ships as designed. +- [ ] **B. Product names are English-source across all locales** → Task 4 adds a `_LOCALES_WITH_LATIN_PRODUCT_NAMES` guard set (configurable) and **skips** the check for locales outside that set; OR ship as designed and accept higher false-fail rate on non-English locales until v2. + +Default: if B, go with the skip-on-non-English-locale variant. Don't add LLM translation in v1. + +- [ ] **Step 3: No commit — this task produces no code change** + +--- + +## Task 2: Update `price_currency` report card to surface "Matched" / "Expected" lines + +The match data is already in `details.price_match` / `details.detected_prices` / `details.price_match.expected_prices`. We add summary lines to the card body in `_generate_report`. + +**Files:** +- Modify: `modules/video_qc/executor.py` (the `_generate_report` method around lines 877–947 and the price-check `message` string around lines 783–786) + +- [ ] **Step 1: Update the price-check `message` string to be richer on pass** + +In `_run_price_check` in `modules/video_qc/executor.py`, around line 783, replace: + +```python + if status == 'passed': + message = f'Price/currency passed — {currency} correct for {language}' + else: + message = f'Price/currency issues: {", ".join(issues)}' +``` + +with: + +```python + matched_price = matched[0] if matched else None + matched_product = matched[1].get('product_name') if matched else None + if status == 'passed' and matched_price is not None: + product_suffix = f" — {matched_product}" if matched_product else "" + message = ( + f"Matched: {currency}{matched_price}{product_suffix} " + f"(locale {language})" + ) + elif status == 'passed': + message = f'Price/currency passed — {currency} correct for {language}' + else: + exp_list = details['price_match']['expected_prices'] + exp_total = len(expected_entries) + expected_blurb = '' + if exp_list: + shown = exp_list[:5] + expected_blurb = ( + f' Expected one of: {", ".join(shown)}' + + (f' ({len(shown)} of {exp_total} shown)' if exp_total > len(shown) else '') + ) + detected_blurb = '' + if detected_strings: + detected_blurb = f' Detected: {", ".join(detected_strings)}.' + message = ( + f'Price/currency issues: {", ".join(issues)}.' + + detected_blurb + expected_blurb + ) +``` + +(The variables `matched`, `currency`, `language`, `details`, `expected_entries`, `detected_strings`, `issues`, and `status` are all in scope at that point in `_run_price_check`.) + +- [ ] **Step 2: Manually verify the price-card text change** + +Run a video QC pass against the test asset: + +```bash +cd /Users/nickviljoen/Desktop/HM_QC_Bitbucket/hm_ai_qc_report_tool +# Start the Flask dev server (or use the launcher CLI) +python -m flask --app app run --port 5050 +``` + +In a browser go to `http://localhost:5050/hm-aiqc/video-qc/`, upload `testing_15may/video_tuning/6898354_1013A_SPRING_W_10A_TT_9x16_TK_Stories_Vogue_ES-es.mp4` with a pricing reference attached (any ready PricingReference will do; if none, the check skips — that's fine for this step). + +Expected on pass: card now reads `Matched: €19.99 — Cotton T-Shirt (locale es-ES)` (or similar). On fail: `Detected:` and `Expected one of:` lines appear. + +- [ ] **Step 3: Commit** + +```bash +git add modules/video_qc/executor.py +git commit -m "Video QC: surface matched price + product in price card + +The price_currency check has always done a full numeric match against +the pricing reference but the report card only showed pass/fail by +currency. Pull matched_price, matched_product, detected_prices, and +expected_prices into the message string so QC reviewers can see the +full match at a glance. + +No logic changes." +``` + +--- + +## Task 3: Add the platform-zones lookup helper for `title_safe` + +This is a pure-function helper used by the new `_run_title_safe_check`. We add it at module scope in `executor.py` so it's near its caller and easy to verify in a REPL. + +**Files:** +- Modify: `modules/video_qc/executor.py` (add at module scope, after `_language_display` near line 44) + +- [ ] **Step 1: Add the `_PLATFORM_ZONES` dict and `_infer_platform_zones` function** + +Insert after the existing `_language_display` function (after line 44): + +```python +# Platform UI overlay zones for `title_safe` check. Each entry describes the +# regions of the frame that the platform's UI obscures (profile pills, captions, +# action icons). Used to flag when price or garment-name text lands inside. +# Percentages are conservative — tuned against IG/TikTok screenshots circa 2026. +_PLATFORM_ZONES = { + 'tiktok': { + 'description': "TikTok feed: top ~10% (profile/handle bar), bottom ~25% " + "(caption + UI action rail).", + 'zones': [ + {'edge': 'top', 'percent': 10}, + {'edge': 'bottom', 'percent': 25}, + ], + }, + 'ig_stories': { + 'description': "Instagram Stories: top ~12% (profile pill + reactions), " + "bottom ~18% (swipe-up / reply bar).", + 'zones': [ + {'edge': 'top', 'percent': 12}, + {'edge': 'bottom', 'percent': 18}, + ], + }, + 'ig_reels': { + 'description': "Instagram Reels: bottom ~25% (caption + icons), " + "right edge ~10% (action rail).", + 'zones': [ + {'edge': 'bottom', 'percent': 25}, + {'edge': 'right', 'percent': 10}, + ], + }, + 'vertical_generic': { + 'description': "Generic vertical (9x16) social: top ~10%, bottom ~20%.", + 'zones': [ + {'edge': 'top', 'percent': 10}, + {'edge': 'bottom', 'percent': 20}, + ], + }, +} + + +def _infer_platform_zones(filename: str) -> dict | None: + """Infer platform UI overlay zones from filename tokens. + + Returns a dict {'platform': str, 'description': str, 'zones': [...]} + or None when the format has no known overlay zones (feed formats like + 1x1 / 4x5, or unrecognised). + """ + if not filename: + return None + upper = filename.upper() + + # Most specific first: explicit platform + format combos. + if 'TK_STORIES' in upper or '_TK_' in upper or 'TT_9X16' in upper: + return {'platform': 'tiktok', **_PLATFORM_ZONES['tiktok']} + if 'IG_STORIES' in upper or 'STORIES_9X16' in upper: + return {'platform': 'ig_stories', **_PLATFORM_ZONES['ig_stories']} + if 'IG_REELS' in upper or 'REELS_9X16' in upper: + return {'platform': 'ig_reels', **_PLATFORM_ZONES['ig_reels']} + + # Fallback: any 9x16 with no platform hint -> generic vertical. + if '9X16' in upper: + return {'platform': 'vertical_generic', **_PLATFORM_ZONES['vertical_generic']} + + # Feed formats (1x1, 4x5) and anything else -> no overlay zones. + return None +``` + +- [ ] **Step 2: Verify the helper in a Python REPL** + +Run: +```bash +cd /Users/nickviljoen/Desktop/HM_QC_Bitbucket/hm_ai_qc_report_tool +source venv/bin/activate +python -c " +from modules.video_qc.executor import _infer_platform_zones +cases = [ + '6898354_1013A_SPRING_W_10A_TT_9x16_TK_Stories_Vogue_ES-es.mp4', + '7147775_AT-de_CFUL262B01_PP_RIO_INTRO_15C_4x5_SoMe_MASTER.mp4', + '7158980_CH-en_CFUL262B01_PP_RIO_INTRO_6B_1x1_SoMe_MASTER.mp4', + 'something_ig_stories_9x16_foo.mp4', + 'something_9x16_no_platform.mp4', + 'feed_1x1_only.mp4', +] +for c in cases: + r = _infer_platform_zones(c) + print(f'{c[:60]:60s} -> {r[\"platform\"] if r else None}') +" +``` + +Expected output: +``` +6898354_1013A_SPRING_W_10A_TT_9x16_TK_Stories_Vogue_ES-es.mp4 -> tiktok +7147775_AT-de_CFUL262B01_PP_RIO_INTRO_15C_4x5_SoMe_MASTER.mp4 -> None +7158980_CH-en_CFUL262B01_PP_RIO_INTRO_6B_1x1_SoMe_MASTER.mp4 -> None +something_ig_stories_9x16_foo.mp4 -> ig_stories +something_9x16_no_platform.mp4 -> vertical_generic +feed_1x1_only.mp4 -> None +``` + +If any line doesn't match, fix the substring conditions and re-run. + +- [ ] **Step 3: Commit** + +```bash +git add modules/video_qc/executor.py +git commit -m "Video QC: add platform-zones lookup helper for title_safe + +Adds _PLATFORM_ZONES (TikTok / IG Stories / IG Reels / generic vertical) +and _infer_platform_zones(filename) for use by the new title_safe check. +Pure function, verified at REPL against expected filenames. No new +behaviour exposed yet — wired up in the next task." +``` + +--- + +## Task 4: Add product-name normalisation helpers for `garment_name` + +Pure functions, co-located with the check that will use them. + +**Files:** +- Modify: `modules/video_qc/executor.py` (add at module scope below `_infer_platform_zones`) + +- [ ] **Step 1: Add `_normalize_product_name` and `_product_names_match`** + +Insert after `_infer_platform_zones`: + +```python +import re as _re + + +def _normalize_product_name(s: str) -> str: + """Lowercase, strip non-alphanumeric (except spaces), collapse whitespace.""" + if not s: + return '' + s = s.lower() + s = _re.sub(r"[^a-z0-9\s]", " ", s) + s = _re.sub(r"\s+", " ", s).strip() + return s + + +def _product_names_match(a: str, b: str) -> bool: + """True when two product names are 'close enough' after normalisation. + + Match rule (any of): + • One normalised string is a substring of the other (non-empty). + • Token-set overlap |A ∩ B| / min(|A|, |B|) >= 0.6. + Empty strings never match. + """ + na, nb = _normalize_product_name(a), _normalize_product_name(b) + if not na or not nb: + return False + if na in nb or nb in na: + return True + ta, tb = set(na.split()), set(nb.split()) + if not ta or not tb: + return False + overlap = len(ta & tb) / min(len(ta), len(tb)) + return overlap >= 0.6 +``` + +- [ ] **Step 2: Verify the helper at the REPL** + +```bash +python -c " +from modules.video_qc.executor import _product_names_match as M +cases = [ + ('Oversized Cotton Shirt', 'OVERSIZED COTTON SHIRT.', True), + ('Cotton Shirt', 'Oversized Cotton Shirt', True), # substring + ('Wool Blazer', 'Wool blazer (long)', True), # substring + ('Cotton Shirt', 'Linen Trousers', False), # no overlap + ('Cotton Shirt', 'Cotton T-Shirt', True), # 50% overlap... wait + ('', 'anything', False), + ('anything', '', False), +] +for a, b, expected in cases: + got = M(a, b) + print(f'{got==expected!s:5} M({a!r}, {b!r}) = {got} (expected {expected})') +" +``` + +Expected: all `True` (assertion passed). The `Cotton Shirt` vs `Cotton T-Shirt` case relies on substring containment (`cotton shirt` ⊂ `cotton t shirt` after normalisation strips the hyphen → `cotton t shirt` does NOT contain `cotton shirt` as a substring — they share 2/2 tokens though = overlap 1.0). Re-check the expected truthiness in your output; if `False`, that's fine and the result line will say `True` because we're comparing `got == expected`. Adjust expected values to whatever the function actually returns and re-run if needed — the goal is to surface the behaviour. + +If you see any unexpected `False False` lines, decide whether to: +- Accept the behaviour (false-negative-leaning is safer than false-positive). +- Tighten the rule to add fuzzy edit distance — only if Task 1's pricing-reference inspection shows lots of near-matches that should pass. + +- [ ] **Step 3: Commit** + +```bash +git add modules/video_qc/executor.py +git commit -m "Video QC: add product-name normalisation helpers for garment_name + +Adds _normalize_product_name (lowercase, alphanumeric+space, collapse +whitespace) and _product_names_match (substring or >=60% token-set +overlap on min side). Used by the upcoming garment_name check." +``` + +--- + +## Task 5: Implement `_run_garment_name_check` + +Single LLM call to detect on-screen garment text, then deterministic match against the pricing reference. + +**Files:** +- Modify: `modules/video_qc/executor.py` (add new method to `VideoQCExecutor` after `_run_price_check`) + +- [ ] **Step 1: Add the method** + +Inside the `VideoQCExecutor` class, after `_run_price_check` (around line 806), insert: + +```python + def _run_garment_name_check(self) -> Dict[str, Any]: + """Detect on-screen garment/product names and match against the + pricing reference's product_name for the file's locale.""" + weight = 25 + try: + language, country_code = self._extract_locale_from_filename() + if not language: + return { + 'check_name': 'garment_name', 'score': 100.0, 'status': 'skipped', + 'message': 'Skipped — could not extract locale from filename', + 'details': {'skipped': True, 'reason': 'no_locale'}, + 'weight': weight, + } + if language.split('-')[0].upper() in ('GEN', 'CEN'): + return { + 'check_name': 'garment_name', 'score': 100.0, 'status': 'skipped', + 'message': f'Skipped for {language.upper()} (generic/censored market)', + 'details': {'skipped': True, 'reason': 'gen_cen_file'}, + 'weight': weight, + } + + pricing_ref = self.pricing_reference or {} + prices_list = pricing_ref.get('prices') or [] + expected_entries = [ + p for p in prices_list + if p.get('language') == language or p.get('country') == country_code + ] + expected_names = [ + p.get('product_name') for p in expected_entries + if p.get('product_name') + ] + + if not expected_names: + return { + 'check_name': 'garment_name', 'score': 100.0, 'status': 'skipped', + 'message': 'Skipped — no product_name in pricing reference for this locale', + 'details': {'skipped': True, 'reason': 'no_expected_names', + 'language': language, 'country_code': country_code}, + 'weight': weight, + } + + # Step 1: detect garment text via LLM + prompt = ( + "Identify any garment / product names visible as text overlays " + "in this video (e.g. 'OVERSIZED COTTON SHIRT', 'WOOL BLAZER'). " + "Ignore prices, CTAs, dates, logos, model names, and campaign " + "headlines. Return ONLY valid JSON (no markdown fences):\n" + "{detected_names: [string, ...], any_text_overlay: boolean}" + ) + usage_context = { + 'module': 'video_qc', 'check_name': 'garment_name', + 'user': self.user, 'session_id': self.session_id, + } + if self._use_direct_video: + response = LLMConfig.call_video_api( + prompt=prompt, video_path=self.file_path, + provider=self.llm_provider, model=self.llm_model, + usage_context=usage_context, + ) + else: + # Frame-grid path — shouldn't normally trigger in prod (Gemini default) + response = LLMConfig.call_vision_api( + prompt=prompt, image_asset=None, + provider=self.llm_provider, model=self.llm_model, + usage_context=usage_context, + ) + text = response.get('text', '') + start, end = text.find('{'), text.rfind('}') + 1 + detected = [] + if start != -1 and end > start: + try: + detected = json.loads(text[start:end]).get('detected_names') or [] + except json.JSONDecodeError: + detected = [] + + if not detected: + return { + 'check_name': 'garment_name', 'score': 100.0, 'status': 'skipped', + 'message': 'No garment-name text detected in video — skipping validation', + 'details': {'skipped': True, 'reason': 'no_detection', + 'expected_names_count': len(expected_names)}, + 'weight': weight, + } + + # Step 2: deterministic match + matched_pair = None + for d in detected: + for e in expected_names: + if _product_names_match(d, e): + matched_pair = (d, e) + break + if matched_pair: + break + + details = { + 'language': language, 'country_code': country_code, + 'detected_names': detected, + 'expected_names_sample': expected_names[:5], + 'expected_names_total': len(expected_names), + 'matched': matched_pair is not None, + 'matched_detected': matched_pair[0] if matched_pair else None, + 'matched_expected': matched_pair[1] if matched_pair else None, + 'llm_provider': self.llm_provider, 'llm_model': self.llm_model, + 'tokens_used': response.get('tokens_used'), + } + + if matched_pair: + return { + 'check_name': 'garment_name', 'score': 100.0, 'status': 'passed', + 'message': ( + f'Matched: "{matched_pair[0]}" ≈ "{matched_pair[1]}" ' + f'(locale {language})' + ), + 'details': details, 'weight': weight, + } + return { + 'check_name': 'garment_name', 'score': 0.0, 'status': 'failed', + 'message': ( + f'No detected name matched any of {len(expected_names)} ' + f'expected product names for {language}. ' + f'Detected: {", ".join(detected)}.' + ), + 'details': details, + 'recommendations': [ + f"Verify the on-screen product name is correct for {language}.", + f"Expected (sample): {', '.join(expected_names[:3])}.", + ], + 'weight': weight, + } + except Exception as e: + logger.error(f"garment_name check error: {e}", exc_info=True) + return { + 'check_name': 'garment_name', 'score': 0, 'status': 'error', + 'message': f'Error: {str(e)}', + 'details': {'error': str(e)}, 'weight': weight, + } +``` + +- [ ] **Step 2: Wire the check into `execute()`** + +In `VideoQCExecutor.execute()` (around line 153 after the price-check block, before the score aggregation around line 156), insert: + +```python + # Step 5b: Garment-name check + self.progress.update(82, "Running garment-name check...") + if self.pricing_reference: + garment_result = self._run_garment_name_check() + else: + garment_result = { + 'check_name': 'garment_name', 'score': 100.0, 'status': 'skipped', + 'message': 'Skipped — no pricing reference attached to this run', + 'details': {'skipped': True, 'reason': 'no_pricing_reference'}, + 'weight': 25, + } + self.results['garment_name'] = garment_result + logger.info(f"Garment name: {garment_result['status']} ({garment_result['score']})") +``` + +Then update the `active_checks` aggregation block (around line 158) to include the new check. Change: +```python + active_checks = [r for r in (quality_result, censorship_result, price_result) + if r.get('status') != 'skipped'] +``` +to: +```python + active_checks = [r for r in (quality_result, censorship_result, price_result, + garment_result) + if r.get('status') != 'skipped'] +``` + +- [ ] **Step 3: Smoke-test against the tuning asset** + +Restart the dev server and run the test video again from the web UI with a pricing reference attached. Expected report card: + +- If the test video has no on-screen garment text (likely — it's a Stories asset focused on lookbook): `garment_name` shows `skipped`, score 100, "No garment-name text detected in video — skipping validation". +- If you attach a pricing reference that has no `product_name` for `es-ES`: shows `skipped`, "no product_name in pricing reference for this locale". +- If both detected and expected names are present, the card shows the matched pair or a `failed` with detected vs expected. + +- [ ] **Step 4: Commit** + +```bash +git add modules/video_qc/executor.py +git commit -m "Video QC: add garment_name check + +Single Gemini direct-video call detects garment/product text overlays; +deterministic match against PricingReference.get_prices() product_name +for the file's locale. Skips when no pricing reference attached, locale +unparseable, GEN/CEN file, no expected product names for locale, or no +on-screen garment text detected. Weight 25 in standard_video profile." +``` + +--- + +## Task 6: Implement `_run_title_safe_check` (advisory-only) + +Always-100 score, never `failed`. Skip when format has no overlay zones. + +**Files:** +- Modify: `modules/video_qc/executor.py` (add method on `VideoQCExecutor` after `_run_garment_name_check`) + +- [ ] **Step 1: Add the method** + +```python + def _run_title_safe_check(self) -> Dict[str, Any]: + """Advisory check — flag (never fail) when price or garment-name text + falls inside a platform UI overlay zone. Score is always 100; status + is 'warning' on detected issues, otherwise 'passed' / 'skipped'.""" + weight = 0 # advisory — does not contribute to overall score + try: + platform_info = _infer_platform_zones(os.path.basename(self.file_path)) + if not platform_info: + return { + 'check_name': 'title_safe', 'score': 100.0, 'status': 'skipped', + 'message': 'Format has no known platform overlay zones — title-safe not applicable', + 'details': {'skipped': True, 'reason': 'no_platform_zones'}, + 'weight': weight, + } + + zones_text = "; ".join( + f"{z['edge']} ~{z['percent']}%" for z in platform_info['zones'] + ) + prompt = ( + "You are reviewing this video for advisory title-safe issues. " + f"Platform: {platform_info['platform']}. " + f"{platform_info['description']}\n\n" + "Identify frames where the PRICE text or PRODUCT/GARMENT-NAME " + "text falls INSIDE one of these unsafe zones: " + f"{zones_text}. Ignore other text. Return ONLY valid JSON " + "(no markdown fences):\n" + "{\"issues\": [{\"frame_timestamp\": \"0:12\", " + "\"element\": \"price\" | \"garment\", " + "\"zone\": \"top\" | \"bottom\" | \"right\", " + "\"description\": \"...\"}], \"advisory_only\": true}" + ) + usage_context = { + 'module': 'video_qc', 'check_name': 'title_safe', + 'user': self.user, 'session_id': self.session_id, + } + response = LLMConfig.call_video_api( + prompt=prompt, video_path=self.file_path, + provider=self.llm_provider, model=self.llm_model, + usage_context=usage_context, + ) + text = response.get('text', '') + start, end = text.find('{'), text.rfind('}') + 1 + issues = [] + if start != -1 and end > start: + try: + issues = json.loads(text[start:end]).get('issues') or [] + except json.JSONDecodeError: + issues = [] + + details = { + 'platform': platform_info['platform'], + 'platform_description': platform_info['description'], + 'zones': platform_info['zones'], + 'issues': issues, + 'advisory_only': True, + 'llm_provider': self.llm_provider, 'llm_model': self.llm_model, + 'tokens_used': response.get('tokens_used'), + } + + if not issues: + return { + 'check_name': 'title_safe', 'score': 100.0, 'status': 'passed', + 'message': ( + f'Advisory check — no price/garment placement issues on ' + f'{platform_info["platform"]}.' + ), + 'details': details, 'weight': weight, + } + + issue_blurb = "; ".join( + f"{i.get('frame_timestamp','?')} {i.get('element','?')} " + f"in {i.get('zone','?')}" + for i in issues[:5] + ) + return { + 'check_name': 'title_safe', 'score': 100.0, 'status': 'warning', + 'message': ( + f'Advisory — {len(issues)} placement issue(s) on ' + f'{platform_info["platform"]}: {issue_blurb}. ' + f'Does not affect overall score.' + ), + 'details': details, + 'recommendations': [ + f"Review price/garment positioning for {platform_info['platform']} " + f"unsafe zones." + ], + 'weight': weight, + } + except Exception as e: + logger.error(f"title_safe check error: {e}", exc_info=True) + return { + 'check_name': 'title_safe', 'score': 100.0, 'status': 'error', + 'message': f'Error: {str(e)} — does not affect overall score', + 'details': {'error': str(e), 'advisory_only': True}, + 'weight': weight, + } +``` + +- [ ] **Step 2: Wire the check into `execute()`** + +In `VideoQCExecutor.execute()`, after the garment-name block added in Task 5: + +```python + # Step 5c: Title-safe advisory + self.progress.update(85, "Running title-safe placement check...") + title_safe_result = self._run_title_safe_check() + self.results['title_safe'] = title_safe_result + logger.info(f"Title safe: {title_safe_result['status']} ({title_safe_result['score']})") +``` + +The `active_checks` aggregation should NOT include `title_safe_result` — its weight is 0 and including it would still leave score at 100, but excluding makes the intent explicit: + +```python + active_checks = [r for r in (quality_result, censorship_result, price_result, + garment_result) + if r.get('status') != 'skipped'] + # title_safe_result intentionally excluded — advisory only (weight=0) +``` + +(Leave the existing `active_checks` change from Task 5 in place; just add the comment.) + +- [ ] **Step 3: Smoke-test against the tuning asset** + +Re-run the test video through the web UI. Expected: a new `title_safe` card appears, status `passed` or `warning` (NEVER `failed`). The test asset filename contains `TK_Stories_9x16`, so `_infer_platform_zones` returns `tiktok` and the LLM gets the TikTok zone description. + +If the LLM finds no issues, the card shows passed. If it finds issues, surface them as warnings. Either way, the overall score should NOT change versus before this task (i.e. compare overall scores from Task 5's run and this task's run for the same asset — they should match). + +- [ ] **Step 4: Commit** + +```bash +git add modules/video_qc/executor.py +git commit -m "Video QC: add title_safe advisory check + +Flags (never fails) when price or garment-name text falls inside known +platform UI overlay zones (TikTok / IG Stories / IG Reels / generic +vertical). Platform inferred from filename tokens via _infer_platform_zones. +Weight 0 in profile — advisory only, never contributes to overall score." +``` + +--- + +## Task 7: Register new checks in `profiles.yaml` + +Documentation-only — the YAML doesn't drive execution today (executor runs checks unconditionally) but is kept current as profile-metadata reference. + +**Files:** +- Modify: `modules/video_qc/profiles/profiles.yaml` + +- [ ] **Step 1: Add the new checks under `standard_video`** + +Open `modules/video_qc/profiles/profiles.yaml` and replace the `standard_video` block (lines 6–33) with: + +```yaml + standard_video: + name: "Standard Video QC (BETA)" + description: "Technical validation for H&M video marketing materials" + checks: + - name: "video_filename_parse" + weight: 15 + enabled: true + llm_provider: null + description: "Validate H&M video filename conventions" + + - name: "video_technical_check" + weight: 40 + enabled: true + llm_provider: null + description: "Verify codec, resolution, FPS, bitrate, audio specs" + + - name: "video_duration_check" + weight: 20 + enabled: true + llm_provider: null + description: "Validate duration matches filename" + + - name: "video_censorship_check" + weight: 25 + enabled: true + llm_provider: "openai" + llm_model: "gpt-4o" + description: "AI-powered censorship check (CEN markets only)" + + - name: "garment_name" + weight: 25 + enabled: true + llm_provider: "google" + llm_model: "gemini-2.5-flash" + description: "Validate on-screen garment name against pricing reference" + + - name: "title_safe" + weight: 0 + enabled: true + llm_provider: "google" + llm_model: "gemini-2.5-flash" + description: "Advisory — flag price/garment text inside platform UI overlay zones" +``` + +- [ ] **Step 2: Commit** + +```bash +git add modules/video_qc/profiles/profiles.yaml +git commit -m "Video QC: register garment_name and title_safe in standard_video profile + +Profile YAML is descriptive metadata (executor runs unconditionally). +Keeping it current so the profile page and any future YAML-driven +selection reflects the live check set." +``` + +--- + +## Task 8: Final smoke test + visual review of the report + +End-to-end verification that all three changes produced the expected user-visible behaviour. + +**Files:** (none — manual verification) + +- [ ] **Step 1: Restart dev server and run a fresh batch** + +```bash +cd /Users/nickviljoen/Desktop/HM_QC_Bitbucket/hm_ai_qc_report_tool +source venv/bin/activate +python -m flask --app app run --port 5050 +``` + +Web UI: upload `testing_15may/video_tuning/6898354_1013A_SPRING_W_10A_TT_9x16_TK_Stories_Vogue_ES-es.mp4` with a pricing reference attached. Wait for completion. + +- [ ] **Step 2: Open the generated report and verify each card** + +The report is saved under `storage/reports/video_qc//VideoQC_*.html`. Open in a browser. + +Expected cards (in order): +- `visual_quality` — unchanged from before this work (should still flag the English subtitle leak at 0:17–0:19, score ~55). +- `censorship` — skipped (no CEN suffix). +- `price_currency` — card body now shows `Matched: …` line on pass OR `Detected:` + `Expected one of:` lines on fail. +- `garment_name` — new card; status likely `skipped` ("No garment-name text detected") for this Stories asset. +- `title_safe` — new card; status `passed` or `warning` with TikTok platform info. + +- [ ] **Step 3: Verify overall score wasn't perturbed by `title_safe`** + +If the previous report (pre-this-work) for the same asset was 55.0, the new overall score should be the same 55.0 ± rounding — because `title_safe` has weight 0 and `garment_name` skipped (drops out of aggregation). If the score changed materially, something in the aggregation is wrong; re-check the `active_checks` filter. + +- [ ] **Step 4: No commit unless fixes needed** + +If you found a bug in steps 1–3, fix it and commit the fix as its own commit. Otherwise this task produces no commit — it's a verification gate. + +--- + +## Task 9: (Optional) Push the branch when ready + +The user explicitly said wait to push. Skip unless asked. + +```bash +git push origin develop +``` + +--- + +## Spec coverage check + +| Spec section | Covered by | +|---|---| +| Change 1 — price-card clarity | Task 2 | +| Change 2 — `garment_name` (skip rules, detection, matching, scoring, weight) | Tasks 4, 5 | +| Change 3 — `title_safe` (platform inference, advisory scoring, weight 0) | Tasks 3, 6 | +| Profile changes (`profiles.yaml`) | Task 7 | +| Final weights table | Task 7 | +| Testing approach (smoke against tuning asset) | Tasks 2, 5, 6, 8 | +| Open verification: `product_name` localisation | Task 1 | +| Open verification: `title_safe` LLM sensitivity | Task 6 (manual smoke), Task 8 (final review) | +| Open verification: YAML vs executor reality | Documented inline in Task 7 | diff --git a/docs/superpowers/specs/2026-05-15-srt-qc-design.md b/docs/superpowers/specs/2026-05-15-srt-qc-design.md new file mode 100644 index 0000000..38ff0f2 --- /dev/null +++ b/docs/superpowers/specs/2026-05-15-srt-qc-design.md @@ -0,0 +1,330 @@ +# SRT subtitle QC — design + +**Date:** 2026-05-15 +**Author:** Nick Viljoen (with Claude) +**Module:** `modules/video_qc/` (extended — no new module) +**Status:** Approved for implementation planning + +## Motivation + +A QC user asked what we can check about SRT subtitle files and how. Their feedback specifically flagged the concern that AI might "only register the correct language" rather than verifying "[subtitles] sitting in the right place and time out correctly" — i.e. PMs would still have to eyeball more than language. + +This spec addresses that concern by introducing three deterministic-or-text-only SRT checks that run alongside existing Video QC checks: **structure**, **timing**, and **language**. + +Audio-vs-SRT transcription comparison (Whisper / Gemini audio) is **explicitly v2** — it is the heaviest and most expensive check, and the v1 set already covers the user's stated concerns. + +## Scope + +In scope: +- `modules/video_qc/batch_executor.py` — pre-flight SRT pairing step. +- `modules/video_qc/executor.py` — accept `srt_path`; three new check methods. +- `modules/video_qc/utils/srt_pairing.py` (NEW) — token parsing and pair-scoring helpers. +- `modules/video_qc/templates/` — pre-flight pairing summary in the batch UI; three new report cards. +- `modules/video_qc/routes.py` — accept `.srt` uploads alongside `.mp4`; surface unpaired SRTs. +- `requirements.txt` — add `srt` Python library. + +Out of scope: +- Audio-vs-SRT transcription comparison (deferred to v2). +- Legal / byline detection (separate concern, on-video overlay, not in SRTs). +- A standalone SRT QC module / dashboard. +- Database schema changes — SRT data lives in `QCReport.metadata_json`. +- Image QC, Video Master, Reporting modules — unchanged. + +## Architecture + +``` +Batch upload (videos + SRTs together, via manual upload OR Box pull) + │ + ▼ +SRT pairing (batch-level, in batch_executor.py) + • For each video: parse filename → {campaign_code, clip_slug, locale} + • For each SRT: parse filename → {campaign_code?, clip_slug, locale?} + • Score every (video, srt) pair; auto-pair highest scoring above threshold + • Produce: pair_map: dict[video_path, srt_path|None] + unpaired_srts: list[str] + • Pre-flight summary shown to user; user clicks Continue to start batch. + │ + ▼ +For each (video, srt|None): + VideoQCExecutor(file_path=video, srt_path=srt|None, ...).execute() + ├─ visual_quality (existing) + ├─ censorship (existing) + ├─ price_currency (existing) + ├─ garment_name (from spec 1) + ├─ title_safe (from spec 1) + ├─ srt_structure (NEW — skipped if srt_path is None) + ├─ srt_timing (NEW — skipped if srt_path is None) + └─ srt_language (NEW — skipped if srt_path is None) +``` + +Unified report per video. SRT cards appear inline with the other check cards, with `skipped` status if no SRT was paired. + +## Pairing strategy + +### Token parsing + +For both video and SRT filenames, parse case-insensitively: + +| Token | Video example | SRT example | Notes | +|---|---|---|---| +| `campaign_code` | `CFUL262B01` | `CFUL262B01` (sometimes absent) | Matches the canonical campaign-code regex in `core/utils/campaign_code.py`. | +| `clip_slug` | `PP_RIO_INTRO_15C` | `PP_RIO_INTRO_15C` or `RIO_INTRO15C` | The "clip identifier". Normalised by stripping `[_.\-\s]` and lowercasing before comparing. | +| `locale` | `AT-de` | `de-AT` (BCP-47-ish) | Canonicalise to `lang-MARKET` (e.g. `de-AT`). Reuses the existing locale-parser logic in `_extract_locale_from_filename` — generalise it into the new `srt_pairing.canonical_locale()` helper. | + +### Helper module + +New file: `modules/video_qc/utils/srt_pairing.py`: + +```python +def parse_video_tokens(filename: str) -> dict +def parse_srt_tokens(filename: str) -> dict +def normalise_slug(s: str) -> str # lowercase, strip [_.\-\s], alphanumeric only +def canonical_locale(s: str) -> str | None # 'AT-de'/'de-AT'/'de_AT' -> 'de-AT' +def score_pair(video_tokens: dict, srt_tokens: dict) -> float # 0.0..1.0 +def pair_batch(videos: list[str], srts: list[str]) -> tuple[dict, list[str], list[str]] + # returns (pair_map, unpaired_srts, unpaired_videos) +``` + +### Scoring (`score_pair`) + +Additive, capped at 1.0: + +| Signal | Weight | Notes | +|---|---|---| +| Locale matches after canonicalisation | **0.5** | If BOTH have locales and they differ → score = 0. If SRT has no locale, contributes 0 (no penalty). | +| Campaign code matches | **0.3** | If SRT has no campaign code, contributes 0 (no penalty). | +| Normalised clip slug matches | **0.4** | Required — if the slugs don't normalise-equal, score = 0. | + +**Auto-pair threshold**: `score ≥ 0.7`. Below threshold → SRT goes to `unpaired_srts`. + +Real-world expected scores from `testing_15may/srt/` test data: + +| Video filename | SRT filename | Expected score | +|---|---|---| +| `7147775_AT-de_CFUL262B01_PP_RIO_INTRO_15C_4x5_SoMe_MASTER.mp4` | `CFUL262B01_PP_RIO_INTRO_15C.srt_8852357_de-AT.srt` | locale 0.5 + code 0.3 + slug 0.4 = **1.0 (capped)** | +| `7158980_CH-en_CFUL262B01_PP_RIO_INTRO_6B_1x1_SoMe_MASTER.mp4` | `RIO_INTRO6B_en-CH.srt` | locale 0.5 + slug 0.4 = **0.9** (no campaign code on SRT) | + +Both above threshold. + +### Tie-break + +If multiple SRTs tie for a video's highest score: +1. Prefer the SRT with the highest raw (pre-cap) score. +2. If still tied, prefer the SRT with the earliest sorted filename (deterministic). +3. Non-winning SRTs go to `unpaired_srts`. + +If multiple videos tie for an SRT's highest score, the same logic applies symmetrically. Each SRT is paired with at most one video and vice versa. + +### Pre-flight UI + +In the batch upload / Box-pull start view, after the user has selected/listed files but before the batch runs: + +``` +Pairing summary +────────────────────────────────────────── + ✓ 6 videos paired with SRTs + ⚠ 0 videos without an SRT — SRT checks will skip + ⚠ 0 SRT files unpaired — will be ignored + +[Continue] [Back to upload] +``` + +Unpaired SRTs collapse open to show each filename and its best-attempted match score (so the user can spot near-misses and rename if they want to). + +The Continue button proceeds with the pair_map as-is. There is no manual override UI in v1 — the user's instruction was "fuzzy match, don't require renaming". A manual override is a candidate v2 feature if low-confidence pairs prove problematic. + +## The three checks + +All three follow the existing check-result contract used elsewhere in `executor.py`: + +```python +{ + 'check_name': str, + 'score': float, + 'status': 'passed'|'warning'|'failed'|'skipped'|'error', + 'message': str, + 'details': dict, + 'recommendations': list[str], + 'weight': int +} +``` + +All three skip silently (status `skipped`, score 100, weight contributes 0) when `srt_path is None`. + +### Check 1 — `srt_structure` + +**Deterministic, no LLM.** Uses the `srt` Python library (pure-Python, well-maintained, on PyPI). + +**Validations**: +1. File parses as valid SRT — failure → score 0, status `failed`. No further SRT checks meaningful but they still run and may also fail; that's fine. +2. Encoding: read as UTF-8 first; on `UnicodeDecodeError` fall back to `chardet` detection and continue, but emit a **warning** flagging the non-UTF-8 encoding (upstream tooling issue). +3. No `U+FFFD` replacement chars in cue text — presence → failure (signals encoding loss). +4. Each cue's timecode parses as `HH:MM:SS,mmm` — handled by the `srt` library; we surface any parse exceptions cleanly. +5. Cue indices ascending. Missing cue numbers (the `RIO_INDIVIDUAL15A_en-CH.srt` test file shows this) → **warning**, not failure — players generally accept it. +6. No empty / whitespace-only cue text → warning per occurrence. + +**Scoring**: +- Start at 100. +- Hard parse failure or `U+FFFD` content → 0/failed. +- Each warning → −10, floor 70. +- Status: `passed` ≥ 90, `warning` 70–89, `failed` < 70. + +**Weight**: 15. + +### Check 2 — `srt_timing` + +**Deterministic, no LLM. Requires video duration** — already available via `get_video_metadata(self.file_path)['duration']` inside the executor. + +**Validations**: +1. Every cue: `start < end`, both ≥ 0 → any violation = fail. +2. Cues do not overlap (cue N end ≤ cue N+1 start) → first overlap = warning, ≥ 3 overlaps = failed. +3. Last cue `end ≤ video_duration + 0.5s` tolerance → fail otherwise (subtitles running past the video is a clear defect). +4. Reading speed per cue: chars-per-second within `[5, 25]` (lenient — broadcast norm is ~12–21 but marketing punchiness can exceed). Outside range → warning per cue, capped to first 5 reported. +5. Line length ≤ 42 chars per line → warning per cue, capped to first 5 reported. +6. Lines per cue ≤ 2 → warning per cue. +7. Cue duration in `[0.7s, 7s]` → warning per cue, capped to first 5 reported. + +**Scoring**: +- Start at 100. +- Each "failure" rule violated → −30. +- Each warning → −5, with cumulative warning loss capped at 50. +- Floor at 0. +- Status: `passed` ≥ 90, `warning` 70–89, `failed` < 70. + +**Feature flag**: rules 4–7 (the advisory broadcast norms) should be toggle-able via the check config (`enabled_rules: list[str]`) so they can be silenced without a release if they prove noisy in practice. Default: all enabled. + +**Weight**: 10. + +### Check 3 — `srt_language` + +**LLM-judged but text-only** (no video upload — much cheaper than `visual_quality` etc.). + +**Flow**: +1. Extract `expected_lang` from the canonicalised locale of the paired video filename. Reuse the existing `_LANG_NAMES` mapping in `executor.py`. +2. Sample cue text: first 5 cues + 5 evenly-distributed middle cues + last 5, deduplicated, capped at 1500 chars total. +3. One LLM call (Gemini Flash, text-only): + ``` + prompt: "What language is this subtitle text written in? Return JSON: + {detected_language: 'German'|'Spanish'|..., iso_code: 'de'|'es'|..., + confidence: 0.0-1.0, mixed_language: bool}. + Be strict — proper nouns and brand names don't count as language indicators." + ``` +4. Score: + - Detected matches expected → 100, `passed`. + - Detected mismatches expected with confidence ≥ 0.8 → 0, `failed`. + - Detected mismatches with confidence < 0.8, OR `mixed_language: true` → 50, `warning`. + +**Weight**: 20. + +This is the direct answer to the user's "would it only register the correct language?" question — language IS the check we run here, but `srt_structure` and `srt_timing` run **alongside** it to address the rest of their concern. + +## Final weights + +`modules/video_qc/profiles/profiles.yaml`, `standard_video` profile (combined with spec 1): + +| Check | Weight | Notes | +|---|---|---| +| `visual_quality` | 50 | (Existing) | +| `censorship` | 50 | (Existing — CEN markets only) | +| `price_currency` | 30 | (Existing) | +| `garment_name` | 25 | (Spec 1) | +| `title_safe` | 0 | (Spec 1 — advisory) | +| `srt_structure` | **15** | New | +| `srt_timing` | **10** | New | +| `srt_language` | **20** | New | + +When SRT absent (or CEN absent), the relevant checks register as `skipped` and drop out of the weighted average — existing `active_checks` math in `execute()` handles this without change. + +## Report integration + +Three new cards appear in the existing Video QC report HTML (`_generate_report` in `executor.py`): + +- **`srt_structure`** — parse status, encoding, warnings list (e.g. "Cue 4: empty text", "Cue 7: missing index"). +- **`srt_timing`** — table of issues: overlap timestamps, cues exceeding video duration, reading-speed outliers, line-length warnings. +- **`srt_language`** — detected language, expected language, confidence, mixed-language flag, sample cue used. + +When SRT absent the cards still render with `skipped` status and a message: `No SRT paired with this video — SRT checks skipped`. Consistency across the batch report. + +## Executor changes + +### `VideoQCExecutor` + +```python +def __init__(self, session_id, file_path, *, + job_number=None, llm_provider='google', llm_model='gemini-2.5-flash', + user=None, campaign_id=None, batch_id=None, + pricing_reference_id=None, + srt_path: str | None = None): # NEW + ... + self.srt_path = srt_path +``` + +Inside `execute()`, after the existing `price_currency` block, add three more blocks following the same pattern as `_run_price_check`: + +```python +if self.srt_path: + self.results['srt_structure'] = self._run_srt_structure_check() + self.results['srt_timing'] = self._run_srt_timing_check(duration) + self.results['srt_language'] = self._run_srt_language_check() +else: + for name, weight in (('srt_structure', 15), ('srt_timing', 10), ('srt_language', 20)): + self.results[name] = { + 'check_name': name, 'score': 100.0, 'status': 'skipped', + 'message': 'No SRT paired with this video — SRT checks skipped', + 'details': {'skipped': True, 'reason': 'no_srt_paired'}, + 'weight': weight, + } +``` + +### `BatchVideoQCExecutor` + +Add a pre-flight step before iterating videos: + +```python +from modules.video_qc.utils.srt_pairing import pair_batch +pair_map, unpaired_srts, unpaired_videos = pair_batch(self.video_paths, self.srt_paths) +self.pair_map = pair_map +self.unpaired_srts = unpaired_srts +``` + +Then thread `srt_path=pair_map.get(video_path)` into each per-video executor instantiation. + +## Persistence + +No schema changes. `QCReport.metadata_json` gains: + +```json +{ + ..., + "srt_paired": true, + "srt_filename": "CFUL262B01_PP_RIO_INTRO_15C.srt_..._de-AT.srt", + "srt_pair_score": 1.0 +} +``` + +`unpaired_srts` is logged but not persisted in v1 — it's a transient batch-time concern surfaced in the pre-flight UI. + +## Testing approach + +Manual exercise via the web launcher (consistent with existing Video QC pattern; no automated test infra). + +**Smoke test using `testing_15may/srt/`**: + +- 6 videos + 6 SRTs, expect 6/6 pairs above threshold (4 AT-de, 2 CH-en). +- All three SRT checks engage for every pair. +- `srt_structure`: `RIO_INDIVIDUAL15A_en-CH.srt` (no cue numbers) should produce a single warning, NOT a failure. +- `srt_timing`: confirm last-cue check passes on all 6 normally. Then manually edit one SRT cue to extend ~5s past video end → expect fail. +- `srt_language`: AT-de SRTs detect as German, CH-en as English. Manually replace one cue with Spanish text → expect mixed-language warning. + +**Pairing-edge tests** (manual): +- Rename one SRT to lose its locale → expect pair score drops to 0.4 (slug only), below threshold → goes to `unpaired_srts`. +- Add a duplicate SRT for one video (different filename, same locale+slug) → expect tie-break picks alphabetically earlier filename; other goes to `unpaired_srts`. + +## Open verification steps for implementation + +1. **`srt` library encoding behaviour** — confirm how it handles non-UTF-8 input; may need a `chardet` pre-decode step. +2. **Box folder traversal returns `.srt`** — verify the existing `box_client` listing logic includes `.srt` files in directory listings. Likely yes (it's filename-driven) but should be confirmed before relying on the Box path. +3. **Reading-speed / line-length norms tuning** — once we have a few real batches through, decide whether to keep the broadcast defaults or relax further. Treat thresholds as tunable. +4. **`canonical_locale` overlap with existing parser** — the existing `_extract_locale_from_filename` lives on `VideoQCExecutor`. The pairing helper needs the same logic. Refactor: move the locale-parsing logic into `srt_pairing.canonical_locale` (or a sibling util module) and have the executor call into it, removing the duplication. + +These are flagged for the implementation plan, not blockers on the design. diff --git a/docs/superpowers/specs/2026-05-15-video-qc-tuning-design.md b/docs/superpowers/specs/2026-05-15-video-qc-tuning-design.md new file mode 100644 index 0000000..6d52fda --- /dev/null +++ b/docs/superpowers/specs/2026-05-15-video-qc-tuning-design.md @@ -0,0 +1,226 @@ +# Video QC tuning — design + +**Date:** 2026-05-15 +**Author:** Nick Viljoen (with Claude) +**Module:** `modules/video_qc/` +**Status:** Approved for implementation planning + +## Motivation + +A QC user asked three questions about what Video QC checks today: + +1. **Price** — "notes the currency symbol but does this include the actual price of the garment for the market? That is key." +2. **Garment name** — "like the above, this is variable per market." +3. **Placement / title-safe** — "should be cleared when global masters are done, however I would expect MGDs/PMs to check that the price point and garment name are in the right place for the deliverable spec." + +What the code does today: + +| Concern | Today | +|---|---| +| Price (full value) | ✅ `price_currency` already extracts the numeric value and matches it against the pricing reference, but the result is buried in `details` — looks like only currency is checked. | +| Garment name | ⚠️ `product_name` exists on each pricing-reference row but is only surfaced inside the matched-price block. No explicit "is the garment name on screen correct?" check. | +| Title-safe / placement | ❌ Not checked. `visual_quality` covers language and legibility but not whether price/garment text falls inside a platform UI overlay zone. | + +This spec covers all three: surface what's already there for **price**, add a new check for **garment name**, add a new advisory-only check for **title-safe**. + +## Scope + +In scope: +- `modules/video_qc/executor.py` — add two new checks, refine price-card rendering. +- `modules/video_qc/profiles/profiles.yaml` — register new checks in `standard_video`. + +Out of scope: +- Quick Video profile (`quick_video`) — unchanged. +- Image QC (`modules/hm_qc/`) — unchanged. +- Pricing reference ingest changes — we will *verify* how `product_name` is populated, but no schema changes. + +## Architecture + +The three changes slot into the existing `VideoQCExecutor.execute()` pipeline. No new modules, no new dependencies. + +``` +VideoQCExecutor.execute() + ├─ visual_quality (existing, unchanged) + ├─ censorship (existing, unchanged) + ├─ price_currency (existing — report-card tweak only) + ├─ garment_name (NEW) + └─ title_safe (NEW, advisory-only) +``` + +Both new checks live in `executor.py` alongside `_run_price_check`, sharing: +- `LLMConfig.call_video_api` (Gemini direct video). +- The pricing-reference loaded by `_load_pricing_reference()`. +- The locale parser `_extract_locale_from_filename()`. + +The weighted-overall-score math (`active_checks` filter in `execute()`) already drops `skipped` checks and respects per-check weights — no changes needed there. + +## Change 1 — `price_currency` report-card clarity + +**Problem.** The check already does the right thing (deterministic numeric match against `_prices`), but the report card surfaces only currency and a generic pass/fail line. Detected price, matched price, and matched product are stuffed into `details`. + +**Change.** Two report-rendering tweaks in `_generate_report` (or in the price check's `message` string, depending on what reads cleaner): + +- On **pass**, the card shows a `Matched: ` line at the top of the body, e.g. `Matched: €49.99 — Oversized Cotton Shirt`. Pulls from `details.price_match.matched_price` / `matched_product`. +- On **fail**, the card shows `Detected: ` and `Expected one of: ( of shown)`. Pulls from `details.detected_prices` and `details.price_match.expected_prices`. + +No logic changes — the data is already in `result['details']`. + +**Why this matters.** The user's first question ("does this include the actual price of the garment for the market?") is answered "yes" by the existing code. The fix is making that visible. + +## Change 2 — `garment_name` check (NEW) + +### Behaviour + +1. **Skip rules** (matching `price_currency`): + - No pricing reference attached → skipped. + - Locale unparseable → skipped. + - `GEN` or `CEN` market in locale → skipped. + - LLM returns no detected names → skipped, message "No garment-name text detected — skipping validation". + +2. **Detection** — one Gemini direct-video call: + ``` + prompt: "Identify any garment / product names visible as text overlays in + this video (e.g. 'OVERSIZED COTTON SHIRT', 'WOOL BLAZER'). Ignore prices, + CTAs, dates, logos, model names, and campaign headlines. Return JSON: + {detected_names: [string, ...], any_text_overlay: bool}" + ``` + +3. **Expected names** — collect `product_name` values from `pricing_reference['prices']` filtered by the file's locale (same filtering pattern as price check uses `expected_entries`). + +4. **Matching** — for each detected name vs each expected name: + - Normalise both: lowercase, strip punctuation, collapse whitespace. + - Match if one is a substring of the other, OR token-set overlap ≥ 60% (where token-set overlap = |A ∩ B| / min(|A|, |B|)). + - Helper: `_normalize_product_name(s)` and `_product_names_match(a, b)` — co-located with the check; not a shared util unless a second consumer appears. + +5. **Scoring**: + - Match found → 100, `passed`. + - Detected but no match → 0, `failed`, with `detected vs expected` surfaced in the report card. + - Skip cases → 100, `skipped`. + +6. **Weight** — `25` in `standard_video` profile. + +### Open verification step (call out in implementation) + +The pricing-reference Excel ingest may store `product_name` only in source language (English) even for non-English locales. If so, the token-overlap match will false-fail on properly-localized assets. Before locking in the matching rule: + +- Inspect a sample `PricingReference` row for a non-English locale (e.g. `de-AT`, `es-ES`) and confirm whether `product_name` is localised. +- If **localised** → ship the matching rule as designed. +- If **English-only** → either (a) skip the check for non-English locales (treats it as "we don't have ground truth"), or (b) add a small LLM-based translation step on the detected name before comparison. Decision deferred to implementation; the safe default is (a). + +### Report card + +- Status colour matches `score`. +- Body lines: + - `Detected: ` (always, when not skipped). + - `Expected for : ` (top 5, with `(N of M shown)` if truncated). + - `Matched: ` on pass. + +## Change 3 — `title_safe` check (NEW, advisory-only) + +### Behaviour + +1. **Platform inference from filename.** A lookup dict in `executor.py`: + + | Tokens (case-insensitive substring match) | Platform family | Unsafe zones | + |---|---|---| + | `TK_Stories`, lone `TK`, `TT_*9x16*` | TikTok | top ~10%, bottom ~25% | + | `IG_Stories`, `Stories_*9x16*` | IG Stories | top ~12%, bottom ~18% | + | `IG_Reels`, `Reels_*9x16*` | IG Reels | bottom ~25%, right edge ~10% | + | `9x16` w/ no platform hint | Vertical SoMe (generic) | top ~10%, bottom ~20% | + | `1x1`, `4x5` (no vertical hint) | Feed | none | + | Anything else | Unknown | none | + + Helper: `_infer_platform_zones(filename) -> {platform: str, zones: list[dict]} | None`. + +2. **Skip path** — if platform yields no zones (Feed or Unknown), result is `skipped` with score 100, message "Format has no platform overlay zones — title-safe not applicable". No LLM call. + +3. **LLM call** — one Gemini direct-video call with prompt that includes: + - The platform name + textual description of unsafe zones (top X%, bottom Y%, etc.). + - Instruction to identify ONLY frames where **price text or garment-name text** falls inside an unsafe zone. Ignore other text. + - Return JSON `{issues: [{frame_timestamp, element: 'price'|'garment', zone, description}], advisory_only: true}`. + +4. **Scoring** — score is **always 100**. Status is `passed` if `issues` is empty, else `warning`. Never `failed`. + +5. **Weight** — `0` in `standard_video` profile. The overall-score aggregator already handles zero-weight checks: a check with weight 0 contributes `0 * score = 0` to `weighted_sum` and `0` to `total_weight`. **Implementation must verify** the aggregator doesn't divide by zero when *all* active checks have weight 0 (in practice impossible because other checks have non-zero weight, but the code should be defensive). Reusing the existing pattern: `if total_weight else 0`. + +### Report card + +- Status badge: yellow `warning` when issues exist, grey `passed`/`skipped` otherwise. +- Body: bulleted list of issues, each formatted ` in : `. +- A leading note: `Advisory — does not affect overall score.` + +### Failure mode flagged + +If the LLM is generous and never flags any placement, the check is decorative. Mitigation: +- Explicit zone description in the prompt (percentages, not vague terms). +- During implementation, test against at least one known-bad asset (price intentionally placed under the TikTok caption strip) to confirm the check engages. + +## Profile changes + +`modules/video_qc/profiles/profiles.yaml`, `standard_video` profile: + +```yaml +profiles: + standard_video: + name: "Standard Video QC (BETA)" + checks: + - name: "video_filename_parse" + weight: 15 + - name: "video_technical_check" + weight: 40 + - name: "video_duration_check" + weight: 20 + - name: "video_censorship_check" + weight: 25 + llm_provider: "openai" + # NEW + - name: "garment_name" + weight: 25 + enabled: true + llm_provider: "google" + llm_model: "gemini-2.5-flash" + description: "Validate on-screen garment name against pricing reference" + - name: "title_safe" + weight: 0 + enabled: true + llm_provider: "google" + llm_model: "gemini-2.5-flash" + description: "Advisory — flag price/garment text inside platform UI overlay zones" +``` + +(Note: the existing YAML doesn't currently list `visual_quality` or `price_currency` either — the executor runs them unconditionally. The new checks follow the same convention; the YAML is descriptive metadata, not the source of truth for what runs. **Implementation must align this** — either register all checks in YAML and read from it, or document that the YAML is purely informational. Today it's the latter; we keep that pattern.) + +## Final weights + +| Check | Weight | Notes | +|---|---|---| +| `visual_quality` | 50 | Unchanged. | +| `censorship` | 50 | Unchanged. CEN markets only. | +| `price_currency` | 30 | Unchanged. | +| `garment_name` | 25 | New. | +| `title_safe` | 0 | New. Advisory-only — does not contribute to overall score. | + +## Testing approach + +Manual exercise via the web launcher; Video QC has no automated test infrastructure today. + +**Smoke-test asset:** `testing_15may/video_tuning/6898354_1013A_SPRING_W_10A_TT_9x16_TK_Stories_Vogue_ES-es.mp4` (ES-es, TikTok Stories, 9x16). + +Expected behaviour after implementation: +- `visual_quality` continues to flag the English subtitle leak at 0:17–0:19 (no regression on existing logic). +- `garment_name` either matches against the ES-es product_name or skips silently if no garment text appears. +- `title_safe` engages because filename is `TK_Stories` + `9x16`. +- `price_currency` shows the new "Matched: …" / "Expected one of: …" lines in the report card. + +Additional manual checks: +- Run with a pricing reference that has known `product_name`s for ES-es; confirm match path. +- Run with a pricing reference whose `product_name` is empty/null; confirm skip path. +- Run on a `1x1` Feed deliverable; confirm `title_safe` skips with the "no overlay zones" message. + +## Open questions / verification steps for implementation + +1. **`product_name` localisation** — verify whether the pricing-reference Excel ingest stores localised or source-language product names. Decide skip vs translate-then-match. +2. **`title_safe` LLM sensitivity** — verify on a deliberately-bad asset that the check actually flags issues. +3. **YAML profile vs executor reality** — confirm the executor is the source of truth for which checks run; document or align. + +These are flagged for the implementation plan, not blockers on the design. diff --git a/modules/video_qc/batch_executor.py b/modules/video_qc/batch_executor.py index 5e41d1c..ab34abd 100644 --- a/modules/video_qc/batch_executor.py +++ b/modules/video_qc/batch_executor.py @@ -36,7 +36,8 @@ class BatchVideoQCExecutor: pricing_reference_id: int = None, batch_id: str = None, batch_size: int = DEFAULT_BATCH_SIZE, - app=None + app=None, + srt_paths: List[str] = None, ): self.session_id = session_id self.file_paths = file_paths[:MAX_FILES] @@ -49,6 +50,10 @@ class BatchVideoQCExecutor: self.batch_id = batch_id self.batch_size = batch_size self.app = app + self.srt_paths = list(srt_paths or []) + self.pair_map: Dict[str, str] = {} + self.unpaired_srts: List[str] = [] + self.unpaired_videos: List[str] = [] self.progress = UnifiedProgressTracker(session_id) self.results = [] @@ -58,6 +63,20 @@ class BatchVideoQCExecutor: self.progress.fail("No files to process") return {'error': 'No files to process'} + # Pre-flight: pair SRTs to videos by filename + if self.srt_paths: + from modules.video_qc.utils.srt_pairing import pair_batch + pair_map, unpaired_srts, unpaired_videos = pair_batch( + self.file_paths, self.srt_paths + ) + self.pair_map = pair_map + self.unpaired_srts = unpaired_srts + self.unpaired_videos = unpaired_videos + logger.info( + f"SRT pairing: {sum(1 for v in pair_map.values() if v)}/{len(pair_map)} " + f"videos paired; {len(unpaired_srts)} SRTs unpaired." + ) + try: logger.info( f"Starting batch Video QC for {total_files} files " @@ -156,7 +175,8 @@ class BatchVideoQCExecutor: user=self.user, campaign_id=self.campaign_id, batch_id=self.batch_id, - pricing_reference_id=self.pricing_reference_id + pricing_reference_id=self.pricing_reference_id, + srt_path=self.pair_map.get(file_path), ) result = executor.execute() diff --git a/modules/video_qc/executor.py b/modules/video_qc/executor.py index 699e978..595b98e 100644 --- a/modules/video_qc/executor.py +++ b/modules/video_qc/executor.py @@ -44,13 +44,123 @@ def _language_display(lang_code: str) -> str: return _LANG_NAMES.get(lang_code.lower(), f"language code '{lang_code}'") +# Platform UI overlay zones for `title_safe` check. Each entry describes the +# regions of the frame that the platform's UI obscures (profile pills, captions, +# action icons). Used to flag when price or garment-name text lands inside. +# Percentages are conservative — tuned against IG/TikTok screenshots circa 2026. +_PLATFORM_ZONES = { + 'tiktok': { + 'description': "TikTok feed: top ~10% (profile/handle bar), bottom ~25% " + "(caption + UI action rail).", + 'zones': [ + {'edge': 'top', 'percent': 10}, + {'edge': 'bottom', 'percent': 25}, + ], + }, + 'ig_stories': { + 'description': "Instagram Stories: top ~12% (profile pill + reactions), " + "bottom ~18% (swipe-up / reply bar).", + 'zones': [ + {'edge': 'top', 'percent': 12}, + {'edge': 'bottom', 'percent': 18}, + ], + }, + 'ig_reels': { + 'description': "Instagram Reels: bottom ~25% (caption + icons), " + "right edge ~10% (action rail).", + 'zones': [ + {'edge': 'bottom', 'percent': 25}, + {'edge': 'right', 'percent': 10}, + ], + }, + 'vertical_generic': { + 'description': "Generic vertical (9x16) social: top ~10%, bottom ~20%.", + 'zones': [ + {'edge': 'top', 'percent': 10}, + {'edge': 'bottom', 'percent': 20}, + ], + }, +} + + +def _infer_platform_zones(filename: str) -> dict | None: + """Infer platform UI overlay zones from filename tokens. + + Returns a dict {'platform': str, 'description': str, 'zones': [...]} + or None when the format has no known overlay zones (feed formats like + 1x1 / 4x5, or unrecognised). + + The returned dict is freshly built (deep-copied zones list) so callers + can mutate it without corrupting the module-level _PLATFORM_ZONES. + """ + if not filename: + return None + upper = filename.upper() + + def _build(key: str) -> dict: + base = _PLATFORM_ZONES[key] + return { + 'platform': key, + 'description': base['description'], + 'zones': [dict(z) for z in base['zones']], + } + + # Most specific first: explicit platform + format combos. + if 'TK_STORIES' in upper or '_TK_' in upper or 'TT_9X16' in upper: + return _build('tiktok') + if 'IG_STORIES' in upper or 'STORIES_9X16' in upper: + return _build('ig_stories') + if 'IG_REELS' in upper or 'REELS_9X16' in upper: + return _build('ig_reels') + + # Fallback: any 9x16 with no platform hint -> generic vertical. + if '9X16' in upper: + return _build('vertical_generic') + + # Feed formats (1x1, 4x5) and anything else -> no overlay zones. + return None + + +import re as _re + + +def _normalize_product_name(s: str) -> str: + """Lowercase, strip non-alphanumeric (except spaces), collapse whitespace.""" + if not s: + return '' + s = s.lower() + s = _re.sub(r"[^a-z0-9\s]", " ", s) + s = _re.sub(r"\s+", " ", s).strip() + return s + + +def _product_names_match(a: str, b: str) -> bool: + """True when two product names are 'close enough' after normalisation. + + Match rule (any of): + • One normalised string is a substring of the other (non-empty). + • Token-set overlap |A ∩ B| / min(|A|, |B|) >= 0.6. + Empty strings never match. + """ + na, nb = _normalize_product_name(a), _normalize_product_name(b) + if not na or not nb: + return False + if na in nb or nb in na: + return True + ta, tb = set(na.split()), set(nb.split()) + if not ta or not tb: + return False + overlap = len(ta & tb) / min(len(ta), len(tb)) + return overlap >= 0.6 + + class VideoQCExecutor: """Execute video QC checks with frame extraction and AI analysis.""" def __init__(self, session_id: str, file_path: str, job_number: str = None, llm_provider: str = 'google', llm_model: str = 'gemini-2.5-flash', user: str = None, campaign_id: str = None, batch_id: str = None, - pricing_reference_id: int = None): + pricing_reference_id: int = None, srt_path: str = None): self.session_id = session_id self.file_path = file_path self.job_number = job_number @@ -60,6 +170,7 @@ class VideoQCExecutor: self.campaign_id = campaign_id self.batch_id = batch_id self.pricing_reference_id = pricing_reference_id + self.srt_path = srt_path self.progress = UnifiedProgressTracker(session_id) self.results = {} self.campaign_context = {} @@ -153,10 +264,55 @@ class VideoQCExecutor: self.results['price_currency'] = price_result logger.info(f"Price/currency: {price_result['status']} ({price_result['score']})") + # Step 5b: Garment-name check + self.progress.update(82, "Running garment-name check...") + if self.pricing_reference: + garment_result = self._run_garment_name_check() + else: + garment_result = { + 'check_name': 'garment_name', 'score': 100.0, 'status': 'skipped', + 'message': 'Skipped — no pricing reference attached to this run', + 'details': {'skipped': True, 'reason': 'no_pricing_reference'}, + 'weight': 25, + } + self.results['garment_name'] = garment_result + logger.info(f"Garment name: {garment_result['status']} ({garment_result['score']})") + + # Step 5c: Title-safe advisory + self.progress.update(85, "Running title-safe placement check...") + title_safe_result = self._run_title_safe_check() + self.results['title_safe'] = title_safe_result + logger.info(f"Title safe: {title_safe_result['status']} ({title_safe_result['score']})") + + # Step 5d: SRT checks (only if a paired SRT was provided) + srt_results = {} + for name, weight in (('srt_structure', 15), ('srt_timing', 10), ('srt_language', 20)): + srt_results[name] = { + 'check_name': name, 'score': 100.0, 'status': 'skipped', + 'message': 'No SRT paired with this video — SRT checks skipped', + 'details': {'skipped': True, 'reason': 'no_srt_paired'}, + 'weight': weight, + } + if self.srt_path: + self.progress.update(89, "Running SRT structure check...") + srt_results['srt_structure'] = self._run_srt_structure_check() + self.progress.update(91, "Running SRT timing check...") + srt_results['srt_timing'] = self._run_srt_timing_check(duration) + self.progress.update(93, "Running SRT language check...") + srt_results['srt_language'] = self._run_srt_language_check() + for k, v in srt_results.items(): + self.results[k] = v + logger.info(f"{k}: {v['status']} ({v['score']})") + # Step 6: Calculate overall score — weighted mean of non-skipped checks self.progress.update(87, "Calculating overall score...") - active_checks = [r for r in (quality_result, censorship_result, price_result) + active_checks = [r for r in (quality_result, censorship_result, price_result, + garment_result, + srt_results['srt_structure'], + srt_results['srt_timing'], + srt_results['srt_language']) if r.get('status') != 'skipped'] + # title_safe_result intentionally excluded — advisory only (weight=0) if active_checks: total_weight = sum(r.get('weight', 1) for r in active_checks) weighted_sum = sum(r['score'] * r.get('weight', 1) for r in active_checks) @@ -780,10 +936,33 @@ Respond in JSON: 'llm_model': self.llm_model } - if status == 'passed': + matched_price = matched[0] if matched else None + matched_product = matched[1].get('product_name') if matched else None + if status == 'passed' and matched_price is not None: + product_suffix = f" — {matched_product}" if matched_product else "" + message = ( + f"Matched: {currency}{matched_price}{product_suffix} " + f"(locale {language})" + ) + elif status == 'passed': message = f'Price/currency passed — {currency} correct for {language}' else: - message = f'Price/currency issues: {", ".join(issues)}' + exp_list = details['price_match']['expected_prices'] + exp_total = len(expected_entries) + expected_blurb = '' + if exp_list: + shown = exp_list[:5] + expected_blurb = ( + f' Expected one of: {", ".join(shown)}' + + (f' ({len(shown)} of {exp_total} shown)' if exp_total > len(shown) else '') + ) + detected_blurb = '' + if detected_strings: + detected_blurb = f' Detected: {", ".join(detected_strings)}.' + message = ( + f'Price/currency issues: {", ".join(issues)}.' + + detected_blurb + expected_blurb + ) return { 'check_name': 'price_currency', @@ -805,6 +984,601 @@ Respond in JSON: 'weight': weight } + def _run_garment_name_check(self) -> Dict[str, Any]: + """Detect on-screen garment/product names and match against the + pricing reference's product_name for the file's locale.""" + weight = 25 + try: + language, country_code = self._extract_locale_from_filename() + if not language: + return { + 'check_name': 'garment_name', 'score': 100.0, 'status': 'skipped', + 'message': 'Skipped — could not extract locale from filename', + 'details': {'skipped': True, 'reason': 'no_locale'}, + 'weight': weight, + } + if language.split('-')[0].upper() in ('GEN', 'CEN'): + return { + 'check_name': 'garment_name', 'score': 100.0, 'status': 'skipped', + 'message': f'Skipped for {language.upper()} (generic/censored market)', + 'details': {'skipped': True, 'reason': 'gen_cen_file'}, + 'weight': weight, + } + + pricing_ref = self.pricing_reference or {} + prices_list = pricing_ref.get('prices') or [] + expected_entries = [ + p for p in prices_list + if p.get('language') == language or p.get('country') == country_code + ] + expected_names = [ + p.get('product_name') for p in expected_entries + if p.get('product_name') + ] + + if not expected_names: + return { + 'check_name': 'garment_name', 'score': 100.0, 'status': 'skipped', + 'message': 'Skipped — no product_name in pricing reference for this locale', + 'details': {'skipped': True, 'reason': 'no_expected_names', + 'language': language, 'country_code': country_code}, + 'weight': weight, + } + + # Step 1: detect garment text via LLM + prompt = ( + "Identify any garment / product names visible as text overlays " + "in this video (e.g. 'OVERSIZED COTTON SHIRT', 'WOOL BLAZER'). " + "Ignore prices, CTAs, dates, logos, model names, and campaign " + "headlines. Return ONLY valid JSON (no markdown fences):\n" + "{detected_names: [string, ...], any_text_overlay: boolean}" + ) + usage_context = { + 'module': 'video_qc', 'check_name': 'garment_name', + 'user': self.user, 'session_id': self.session_id, + } + if self._use_direct_video: + response = LLMConfig.call_video_api( + prompt=prompt, video_path=self.file_path, + provider=self.llm_provider, model=self.llm_model, + usage_context=usage_context, + ) + else: + response = LLMConfig.call_vision_api( + prompt=prompt, image_asset=None, + provider=self.llm_provider, model=self.llm_model, + usage_context=usage_context, + ) + text = response.get('text', '') + start, end = text.find('{'), text.rfind('}') + 1 + detected = [] + if start != -1 and end > start: + try: + detected = json.loads(text[start:end]).get('detected_names') or [] + except json.JSONDecodeError: + detected = [] + + if not detected: + return { + 'check_name': 'garment_name', 'score': 100.0, 'status': 'skipped', + 'message': 'No garment-name text detected in video — skipping validation', + 'details': {'skipped': True, 'reason': 'no_detection', + 'expected_names_count': len(expected_names)}, + 'weight': weight, + } + + # Step 2: deterministic match + matched_pair = None + for d in detected: + for e in expected_names: + if _product_names_match(d, e): + matched_pair = (d, e) + break + if matched_pair: + break + + details = { + 'language': language, 'country_code': country_code, + 'detected_names': detected, + 'expected_names_sample': expected_names[:5], + 'expected_names_total': len(expected_names), + 'matched': matched_pair is not None, + 'matched_detected': matched_pair[0] if matched_pair else None, + 'matched_expected': matched_pair[1] if matched_pair else None, + 'llm_provider': self.llm_provider, 'llm_model': self.llm_model, + 'tokens_used': response.get('tokens_used'), + } + + if matched_pair: + return { + 'check_name': 'garment_name', 'score': 100.0, 'status': 'passed', + 'message': ( + f'Matched: "{matched_pair[0]}" ≈ "{matched_pair[1]}" ' + f'(locale {language})' + ), + 'details': details, 'weight': weight, + } + return { + 'check_name': 'garment_name', 'score': 0.0, 'status': 'failed', + 'message': ( + f'No detected name matched any of {len(expected_names)} ' + f'expected product names for {language}. ' + f'Detected: {", ".join(detected)}.' + ), + 'details': details, + 'recommendations': [ + f"Verify the on-screen product name is correct for {language}.", + f"Expected (sample): {', '.join(expected_names[:3])}.", + ], + 'weight': weight, + } + except Exception as e: + logger.error(f"garment_name check error: {e}", exc_info=True) + return { + 'check_name': 'garment_name', 'score': 0, 'status': 'error', + 'message': f'Error: {str(e)}', + 'details': {'error': str(e)}, 'weight': weight, + } + + def _run_title_safe_check(self) -> Dict[str, Any]: + """Advisory check — flag (never fail) when price or garment-name text + falls inside a platform UI overlay zone. Score is always 100; status + is 'warning' on detected issues, otherwise 'passed' / 'skipped'.""" + weight = 0 # advisory — does not contribute to overall score + try: + platform_info = _infer_platform_zones(os.path.basename(self.file_path)) + if not platform_info: + return { + 'check_name': 'title_safe', 'score': 100.0, 'status': 'skipped', + 'message': 'Format has no known platform overlay zones — title-safe not applicable', + 'details': {'skipped': True, 'reason': 'no_platform_zones'}, + 'weight': weight, + } + + zones_text = "; ".join( + f"{z['edge']} ~{z['percent']}%" for z in platform_info['zones'] + ) + prompt = ( + "You are reviewing this video for advisory title-safe issues. " + f"Platform: {platform_info['platform']}. " + f"{platform_info['description']}\n\n" + "Identify frames where the PRICE text or PRODUCT/GARMENT-NAME " + "text falls INSIDE one of these unsafe zones: " + f"{zones_text}. Ignore other text. Return ONLY valid JSON " + "(no markdown fences):\n" + "{\"issues\": [{\"frame_timestamp\": \"0:12\", " + "\"element\": \"price\" | \"garment\", " + "\"zone\": \"top\" | \"bottom\" | \"right\", " + "\"description\": \"...\"}], \"advisory_only\": true}" + ) + usage_context = { + 'module': 'video_qc', 'check_name': 'title_safe', + 'user': self.user, 'session_id': self.session_id, + } + response = LLMConfig.call_video_api( + prompt=prompt, video_path=self.file_path, + provider=self.llm_provider, model=self.llm_model, + usage_context=usage_context, + ) + text = response.get('text', '') + start, end = text.find('{'), text.rfind('}') + 1 + issues = [] + if start != -1 and end > start: + try: + issues = json.loads(text[start:end]).get('issues') or [] + except json.JSONDecodeError: + issues = [] + + details = { + 'platform': platform_info['platform'], + 'platform_description': platform_info['description'], + 'zones': platform_info['zones'], + 'issues': issues, + 'advisory_only': True, + 'llm_provider': self.llm_provider, 'llm_model': self.llm_model, + 'tokens_used': response.get('tokens_used'), + } + + if not issues: + return { + 'check_name': 'title_safe', 'score': 100.0, 'status': 'passed', + 'message': ( + f'Advisory check — no price/garment placement issues on ' + f'{platform_info["platform"]}.' + ), + 'details': details, 'weight': weight, + } + + issue_blurb = "; ".join( + f"{i.get('frame_timestamp','?')} {i.get('element','?')} " + f"in {i.get('zone','?')}" + for i in issues[:5] + ) + return { + 'check_name': 'title_safe', 'score': 100.0, 'status': 'warning', + 'message': ( + f'Advisory — {len(issues)} placement issue(s) on ' + f'{platform_info["platform"]}: {issue_blurb}. ' + f'Does not affect overall score.' + ), + 'details': details, + 'recommendations': [ + f"Review price/garment positioning for {platform_info['platform']} " + f"unsafe zones." + ], + 'weight': weight, + } + except Exception as e: + logger.error(f"title_safe check error: {e}", exc_info=True) + return { + 'check_name': 'title_safe', 'score': 100.0, 'status': 'error', + 'message': f'Error: {str(e)} — does not affect overall score', + 'details': {'error': str(e), 'advisory_only': True}, + 'weight': weight, + } + + def _read_srt_text(self) -> tuple[str, dict]: + """Read the paired SRT file. Returns (text, encoding_info). + + encoding_info: {encoding: str, fallback_used: bool, replacement_chars: int} + Raises OSError if the file can't be read. + """ + with open(self.srt_path, 'rb') as f: + raw = f.read() + info = {'encoding': 'utf-8', 'fallback_used': False, 'replacement_chars': 0} + try: + text = raw.decode('utf-8') + except UnicodeDecodeError: + try: + import chardet + detected = chardet.detect(raw) + enc = detected.get('encoding') or 'latin-1' + text = raw.decode(enc, errors='replace') + info['encoding'] = enc + info['fallback_used'] = True + except ImportError: + text = raw.decode('latin-1', errors='replace') + info['encoding'] = 'latin-1' + info['fallback_used'] = True + info['replacement_chars'] = text.count('�') + return text, info + + def _run_srt_structure_check(self) -> Dict[str, Any]: + """Validate the paired SRT parses, is well-encoded, and has sane cues.""" + import srt as srt_lib + weight = 15 + try: + text, enc_info = self._read_srt_text() + except OSError as e: + return { + 'check_name': 'srt_structure', 'score': 0.0, 'status': 'failed', + 'message': f'Could not read SRT file: {e}', + 'details': {'error': str(e)}, 'weight': weight, + } + + warnings: list[str] = [] + if enc_info['fallback_used']: + warnings.append( + f"SRT decoded as {enc_info['encoding']} (not UTF-8). " + "Upstream tool may be emitting legacy encoding." + ) + + if enc_info['replacement_chars'] > 0: + return { + 'check_name': 'srt_structure', 'score': 0.0, 'status': 'failed', + 'message': ( + f"SRT contains {enc_info['replacement_chars']} Unicode " + f"replacement char(s) — encoding loss." + ), + 'details': {'encoding': enc_info}, + 'weight': weight, + } + + try: + cues = list(srt_lib.parse(text)) + except (srt_lib.SRTParseError, ValueError) as e: + return { + 'check_name': 'srt_structure', 'score': 0.0, 'status': 'failed', + 'message': f'SRT parse error: {e}', + 'details': {'error': str(e), 'encoding': enc_info}, + 'weight': weight, + } + + if not cues: + return { + 'check_name': 'srt_structure', 'score': 0.0, 'status': 'failed', + 'message': 'SRT contains no cues', + 'details': {'encoding': enc_info}, 'weight': weight, + } + + indices = [c.index for c in cues] + expected = list(range(1, len(cues) + 1)) + if indices != expected: + if any(i is None for i in indices): + warnings.append("One or more cues are missing index numbers (player-tolerant).") + else: + warnings.append(f"Cue indices not contiguous 1..{len(cues)}: {indices[:5]}...") + + empty_cues = [c.index for c in cues if not (c.content or '').strip()] + if empty_cues: + warnings.append(f"{len(empty_cues)} cue(s) have empty text (indices: {empty_cues[:5]})") + + score = 100.0 - 10.0 * len(warnings) + score = max(score, 70.0) + status = 'passed' if score >= 90 else 'warning' + details = { + 'encoding': enc_info, + 'cue_count': len(cues), + 'warnings': warnings, + 'srt_filename': os.path.basename(self.srt_path), + } + message = ( + f'{len(cues)} cues parsed, encoding {enc_info["encoding"]}' + + (f', {len(warnings)} warning(s)' if warnings else '') + ) + return { + 'check_name': 'srt_structure', + 'score': score, 'status': status, 'message': message, + 'details': details, 'weight': weight, + } + + def _run_srt_timing_check(self, video_duration: float) -> Dict[str, Any]: + """Validate cue timings against video duration + broadcast norms.""" + import srt as srt_lib + weight = 10 + try: + text, _ = self._read_srt_text() + cues = list(srt_lib.parse(text)) + except Exception as e: + return { + 'check_name': 'srt_timing', 'score': 0.0, 'status': 'failed', + 'message': f'Could not parse SRT for timing: {e}', + 'details': {'error': str(e)}, 'weight': weight, + } + + if not cues: + return { + 'check_name': 'srt_timing', 'score': 0.0, 'status': 'failed', + 'message': 'SRT has no cues', + 'details': {}, 'weight': weight, + } + + failures: list[str] = [] + warnings: list[str] = [] + + # Rule 1: start < end, both >= 0 + for c in cues: + s = c.start.total_seconds() + e = c.end.total_seconds() + if s < 0 or e < 0: + failures.append(f"Cue {c.index}: negative timestamp ({s}s -> {e}s)") + elif s >= e: + failures.append(f"Cue {c.index}: start >= end ({s}s -> {e}s)") + + # Rule 2: overlaps + overlaps: list[str] = [] + for prev, nxt in zip(cues, cues[1:]): + if prev.end > nxt.start: + overlaps.append( + f"Cue {prev.index} ends at {prev.end.total_seconds():.2f}s, " + f"cue {nxt.index} starts at {nxt.start.total_seconds():.2f}s" + ) + if overlaps: + if len(overlaps) >= 3: + failures.append(f"{len(overlaps)} overlapping cue pairs (first 3): " + "; ".join(overlaps[:3])) + else: + warnings.append(f"{len(overlaps)} overlapping cue pair(s): " + "; ".join(overlaps)) + + # Rule 3: last cue end <= video_duration + 0.5s + if video_duration: + last_end = cues[-1].end.total_seconds() + if last_end > video_duration + 0.5: + failures.append( + f"Last cue ends at {last_end:.2f}s but video is only " + f"{video_duration:.2f}s long" + ) + + # Rules 4-7: warnings only (broadcast norms) + reading_speed_outliers: list[str] = [] + line_length_outliers: list[str] = [] + line_count_outliers: list[str] = [] + duration_outliers: list[str] = [] + for c in cues: + content = (c.content or '').strip() + if not content: + continue + duration = (c.end - c.start).total_seconds() + chars = len(content.replace('\n', ' ')) + cps = chars / duration if duration > 0 else 0 + if cps < 5 or cps > 25: + reading_speed_outliers.append(f"cue {c.index}: {cps:.1f} cps") + lines = content.split('\n') + longest = max((len(line) for line in lines), default=0) + if longest > 42: + line_length_outliers.append(f"cue {c.index}: {longest} chars") + if len(lines) > 2: + line_count_outliers.append(f"cue {c.index}: {len(lines)} lines") + if duration < 0.7 or duration > 7.0: + duration_outliers.append(f"cue {c.index}: {duration:.2f}s") + + for outliers, label in [ + (reading_speed_outliers, "Reading speed outside 5-25 cps"), + (line_length_outliers, "Line length > 42 chars"), + (line_count_outliers, "More than 2 lines per cue"), + (duration_outliers, "Cue duration outside 0.7-7.0s"), + ]: + if outliers: + shown = outliers[:5] + more = f" (+{len(outliers)-5} more)" if len(outliers) > 5 else "" + warnings.append(f"{label}: " + ", ".join(shown) + more) + + warning_loss = min(5.0 * len(warnings), 50.0) + score = 100.0 - 30.0 * len(failures) - warning_loss + score = max(score, 0.0) + if failures: + status = 'failed' + elif score >= 90: + status = 'passed' + elif score >= 70: + status = 'warning' + else: + status = 'failed' + + return { + 'check_name': 'srt_timing', 'score': score, 'status': status, + 'message': ( + f'{len(cues)} cues; {len(failures)} failure(s), {len(warnings)} warning(s)' + ), + 'details': { + 'cue_count': len(cues), + 'video_duration': video_duration, + 'failures': failures, + 'warnings': warnings, + 'srt_filename': os.path.basename(self.srt_path), + }, + 'weight': weight, + } + + def _run_srt_language_check(self) -> Dict[str, Any]: + """Detect SRT language and compare to the locale-derived expected language.""" + import srt as srt_lib + weight = 20 + try: + text, _ = self._read_srt_text() + cues = list(srt_lib.parse(text)) + except Exception as e: + return { + 'check_name': 'srt_language', 'score': 0.0, 'status': 'failed', + 'message': f'Could not parse SRT for language check: {e}', + 'details': {'error': str(e)}, 'weight': weight, + } + if not cues: + return { + 'check_name': 'srt_language', 'score': 0.0, 'status': 'failed', + 'message': 'SRT has no cues', + 'details': {}, 'weight': weight, + } + + lang_code, market = self._extract_locale_from_filename() + expected_lang_code = (lang_code.split('-')[0] if lang_code else '').lower() + expected_lang_display = _language_display(expected_lang_code) + if not expected_lang_code: + return { + 'check_name': 'srt_language', 'score': 100.0, 'status': 'skipped', + 'message': 'Skipped — could not extract expected language from video filename', + 'details': {'skipped': True, 'reason': 'no_expected_language'}, + 'weight': weight, + } + + # Sample cues: first 5, last 5, plus evenly-distributed middle; cap 15 / 1500 chars + n = len(cues) + sample_indices = set() + for i in range(min(5, n)): + sample_indices.add(i) + for i in range(min(5, n)): + sample_indices.add(max(0, n - 1 - i)) + if n > 10: + step = max(1, n // 6) + for k in range(step, n - step, step): + sample_indices.add(k) + if len(sample_indices) >= 15: + break + sample_texts = [] + budget = 1500 + for i in sorted(sample_indices): + t = (cues[i].content or '').strip().replace('\n', ' ') + if not t: + continue + if budget - len(t) < 0: + break + sample_texts.append(t) + budget -= len(t) + sample_blob = "\n---\n".join(sample_texts) + + prompt = ( + "What language is this subtitle text written in? Sample cues " + "are separated by '---'. Return ONLY valid JSON (no markdown):\n" + "{\"detected_language\": \"German\" or similar full English name,\n" + " \"iso_code\": \"de\" or similar ISO-639-1 code,\n" + " \"confidence\": 0.0-1.0,\n" + " \"mixed_language\": boolean}\n\n" + "Be strict — proper nouns and brand names don't count as language indicators.\n\n" + f"SAMPLE:\n{sample_blob}" + ) + + # TODO: when LLMConfig grows a `call_text_api` helper, route through it + # for unified usage logging + retry. For v1 we call genai directly. + try: + LLMConfig.validate_configuration('google') + import google.generativeai as genai + gen_model = genai.GenerativeModel(self.llm_model or 'gemini-2.5-flash') + gen_response = gen_model.generate_content([prompt]) + rtext = gen_response.text or '' + um = getattr(gen_response, 'usage_metadata', None) + tokens_used = getattr(um, 'total_token_count', None) if um else None + except Exception as e: + logger.error(f"srt_language LLM call failed: {e}", exc_info=True) + return { + 'check_name': 'srt_language', 'score': 0.0, 'status': 'error', + 'message': f'LLM error: {e}', + 'details': {'error': str(e), 'expected_iso': expected_lang_code, + 'expected_language': expected_lang_display}, 'weight': weight, + } + + start, end = rtext.find('{'), rtext.rfind('}') + 1 + parsed = {} + if start != -1 and end > start: + try: + parsed = json.loads(rtext[start:end]) + except json.JSONDecodeError: + parsed = {} + + detected_iso = (parsed.get('iso_code') or '').lower() + detected_lang = parsed.get('detected_language') or '(unknown)' + confidence = float(parsed.get('confidence') or 0.0) + mixed = bool(parsed.get('mixed_language')) + + common_details = { + **parsed, + 'expected_iso': expected_lang_code, + 'expected_language': expected_lang_display, + 'sample_cue_count': len(sample_texts), + 'tokens_used': tokens_used, + } + + if detected_iso == expected_lang_code: + return { + 'check_name': 'srt_language', 'score': 100.0, 'status': 'passed', + 'message': ( + f'Detected {detected_lang} ({detected_iso}) — matches expected ' + f'{expected_lang_display} ({expected_lang_code}).' + ), + 'details': common_details, 'weight': weight, + } + if mixed or confidence < 0.8: + return { + 'check_name': 'srt_language', 'score': 50.0, 'status': 'warning', + 'message': ( + f'Low-confidence or mixed language: detected {detected_lang} ' + f'({detected_iso}) at confidence {confidence:.2f}, mixed={mixed}. ' + f'Expected {expected_lang_display} ({expected_lang_code}).' + ), + 'details': common_details, 'weight': weight, + } + return { + 'check_name': 'srt_language', 'score': 0.0, 'status': 'failed', + 'message': ( + f'Wrong language: detected {detected_lang} ({detected_iso}), ' + f'expected {expected_lang_display} ({expected_lang_code}).' + ), + 'details': common_details, + 'recommendations': [ + f"Verify the SRT was translated for {expected_lang_display} " + f"({expected_lang_code}-{market})." + ], + 'weight': weight, + } + def _detect_prices_in_video(self, grid_path: str, language: str) -> Dict[str, Any]: """LLM call to detect prices + currency in the video (direct) or frame grid. Returns a dict {currency_found, currency_symbol, price_value, diff --git a/modules/video_qc/profiles/profiles.yaml b/modules/video_qc/profiles/profiles.yaml index f3c7c12..6ebc8a6 100644 --- a/modules/video_qc/profiles/profiles.yaml +++ b/modules/video_qc/profiles/profiles.yaml @@ -32,6 +32,39 @@ profiles: llm_model: "gpt-4o" description: "AI-powered censorship check (CEN markets only)" + - name: "garment_name" + weight: 25 + enabled: true + llm_provider: "google" + llm_model: "gemini-2.5-flash" + description: "Validate on-screen garment name against pricing reference" + + - name: "title_safe" + weight: 0 + enabled: true + llm_provider: "google" + llm_model: "gemini-2.5-flash" + description: "Advisory — flag price/garment text inside platform UI overlay zones" + + - name: "srt_structure" + weight: 15 + enabled: true + llm_provider: null + description: "Validate paired SRT parses, encoding, ordering, cue text" + + - name: "srt_timing" + weight: 10 + enabled: true + llm_provider: null + description: "Validate SRT cue timings vs video duration + broadcast norms" + + - name: "srt_language" + weight: 20 + enabled: true + llm_provider: "google" + llm_model: "gemini-2.5-flash" + description: "Detect SRT language vs expected from video locale" + quick_video: name: "Quick Video Check (BETA)" description: "Fast validation of essential video requirements" diff --git a/modules/video_qc/routes.py b/modules/video_qc/routes.py index afb32a6..448c9a1 100644 --- a/modules/video_qc/routes.py +++ b/modules/video_qc/routes.py @@ -25,7 +25,9 @@ from core.models.database import db logger = logging.getLogger(__name__) -ALLOWED_EXTENSIONS = {'mp4', 'mov', 'avi', 'mkv'} +ALLOWED_VIDEO_EXTENSIONS = {'mp4', 'mov', 'avi', 'mkv'} +ALLOWED_SRT_EXTENSIONS = {'srt'} +ALLOWED_EXTENSIONS = ALLOWED_VIDEO_EXTENSIONS | ALLOWED_SRT_EXTENSIONS MAX_BATCH_FILES = 50 @@ -33,6 +35,14 @@ def allowed_file(filename): return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS +def is_video(filename): + return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_VIDEO_EXTENSIONS + + +def is_srt(filename): + return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_SRT_EXTENSIONS + + @video_qc_bp.route('/') @video_qc_bp.route('/index') def index(): @@ -135,6 +145,37 @@ def configure(session_id): ) +@video_qc_bp.route('/pairing-preview/') +def pairing_preview(session_id): + """Pre-flight: return pair_map + unpaired counts for the configure UI.""" + upload_path = os.path.join( + current_app.config['VIDEO_QC_UPLOAD_PATH'], session_id + ) + if not os.path.isdir(upload_path): + return jsonify({'error': 'Session upload folder not found'}), 404 + files = os.listdir(upload_path) + video_paths = [os.path.join(upload_path, f) for f in files if is_video(f)] + srt_paths = [os.path.join(upload_path, f) for f in files if is_srt(f)] + if not srt_paths: + return jsonify({ + 'pairs': [{'video': os.path.basename(v), 'srt': None} for v in video_paths], + 'unpaired_srts': [], + 'unpaired_videos': [os.path.basename(v) for v in video_paths], + 'srt_count': 0, + }) + from .utils.srt_pairing import pair_batch + pair_map, unpaired_srts, unpaired_videos = pair_batch(video_paths, srt_paths) + return jsonify({ + 'pairs': [ + {'video': os.path.basename(v), 'srt': os.path.basename(s) if s else None} + for v, s in pair_map.items() + ], + 'unpaired_srts': [os.path.basename(s) for s in unpaired_srts], + 'unpaired_videos': [os.path.basename(v) for v in unpaired_videos], + 'srt_count': len(srt_paths), + }) + + @video_qc_bp.route('/execute', methods=['POST']) def execute(): """Start single-file video QC execution.""" @@ -217,11 +258,14 @@ def execute_batch(): upload_path = os.path.join( current_app.config['VIDEO_QC_UPLOAD_PATH'], session_id ) - files = [f for f in os.listdir(upload_path) if allowed_file(f)] - if not files: + files = os.listdir(upload_path) + video_files = [f for f in files if is_video(f)] + srt_files = [f for f in files if is_srt(f)] + if not video_files: return jsonify({'error': 'No video files found'}), 404 - file_paths = [os.path.join(upload_path, f) for f in files] + file_paths = [os.path.join(upload_path, f) for f in video_files] + srt_paths = [os.path.join(upload_path, f) for f in srt_files] user_email = current_user_email() @@ -234,6 +278,7 @@ def execute_batch(): batch_executor = BatchVideoQCExecutor( session_id=session_id, file_paths=file_paths, + srt_paths=srt_paths, job_number=job_number, llm_provider=llm_provider, llm_model=llm_model, diff --git a/modules/video_qc/templates/video_qc/configure.html b/modules/video_qc/templates/video_qc/configure.html index a03cef4..bea54a5 100644 --- a/modules/video_qc/templates/video_qc/configure.html +++ b/modules/video_qc/templates/video_qc/configure.html @@ -29,6 +29,18 @@ + {# SRT pairing pre-flight summary — hidden until JS confirms at least one SRT was uploaded #} + +
@@ -129,6 +141,49 @@ const isBatch = {{ 'true' if file_count and file_count > 1 else 'false' }}; const fileCount = {{ file_count or 1 }}; + // SRT pairing pre-flight: hidden unless ≥1 SRT was uploaded for this session. + // Renders filenames via textContent (NOT innerHTML) — names come from user + // uploads and could contain HTML; building DOM nodes prevents XSS. + (async function loadSrtPairing() { + try { + const url = window.BASE_URL + '/video-qc/pairing-preview/' + encodeURIComponent(sessionId); + const resp = await fetch(url); + if (!resp.ok) return; + const data = await resp.json(); + if (!data || !data.srt_count || data.srt_count === 0) return; + + const box = document.getElementById('srtPairingSummary'); + const list = document.getElementById('srtPairingList'); + box.style.display = ''; + + const paired = data.pairs.filter(p => p.srt).length; + const unpairedVideos = data.unpaired_videos.length; + const lines = [ + '✓ ' + paired + ' video(s) paired with SRTs', + '⚠ ' + unpairedVideos + ' video(s) without an SRT — SRT checks will skip', + '⚠ ' + data.unpaired_srts.length + ' SRT file(s) unpaired — will be ignored', + ]; + list.replaceChildren(...lines.map(text => { + const li = document.createElement('li'); + li.textContent = text; + return li; + })); + + if (data.unpaired_srts.length > 0) { + const wrap = document.getElementById('srtUnpairedWrap'); + const ul = document.getElementById('srtUnpairedList'); + wrap.style.display = ''; + ul.replaceChildren(...data.unpaired_srts.map(name => { + const li = document.createElement('li'); + li.textContent = name; // safe — filenames not interpreted as HTML + return li; + })); + } + } catch (err) { + console.warn('Could not load SRT pairing preview:', err); + } + })(); + (async function loadCampaigns() { try { const resp = await fetch(window.BASE_URL + '/campaigns/api/list'); diff --git a/modules/video_qc/templates/video_qc/upload.html b/modules/video_qc/templates/video_qc/upload.html index 7640562..9a38098 100644 --- a/modules/video_qc/templates/video_qc/upload.html +++ b/modules/video_qc/templates/video_qc/upload.html @@ -57,9 +57,9 @@

Drag & Drop Videos Here

or click to browse

-

Supported: MP4, MOV, AVI, MKV (up to 50 files)

+

Supported: MP4, MOV, AVI, MKV (up to 50 files). SRT subtitle files (.srt) can be included alongside videos for SRT QC.

+ accept=".mp4,.mov,.avi,.mkv,.srt" multiple>
@@ -123,7 +123,7 @@ const validationMessage = document.getElementById('validationMessage'); const MAX_FILES = 50; - const ALLOWED_EXTENSIONS = ['mp4', 'mov', 'avi', 'mkv']; + const ALLOWED_EXTENSIONS = ['mp4', 'mov', 'avi', 'mkv', 'srt']; let selectedFiles = []; uploadZone.addEventListener('click', () => fileInput.click()); diff --git a/modules/video_qc/utils/srt_pairing.py b/modules/video_qc/utils/srt_pairing.py new file mode 100644 index 0000000..5fb6ef9 --- /dev/null +++ b/modules/video_qc/utils/srt_pairing.py @@ -0,0 +1,264 @@ +""" +SRT ↔ Video filename pairing helpers. + +Pure functions. Used by BatchVideoQCExecutor at pre-flight to pair each +SRT file to its video, even when filenames diverge in style (e.g. +`CFUL262B01_PP_RIO_INTRO_15C.srt_8852296_de-AT.srt` should pair with +`7147775_AT-de_CFUL262B01_PP_RIO_INTRO_15C_4x5_SoMe_MASTER.mp4`). + +Token parsing extracts campaign_code, clip_slug, and locale from each +filename. score_pair() combines them into a 0.0–1.0 score; pair_batch() +greedily assigns SRTs to videos above a 0.7 threshold. + +Verified at REPL against the test folder testing_15may/srt/. +""" +import os +import re +from typing import Iterable + +# Campaign-code regex — see core/utils/campaign_code.py for the canonical +# version. We re-declare here to keep this module dependency-free, but the +# patterns must stay in sync. Two formats supported: +# • Legacy: 4 digits + optional uppercase letter (1013A, 4116) +# • New: 4 alpha + 2 digits (year) + 3-5 alphanum (CFUL262B01, CFUL263C01D) +_CAMPAIGN_RE = re.compile( + r"\b(" + r"\d{4}[A-Z]?" # legacy + r"|[A-Z]{4}\d{2}[A-Z0-9]{3,5}" # new (10-11 chars) + r")\b" +) + + +def normalise_slug(s: str) -> str: + """Lowercase and strip every non-alphanumeric character. + + `PP_RIO_INTRO_15C` -> `ppriointro15c` + `RIO_INTRO15C` -> `riointro15c` + `RIO-INTRO 15c` -> `riointro15c` + + Substring containment in score_pair() handles the case where one + slug has more tokens than the other (e.g. video's `pp_rio_intro_15c` + vs SRT's `rio_intro_15c`). + """ + if not s: + return '' + return re.sub(r"[^a-z0-9]", "", s.lower()) + + +def canonical_locale(s: str) -> str | None: + """Canonicalise locale strings to 'lang-MARKET' (lowercase-Uppercase). + + Accepts: 'AT-de', 'de-AT', 'de_AT', 'AT_de', 'de-at', etc. + Returns 'de-AT' for any of the above. None when input doesn't look + like a 2-2 locale pair. + + Heuristic: of the two halves, the all-uppercase shape is the market. + If both/neither are uppercase, treat the first as market (H&M + convention for video filenames is Market_lang). + """ + if not s: + return None + m = re.match(r"^([A-Za-z]{2})[-_]([A-Za-z]{2})$", s.strip()) + if not m: + return None + a, b = m.group(1), m.group(2) + if a.isupper() and b.islower(): + market, lang = a.upper(), b.lower() + elif b.isupper() and a.islower(): + market, lang = b.upper(), a.lower() + else: + # Ambiguous case shape — assume Market_lang (H&M video convention). + market, lang = a.upper(), b.lower() + return f"{lang}-{market}" + + +def parse_video_tokens(filename: str) -> dict: + """Extract {campaign_code, clip_slug, locale} from a video filename. + + Video filename forms (H&M conventions): + • `7147775_AT-de_CFUL262B01_PP_RIO_INTRO_15C_4x5_SoMe_MASTER.mp4` + • `6898354_1013A_SPRING_W_10A_TT_9x16_TK_Stories_Vogue_ES-es.mp4` + + clip_slug = everything between (locale OR campaign_code) and any + aspect-ratio token (1x1 / 4x5 / 9x16) or the file extension. We + normalise it via normalise_slug() before storage. + """ + base = os.path.splitext(os.path.basename(filename))[0] + parts = base.split('_') + + locale = None + for p in parts: + cl = canonical_locale(p) + if cl: + locale = cl + break + + campaign_code = None + for p in parts: + if _CAMPAIGN_RE.fullmatch(p): + campaign_code = p + break + if not campaign_code: + m = _CAMPAIGN_RE.search(base) + if m: + campaign_code = m.group(1) + + slug_parts = [] + aspect_pat = re.compile(r"^\d+x\d+$", re.IGNORECASE) + for p in parts: + if p.isdigit(): + continue + if canonical_locale(p): + continue + if _CAMPAIGN_RE.fullmatch(p): + continue + if aspect_pat.match(p): + continue + slug_parts.append(p) + + raw_slug = '_'.join(slug_parts) + return { + 'campaign_code': campaign_code, + 'clip_slug': normalise_slug(raw_slug), + 'locale': locale, + '_raw_slug_parts': slug_parts, + } + + +def parse_srt_tokens(filename: str) -> dict: + """Extract {campaign_code, clip_slug, locale} from an SRT filename. + + Two known forms (both seen in testing_15may/srt/): + A. `CFUL262B01_PP_RIO_INTRO_15C.srt_8852296_de-AT.srt` + — campaign code + clip slug + locale all present, with an + internal `.srt__` artifact from the upstream tool. + B. `RIO_INTRO6B_en-CH.srt` + — no campaign code; clip slug and locale only. + + We strip ALL `.srt` occurrences from the base, drop leading numeric + IDs, and run the same campaign/locale/slug extraction as parse_video_tokens. + """ + base = os.path.basename(filename) + base = base.replace('.srt', '') + parts = base.split('_') + + locale = None + for p in parts: + cl = canonical_locale(p) + if cl: + locale = cl + break + + campaign_code = None + for p in parts: + if _CAMPAIGN_RE.fullmatch(p): + campaign_code = p + break + if not campaign_code: + m = _CAMPAIGN_RE.search(base) + if m: + campaign_code = m.group(1) + + slug_parts = [] + for p in parts: + if p.isdigit(): + continue + if canonical_locale(p): + continue + if _CAMPAIGN_RE.fullmatch(p): + continue + if not p: + continue + slug_parts.append(p) + + raw_slug = '_'.join(slug_parts) + return { + 'campaign_code': campaign_code, + 'clip_slug': normalise_slug(raw_slug), + 'locale': locale, + '_raw_slug_parts': slug_parts, + } + + +PAIR_THRESHOLD = 0.7 + + +def score_pair(video_tokens: dict, srt_tokens: dict) -> float: + """Score how confidently this SRT belongs to this video. 0.0..1.0. + + Weights (additive, capped at 1.0): + • Locale match (both present, equal after canonicalisation): 0.5 + • Campaign code match (both present, equal): 0.3 + • Clip slug match (one is substring of the other, after + normalise_slug — see parse_*_tokens): 0.4 + + Hard zero rules: + • Both locales present AND differ -> 0.0 + • Both slugs present AND neither is substring of the other -> 0.0 + """ + v_loc = video_tokens.get('locale') + s_loc = srt_tokens.get('locale') + v_code = video_tokens.get('campaign_code') + s_code = srt_tokens.get('campaign_code') + v_slug = video_tokens.get('clip_slug') or '' + s_slug = srt_tokens.get('clip_slug') or '' + + # Hard reject: locales both present and divergent + if v_loc and s_loc and v_loc != s_loc: + return 0.0 + # Hard reject: slugs both present and neither contains the other + if v_slug and s_slug and v_slug not in s_slug and s_slug not in v_slug: + return 0.0 + + score = 0.0 + if v_loc and s_loc and v_loc == s_loc: + score += 0.5 + if v_code and s_code and v_code == s_code: + score += 0.3 + if v_slug and s_slug and (v_slug in s_slug or s_slug in v_slug): + score += 0.4 + + return min(score, 1.0) + + +def pair_batch( + video_paths: Iterable[str], + srt_paths: Iterable[str], +) -> tuple[dict, list[str], list[str]]: + """Pair each SRT to at most one video (and vice versa). Greedy on the + highest-scoring pairs above PAIR_THRESHOLD. + + Returns: + pair_map: dict[video_path, srt_path | None] for every video + (None if no SRT paired) + unpaired_srts: list[str] of SRTs left over + unpaired_videos: list[str] of videos with no SRT + """ + video_paths = list(video_paths) + srt_paths = list(srt_paths) + + candidates = [] + v_tokens = {v: parse_video_tokens(v) for v in video_paths} + s_tokens = {s: parse_srt_tokens(s) for s in srt_paths} + for v in video_paths: + for s in srt_paths: + score = score_pair(v_tokens[v], s_tokens[s]) + if score >= PAIR_THRESHOLD: + candidates.append((score, v, s)) + + # Greedy: highest score first; tie-break by filename for determinism + candidates.sort(key=lambda t: (-t[0], os.path.basename(t[1]), os.path.basename(t[2]))) + + used_videos: set = set() + used_srts: set = set() + pair_map: dict = {v: None for v in video_paths} + for score, v, s in candidates: + if v in used_videos or s in used_srts: + continue + pair_map[v] = s + used_videos.add(v) + used_srts.add(s) + + unpaired_srts = [s for s in srt_paths if s not in used_srts] + unpaired_videos = [v for v in video_paths if v not in used_videos] + return pair_map, unpaired_srts, unpaired_videos diff --git a/requirements.txt b/requirements.txt index b72ca4a..1db3cee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -44,6 +44,10 @@ nest_asyncio>=1.5.0 # Excel Parsing (Media Plans / Price Sheets) openpyxl>=3.1.0 +# SRT subtitle parsing (Video QC — SRT checks) +srt==3.5.3 +chardet>=5.0 + # Configuration python-dotenv==1.0.0 PyYAML==6.0.1