diff --git a/backend/app/pipeline/agents/agent_1_validator.py b/backend/app/pipeline/agents/agent_1_validator.py index d7c1cca..433f5d7 100644 --- a/backend/app/pipeline/agents/agent_1_validator.py +++ b/backend/app/pipeline/agents/agent_1_validator.py @@ -85,6 +85,7 @@ class Agent1Validator(BaseAgent): "locale_considerations_file" ), date_pct_formats_file=self.file_manifest_raw.get("date_pct_formats_file"), + supplementary_files=self.file_manifest_raw.get("supplementary_files", []), ) # Load reference files (for validation) diff --git a/backend/app/pipeline/agents/agent_single.py b/backend/app/pipeline/agents/agent_single.py index 39dde3b..4f9abd3 100644 --- a/backend/app/pipeline/agents/agent_single.py +++ b/backend/app/pipeline/agents/agent_single.py @@ -57,15 +57,27 @@ def _resolve_all_tm_paths( """Resolve TM file paths for multiple channels. Tries the legacy hardcoded CHANNEL_FILE_MAP first (which has the - canonical lowercase patterns for the original channels). For any + canonical lowercase keys for the original channels). For any channel not in the map (e.g. newly registered like PrimeCBM), falls back to the generic `flat__.json` pattern. - Missing files at the resolved path are silently skipped at load time. + + If the canonical path doesn't exist on disk, scan the locale's TM + directory for any filename that matches case-insensitively and use + that path instead. Protects against drift between the registry's + stored channel casing and the on-disk filename casing — both code + paths historically have produced mixed-case outputs. """ lc = locale_code.lower() tm_dir = os.path.join(settings.STORAGE_ROOT, "amazon", "tm", locale_code) paths: list[str] = [] + # Build a one-time case-insensitive index of the locale's TM directory. + # Empty/missing dir is fine — the resolved paths just won't exist. + dir_index: dict[str, str] = {} + if os.path.isdir(tm_dir): + for entry in os.listdir(tm_dir): + dir_index[entry.lower()] = entry + for ch in tm_channels: pattern = CHANNEL_FILE_MAP.get(ch.lower()) if pattern: @@ -74,7 +86,14 @@ def _resolve_all_tm_paths( # Auto-discovered channel — use the generic pattern with the # channel name as-cased in the registry. filename = TM_FILENAME_PATTERN.format(channel=ch, lc=lc) - path = os.path.join(tm_dir, filename) + + # Case-insensitive lookup: if the canonical filename doesn't + # exist exactly, resolve against the dir index. + actual = dir_index.get(filename.lower()) + if actual is not None: + path = os.path.join(tm_dir, actual) + else: + path = os.path.join(tm_dir, filename) # leave as-is; will be skipped at load if path not in paths: paths.append(path) @@ -461,7 +480,16 @@ class AgentSingle(BaseAgent): channel = context.job_params.channel programme = context.job_params.programme campaign_name = context.job_params.campaign_name - tm_channels = context.job_params.tm_channels or [channel] + # Distinguish "user explicitly sent empty list" (load no TMs) from + # "tm_channels was None" (legacy job → fall back to campaign channel). + # Bare `or` treats [] as falsy and would re-default to [channel] — + # which silently reintroduces the campaign TM after the user asked + # for none. + tm_channels = ( + context.job_params.tm_channels + if context.job_params.tm_channels is not None + else [channel] + ) logger.info( "AgentSingle starting: locale=%s channel=%s tm_channels=%s lines=%d", diff --git a/backend/app/tasks/job_tasks.py b/backend/app/tasks/job_tasks.py index 283987f..6937efd 100644 --- a/backend/app/tasks/job_tasks.py +++ b/backend/app/tasks/job_tasks.py @@ -282,7 +282,14 @@ def process_locale_instance(self, job_id: str, locale_code: str) -> dict: "programme": job.programme.value, "campaign_name": job.campaign_name, "context_prompt": job.context_prompt, - "tm_channels": job.tm_channels or [job.channel], + # Preserve an explicit empty list ("no TMs") from + # the user; only fall back to the campaign channel + # when the field was never set (legacy jobs). + "tm_channels": ( + job.tm_channels + if job.tm_channels is not None + else [job.channel] + ), "llm_model": job.llm_model, } diff --git a/frontend/src/components/jobs/JobWizard/StepReview.tsx b/frontend/src/components/jobs/JobWizard/StepReview.tsx index 6ac9d37..b86c585 100644 --- a/frontend/src/components/jobs/JobWizard/StepReview.tsx +++ b/frontend/src/components/jobs/JobWizard/StepReview.tsx @@ -51,8 +51,11 @@ export function StepReview({ data, onBack }: StepReviewProps) { programme: data.programme.toLowerCase(), channel: data.channel.toLowerCase(), sub_channel: data.sub_channel ? data.sub_channel.toLowerCase() : null, - // Always send an array (even empty) so the backend respects "no TMs" vs "default to channel" - tm_channels: data.tm_channels.map(c => c.toLowerCase()), + // Always send an array (even empty) so the backend respects "no TMs" vs "default to channel". + // Do NOT lowercase the channel names — the pipeline resolves TM filenames using the + // exact case from the registry (e.g. flat_PrimeCBMT_nl-be.json). Lowercasing breaks + // case-sensitive filesystem lookups for any channel not in the legacy CHANNEL_FILE_MAP. + tm_channels: data.tm_channels, llm_model: data.llm_model || undefined, locale_codes: data.locales, context_prompt: data.context_override || undefined,