Round 2.7: three broken promises — empty TM, supplementary files, new-TM casing
Bug 1: Empty tm_channels was silently re-defaulted to [campaign channel]
in both agent_single.py and job_tasks.py via `or [channel]`. Python's
`or` treats [] as falsy, so the frontend's empty-list intent was lost.
Fixed by replacing `or` with an explicit `is not None` check at both
sites. Empty list now means "load no TMs"; None still falls back.
Bug 2: Supplementary files dropped by Agent1Validator. The validator
built FileManifest(...) with explicit kwargs but forgot
supplementary_files, so the raw field from _resolve_file_manifest
never reached agent_single.run(). Files were uploaded to disk but
never inlined into the LLM context. Fixed by adding
supplementary_files=raw.get("supplementary_files", []) to the
validator's FileManifest construction.
Bug 3: New TM channels lowercased in StepReview.tsx, breaking
case-sensitive file lookup. On Linux, "flat_primecbmt_nl-be.json"
≠ "flat_PrimeCBMT_nl-be.json", so the file was silently skipped and
zero TM entries loaded. Legacy channels worked only because the
hardcoded CHANNEL_FILE_MAP has lowercase keys mapping to
canonically-cased filenames; auto-discovered channels (PrimeCBM,
PrimeCBMT, etc.) had no such safety net. Two-part fix:
3a. StepReview.tsx no longer lowercases tm_channels — preserves case
end-to-end from registry → frontend → backend → disk.
3b. _resolve_all_tm_paths builds a case-insensitive index of the
locale's TM directory once per call and resolves filenames
against it. Forgives any historical case-drift between registry
and disk.
Verified end-to-end with a standalone test script run inside the
backend container: 8/8 assertions pass covering empty tm_channels,
supplementary file passthrough, exact-case lookups, lowercase
fallback, missing channels, legacy MASS in both cases, and empty
tm_channels yielding no TM paths.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
100eddbc21
commit
bb8ed2a004
4 changed files with 46 additions and 7 deletions
|
|
@ -85,6 +85,7 @@ class Agent1Validator(BaseAgent):
|
||||||
"locale_considerations_file"
|
"locale_considerations_file"
|
||||||
),
|
),
|
||||||
date_pct_formats_file=self.file_manifest_raw.get("date_pct_formats_file"),
|
date_pct_formats_file=self.file_manifest_raw.get("date_pct_formats_file"),
|
||||||
|
supplementary_files=self.file_manifest_raw.get("supplementary_files", []),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Load reference files (for validation)
|
# Load reference files (for validation)
|
||||||
|
|
|
||||||
|
|
@ -57,15 +57,27 @@ def _resolve_all_tm_paths(
|
||||||
"""Resolve TM file paths for multiple channels.
|
"""Resolve TM file paths for multiple channels.
|
||||||
|
|
||||||
Tries the legacy hardcoded CHANNEL_FILE_MAP first (which has the
|
Tries the legacy hardcoded CHANNEL_FILE_MAP first (which has the
|
||||||
canonical lowercase patterns for the original channels). For any
|
canonical lowercase keys for the original channels). For any
|
||||||
channel not in the map (e.g. newly registered like PrimeCBM), falls
|
channel not in the map (e.g. newly registered like PrimeCBM), falls
|
||||||
back to the generic `flat_<Channel>_<lc>.json` pattern.
|
back to the generic `flat_<Channel>_<lc>.json` pattern.
|
||||||
Missing files at the resolved path are silently skipped at load time.
|
|
||||||
|
If the canonical path doesn't exist on disk, scan the locale's TM
|
||||||
|
directory for any filename that matches case-insensitively and use
|
||||||
|
that path instead. Protects against drift between the registry's
|
||||||
|
stored channel casing and the on-disk filename casing — both code
|
||||||
|
paths historically have produced mixed-case outputs.
|
||||||
"""
|
"""
|
||||||
lc = locale_code.lower()
|
lc = locale_code.lower()
|
||||||
tm_dir = os.path.join(settings.STORAGE_ROOT, "amazon", "tm", locale_code)
|
tm_dir = os.path.join(settings.STORAGE_ROOT, "amazon", "tm", locale_code)
|
||||||
paths: list[str] = []
|
paths: list[str] = []
|
||||||
|
|
||||||
|
# Build a one-time case-insensitive index of the locale's TM directory.
|
||||||
|
# Empty/missing dir is fine — the resolved paths just won't exist.
|
||||||
|
dir_index: dict[str, str] = {}
|
||||||
|
if os.path.isdir(tm_dir):
|
||||||
|
for entry in os.listdir(tm_dir):
|
||||||
|
dir_index[entry.lower()] = entry
|
||||||
|
|
||||||
for ch in tm_channels:
|
for ch in tm_channels:
|
||||||
pattern = CHANNEL_FILE_MAP.get(ch.lower())
|
pattern = CHANNEL_FILE_MAP.get(ch.lower())
|
||||||
if pattern:
|
if pattern:
|
||||||
|
|
@ -74,7 +86,14 @@ def _resolve_all_tm_paths(
|
||||||
# Auto-discovered channel — use the generic pattern with the
|
# Auto-discovered channel — use the generic pattern with the
|
||||||
# channel name as-cased in the registry.
|
# channel name as-cased in the registry.
|
||||||
filename = TM_FILENAME_PATTERN.format(channel=ch, lc=lc)
|
filename = TM_FILENAME_PATTERN.format(channel=ch, lc=lc)
|
||||||
path = os.path.join(tm_dir, filename)
|
|
||||||
|
# Case-insensitive lookup: if the canonical filename doesn't
|
||||||
|
# exist exactly, resolve against the dir index.
|
||||||
|
actual = dir_index.get(filename.lower())
|
||||||
|
if actual is not None:
|
||||||
|
path = os.path.join(tm_dir, actual)
|
||||||
|
else:
|
||||||
|
path = os.path.join(tm_dir, filename) # leave as-is; will be skipped at load
|
||||||
if path not in paths:
|
if path not in paths:
|
||||||
paths.append(path)
|
paths.append(path)
|
||||||
|
|
||||||
|
|
@ -461,7 +480,16 @@ class AgentSingle(BaseAgent):
|
||||||
channel = context.job_params.channel
|
channel = context.job_params.channel
|
||||||
programme = context.job_params.programme
|
programme = context.job_params.programme
|
||||||
campaign_name = context.job_params.campaign_name
|
campaign_name = context.job_params.campaign_name
|
||||||
tm_channels = context.job_params.tm_channels or [channel]
|
# Distinguish "user explicitly sent empty list" (load no TMs) from
|
||||||
|
# "tm_channels was None" (legacy job → fall back to campaign channel).
|
||||||
|
# Bare `or` treats [] as falsy and would re-default to [channel] —
|
||||||
|
# which silently reintroduces the campaign TM after the user asked
|
||||||
|
# for none.
|
||||||
|
tm_channels = (
|
||||||
|
context.job_params.tm_channels
|
||||||
|
if context.job_params.tm_channels is not None
|
||||||
|
else [channel]
|
||||||
|
)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"AgentSingle starting: locale=%s channel=%s tm_channels=%s lines=%d",
|
"AgentSingle starting: locale=%s channel=%s tm_channels=%s lines=%d",
|
||||||
|
|
|
||||||
|
|
@ -282,7 +282,14 @@ def process_locale_instance(self, job_id: str, locale_code: str) -> dict:
|
||||||
"programme": job.programme.value,
|
"programme": job.programme.value,
|
||||||
"campaign_name": job.campaign_name,
|
"campaign_name": job.campaign_name,
|
||||||
"context_prompt": job.context_prompt,
|
"context_prompt": job.context_prompt,
|
||||||
"tm_channels": job.tm_channels or [job.channel],
|
# Preserve an explicit empty list ("no TMs") from
|
||||||
|
# the user; only fall back to the campaign channel
|
||||||
|
# when the field was never set (legacy jobs).
|
||||||
|
"tm_channels": (
|
||||||
|
job.tm_channels
|
||||||
|
if job.tm_channels is not None
|
||||||
|
else [job.channel]
|
||||||
|
),
|
||||||
"llm_model": job.llm_model,
|
"llm_model": job.llm_model,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -51,8 +51,11 @@ export function StepReview({ data, onBack }: StepReviewProps) {
|
||||||
programme: data.programme.toLowerCase(),
|
programme: data.programme.toLowerCase(),
|
||||||
channel: data.channel.toLowerCase(),
|
channel: data.channel.toLowerCase(),
|
||||||
sub_channel: data.sub_channel ? data.sub_channel.toLowerCase() : null,
|
sub_channel: data.sub_channel ? data.sub_channel.toLowerCase() : null,
|
||||||
// Always send an array (even empty) so the backend respects "no TMs" vs "default to channel"
|
// Always send an array (even empty) so the backend respects "no TMs" vs "default to channel".
|
||||||
tm_channels: data.tm_channels.map(c => c.toLowerCase()),
|
// Do NOT lowercase the channel names — the pipeline resolves TM filenames using the
|
||||||
|
// exact case from the registry (e.g. flat_PrimeCBMT_nl-be.json). Lowercasing breaks
|
||||||
|
// case-sensitive filesystem lookups for any channel not in the legacy CHANNEL_FILE_MAP.
|
||||||
|
tm_channels: data.tm_channels,
|
||||||
llm_model: data.llm_model || undefined,
|
llm_model: data.llm_model || undefined,
|
||||||
locale_codes: data.locales,
|
locale_codes: data.locales,
|
||||||
context_prompt: data.context_override || undefined,
|
context_prompt: data.context_override || undefined,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue