Round 2 feedback: parser fix, dynamic max_tokens, polling, TM auto-discovery, reviewer comments in export

A1 Export columns shifted (critical):
- V25 LLM occasionally emits 12/13-col tables with Copy Type/Char Limit prefix
- Parser now anchors on "Option 1" header position; robust to any prefix shift
- Verified with 23/23 unit tests covering 11/12/13-col variants
- Source-line block in prompt no longer uses pipe separators (defence in depth)

A2 Linguistic summary fallback:
- Drop the metadata key/value table fallback on Tab 2
- Show "No linguistic summary was generated" when the agent didn't produce one

A3 Dashboard stuck on "Running":
- useJobs / useJob now poll every 5s while any job/locale is in an active state
- Stops polling once everything is COMPLETED or ERROR

B1 TM auto-config: respect empty selection
- Send no TM files when user unchecks all (was auto-adding campaign channel)
- Backend distinguishes empty list vs missing field

B2 Auto-discover channels from TM registry:
- New GET /api/v1/files/tm/channels endpoint reads distinct channels from registry
- Frontend StepConfigure fetches channels per client; falls back to static list
- Pipeline TM resolution falls back to flat_<Channel>_<lc>.json pattern for any
  registered channel (no hardcoded map needed for new channels like PrimeCBM)

B3 Job inputs visible on monitoring:
- New "Inputs sent to the agent" card on /jobs/[id] showing AI model, TM files,
  supplementary file list, and context override
- New GET /api/v1/jobs/{id}/supplementary endpoint listing on-disk supplementary files

C1 Context cap (large briefs truncating):
- max_tokens scales with source line count (8k/16k/32k/64k by tier)
- 172-line briefs now have ~64k output budget instead of fixed 16k

D1 Reviewer comments in xlsx export:
- Export endpoint now copies xlsx to temp path on download, queries Feedback
  joined with User, and appends "Reviewer (Name): comment" to the rationale
  cells of options that have feedback
- Original generated file remains untouched

D2 Hide Clients & Voice from sidebar (page still reachable by URL)
D3 Remove dead notifications + settings icons from header
D4 Cost by Locale table added to Analytics with total + avg cost per brief

Makefile seed target now also runs register_storage_files so TM registry is
populated from disk on first setup (deploy.sh already does this via --init).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
DJP 2026-05-04 16:12:47 -04:00
parent 710d9310a6
commit 9825b0497c
18 changed files with 547 additions and 157 deletions

View file

@ -15,6 +15,7 @@ migrate:
seed:
docker compose exec backend python -m seed.create_default_client
docker compose exec backend python -m seed.create_test_users
docker compose exec backend python -m seed.register_storage_files
test:
docker compose exec backend python -m pytest tests/ -v

View file

@ -68,6 +68,41 @@ async def list_tm_files(
return [TMFileResponse.model_validate(f) for f in files]
@router.get("/tm/channels")
async def list_tm_channels(
    client_id: UUID = Query(...),
    db: AsyncSession = Depends(get_db),
    current_user: dict = Depends(get_current_user),
) -> dict:
    """List the distinct TM channels registered for a client.

    Each entry carries the channel name plus the sorted locale codes that
    have a registry row for that channel. The New Job wizard uses this to
    populate its Channel and TM Files selectors dynamically: registering a
    new TM file (e.g. flat_PrimeCBM_de-de.json) makes its channel appear
    here without code changes.

    Returns:
        ``{"channels": [{"name": ..., "locales": [...]}, ...]}`` ordered
        case-insensitively by channel name.
    """
    from sqlalchemy import select
    from app.models.files import TMFileRegistry

    query = select(TMFileRegistry.channel, TMFileRegistry.locale_code).where(
        TMFileRegistry.client_id == client_id
    )
    registry_rows = (await db.execute(query)).all()

    locales_for: dict[str, set[str]] = {}
    for channel, locale in registry_rows:
        # Rows without a channel value cannot be offered as an option.
        if channel:
            locales_for.setdefault(channel, set()).add(locale)

    ordered_names = sorted(locales_for, key=lambda name: name.lower())
    return {
        "channels": [
            {"name": name, "locales": sorted(locales_for[name])}
            for name in ordered_names
        ]
    }
@router.get("/tm/{file_id}/download")
async def download_tm_file(
file_id: UUID,

View file

@ -262,6 +262,32 @@ async def upload_supplementary(
return {"message": "File uploaded", "file_path": file_path}
@router.get("/{job_id}/supplementary")
async def list_supplementary(
    job_id: UUID,
    db: AsyncSession = Depends(get_db),
    current_user: dict = Depends(get_current_user),
) -> dict:
    """Return the names of the supplementary files stored on disk for a job.

    Raises:
        HTTPException: 404 when no job with ``job_id`` exists.

    Returns:
        ``{"files": [...]}`` with the regular files found in the job's
        ``supplementary`` directory, sorted by name. The list is empty when
        that directory does not exist.
    """
    import os
    from app.config import settings

    if await job_service.get_job(db, job_id) is None:
        raise HTTPException(status_code=404, detail="Job not found")

    folder = os.path.join(
        settings.STORAGE_ROOT, "jobs", str(job_id), "supplementary"
    )
    if not os.path.isdir(folder):
        return {"files": []}

    # Only regular files are reported; sub-directories are ignored.
    names = [
        entry
        for entry in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, entry))
    ]
    names.sort()
    return {"files": names}
@router.delete("/{job_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_job(
job_id: UUID,

View file

@ -29,6 +29,13 @@ from app.pipeline.agents.agent_2_tm_retrieval import (
CHANNEL_FILE_MAP,
_load_all_tm_entries,
)
# Pattern used to resolve TM file paths for ANY channel — supports
# auto-discovered channels from the registry (e.g. PrimeCBM) without
# requiring a hardcoded map entry. Files are named
# `flat_<Channel>_<lc>.json` on disk; if the file doesn't exist for a
# given (channel, locale) pair the pipeline silently skips it.
TM_FILENAME_PATTERN = "flat_{channel}_{lc}.json"
from app.pipeline.modules.ref_file_loader import load_all_reference_files
logger = logging.getLogger(__name__)
@ -47,17 +54,26 @@ def _resolve_all_tm_paths(
locale_code: str,
tm_channels: list[str],
) -> list[str]:
"""Resolve TM file paths for multiple channels."""
"""Resolve TM file paths for multiple channels.
Tries the legacy hardcoded CHANNEL_FILE_MAP first (which has the
canonical lowercase patterns for the original channels). For any
channel not in the map (e.g. newly registered like PrimeCBM), falls
back to the generic `flat_<Channel>_<lc>.json` pattern.
Missing files at the resolved path are silently skipped at load time.
"""
lc = locale_code.lower()
tm_dir = os.path.join(settings.STORAGE_ROOT, "amazon", "tm", locale_code)
paths: list[str] = []
for ch in tm_channels:
pattern = CHANNEL_FILE_MAP.get(ch.lower())
if not pattern:
logger.warning("Unknown TM channel %r; skipping", ch)
continue
filename = pattern.format(lc=lc)
if pattern:
filename = pattern.format(lc=lc)
else:
# Auto-discovered channel — use the generic pattern with the
# channel name as-cased in the registry.
filename = TM_FILENAME_PATTERN.format(channel=ch, lc=lc)
path = os.path.join(tm_dir, filename)
if path not in paths:
paths.append(path)
@ -66,7 +82,12 @@ def _resolve_all_tm_paths(
def _format_source_lines_for_prompt(source_lines: list[dict[str, Any]]) -> str:
"""Format source lines into a structured block for the user message."""
"""Format source lines into a structured block for the user message.
Avoid pipe separators in the per-line metadata: pipes can cue the LLM
into mirroring those fields as extra columns in the output markdown
table, which historically caused column-shift bugs in the export.
"""
lines = ["## SOURCE LINES", ""]
for i, sl in enumerate(source_lines, 1):
en = sl.get("en_gb", "")
@ -74,14 +95,13 @@ def _format_source_lines_for_prompt(source_lines: list[dict[str, Any]]) -> str:
char_limit = sl.get("char_limit", "")
guidance = sl.get("creative_guidance", "")
line = f"[{i}] EN_GB: {en}"
lines.append(f"[{i}] EN_GB: {en}")
if copy_type:
line += f" | Copy Type: {copy_type}"
lines.append(f" Copy Type: {copy_type}")
if char_limit:
line += f" | Char Limit: {char_limit}"
lines.append(f" Char Limit: {char_limit}")
if guidance:
line += f" | Guidance: {guidance}"
lines.append(line)
lines.append(f" Guidance: {guidance}")
return "\n".join(lines)
@ -141,15 +161,42 @@ def _format_ref_data_for_prompt(ref_data: dict[str, Any]) -> str:
# Markdown table parser
# ---------------------------------------------------------------------------
def _parse_markdown_table(response_text: str) -> tuple[list[list[str]], str]:
"""Parse the V25 markdown table output into structured rows.
def _split_row(line: str) -> list[str]:
"""Split a markdown table row, preserving empty cells."""
stripped = line.strip()
if stripped.startswith("|"):
stripped = stripped[1:]
if stripped.endswith("|"):
stripped = stripped[:-1]
return [c.strip() for c in stripped.split("|")]
def _find_option1_index(headers: list[str]) -> int | None:
"""Locate the column index of the 'Option 1' header.
The V25 LLM occasionally emits extra metadata columns (e.g. Copy Type,
Char Limit) before the option triplets, shifting later columns. By
anchoring on the Option 1 position we extract the 9 option/BT/rationale
cells reliably regardless of how many prefix columns the model adds.
"""
for i, h in enumerate(headers):
normalised = h.lower().strip()
if normalised in ("option 1", "option1") or normalised.startswith("option 1 "):
return i
return None
def _parse_markdown_table(
response_text: str,
) -> tuple[list[str], list[list[str]], str]:
"""Parse the V25 markdown table output.
Returns:
Tuple of (parsed_rows, linguistic_summary).
Each row is a list of cell strings in column order.
Tuple of (headers, parsed_rows, linguistic_summary).
headers is the list of column header strings (in order).
Each row in parsed_rows is a list of cell strings.
We use lists (not dicts) because the V25 table has duplicate
column names (Backtranslation x3, Rationale x3).
linguistic_summary is any text after the table.
"""
lines = response_text.split("\n")
table_lines: list[str] = []
@ -163,9 +210,7 @@ def _parse_markdown_table(response_text: str) -> tuple[list[list[str]], str]:
table_started = True
table_lines.append(stripped)
elif table_started and not stripped.startswith("|"):
# Check if this is a separator line or empty line within the table
if stripped == "" and not table_ended:
# Could be a gap — check if next lines have table content
continue
table_ended = True
post_table_lines.append(line)
@ -174,65 +219,75 @@ def _parse_markdown_table(response_text: str) -> tuple[list[list[str]], str]:
if not table_lines:
logger.warning("No markdown table found in response")
return [], response_text.strip()
return [], [], response_text.strip()
def _split_row(line: str) -> list[str]:
"""Split a markdown table row, preserving empty cells."""
# Strip leading/trailing pipe and split
stripped = line.strip()
if stripped.startswith("|"):
stripped = stripped[1:]
if stripped.endswith("|"):
stripped = stripped[:-1]
return [c.strip() for c in stripped.split("|")]
headers = _split_row(table_lines[0])
# Skip header row and separator line, collect data rows
data_lines: list[list[str]] = []
for tl in table_lines[1:]: # skip header
for tl in table_lines[1:]:
cells = _split_row(tl)
# Skip separator rows (---|---|---)
if cells and all(re.match(r"^[-:]+$", c) for c in cells if c):
continue
if any(c for c in cells): # at least one non-empty cell
if any(c for c in cells):
data_lines.append(cells)
linguistic_summary = "\n".join(post_table_lines).strip()
return data_lines, linguistic_summary
return headers, data_lines, linguistic_summary
def _rows_to_draft_outputs(
headers: list[str],
rows: list[list[str]],
source_lines: list[Any],
) -> tuple[list[DraftOutput], list[RankingDeclaration]]:
"""Convert parsed table rows into DraftOutput and RankingDeclaration objects.
The V25 table has columns (by position):
The V25 spec table has columns (by position):
0: Locale | 1: Source | 2: Option 1 | 3: Backtranslation | 4: Rationale |
5: Option 2 | 6: Backtranslation | 7: Rationale |
8: Option 3 | 9: Backtranslation | 10: Rationale
Rows are passed as lists of cell strings (not dicts) because the V25
table has duplicate column names that would collide in a dict.
The LLM occasionally emits extra metadata columns (e.g. Copy Type, Char
Limit) between Source and Option 1, which shifts later columns. To stay
correct, we locate the "Option 1" header position and read the 9
option/BT/rationale cells from that anchor, which is robust to any prefix shift.
"""
drafts: list[DraftOutput] = []
rankings: list[RankingDeclaration] = []
opt1_idx = _find_option1_index(headers) if headers else None
if opt1_idx is None:
# Header missing or unrecognised — fall back to fixed position 2
opt1_idx = 2
logger.warning(
"Could not locate 'Option 1' column in header (%s); "
"falling back to fixed position %d",
headers, opt1_idx,
)
else:
logger.info("Anchoring on 'Option 1' at column %d (headers: %s)",
opt1_idx, headers)
def _cell(cells: list[str], idx: int) -> str:
return cells[idx] if 0 <= idx < len(cells) else ""
for i, cells in enumerate(rows):
# Expected 11 columns; minimum useful: 5 (locale, source, opt1, bt1, rat1)
if len(cells) < 5:
logger.warning("Row %d has too few columns (%d), skipping", i, len(cells))
if len(cells) < opt1_idx + 3:
logger.warning(
"Row %d has too few columns (%d) to extract option 1 at index %d, skipping",
i, len(cells), opt1_idx,
)
continue
# Extract values by position
opt1_text = cells[2] if len(cells) > 2 else ""
bt1 = cells[3] if len(cells) > 3 else ""
rat1 = cells[4] if len(cells) > 4 else ""
opt2_text = cells[5] if len(cells) > 5 else ""
bt2 = cells[6] if len(cells) > 6 else ""
rat2 = cells[7] if len(cells) > 7 else ""
opt3_text = cells[8] if len(cells) > 8 else ""
bt3 = cells[9] if len(cells) > 9 else ""
rat3 = cells[10] if len(cells) > 10 else ""
opt1_text = _cell(cells, opt1_idx + 0)
bt1 = _cell(cells, opt1_idx + 1)
rat1 = _cell(cells, opt1_idx + 2)
opt2_text = _cell(cells, opt1_idx + 3)
bt2 = _cell(cells, opt1_idx + 4)
rat2 = _cell(cells, opt1_idx + 5)
opt3_text = _cell(cells, opt1_idx + 6)
bt3 = _cell(cells, opt1_idx + 7)
rat3 = _cell(cells, opt1_idx + 8)
# Clean up <br> tags to newlines
def _clean(text: str) -> str:
@ -309,7 +364,7 @@ class AgentSingle(BaseAgent):
return ""
def parse_response(self, response: str, context: PipelineContext) -> Any:
"""Parse markdown table response."""
"""Parse markdown table response. Returns (headers, rows, summary)."""
return _parse_markdown_table(response)
async def run(self, context: PipelineContext) -> PipelineContext:
@ -384,10 +439,30 @@ class AgentSingle(BaseAgent):
len(system_prompt), len(user_message),
)
# Scale max_tokens with source line count. Empirically each output
# row consumes ~250 output tokens (option text + BT + rationale x3
# + table padding). 64k is the Sonnet 4.6 cap; 32k covers ~120 rows
# with comfortable headroom; 16k was the Round 1 default and was
# truncating large briefs (172 lines → 65 rows).
line_count = len(context.source_lines)
if line_count <= 30:
max_tokens = 8192
elif line_count <= 80:
max_tokens = 16384
elif line_count <= 150:
max_tokens = 32768
else:
max_tokens = 64000
logger.info(
"max_tokens=%d for %d source lines",
max_tokens, line_count,
)
response_text, usage = await llm.acreate_message(
system_prompt=system_prompt,
user_message=user_message,
max_tokens=16384,
max_tokens=max_tokens,
temperature=0.7,
)
@ -404,17 +479,16 @@ class AgentSingle(BaseAgent):
context.total_estimated_cost += usage.get("estimated_cost_usd", 0.0)
# ── Parse response ───────────────────────────────────────────
rows, linguistic_summary = _parse_markdown_table(response_text)
headers, rows, linguistic_summary = _parse_markdown_table(response_text)
logger.info("Parsed %d output rows from markdown table", len(rows))
if not rows:
logger.error("No rows parsed from response. Raw response:\n%s", response_text[:2000])
elif rows:
# Log first row column count for debugging
logger.info("First row has %d columns: %s", len(rows[0]), [c[:30] for c in rows[0]])
# Convert to structured outputs
drafts, rankings = _rows_to_draft_outputs(rows, context.source_lines)
# Convert to structured outputs (anchors on Option 1 header position)
drafts, rankings = _rows_to_draft_outputs(headers, rows, context.source_lines)
context.draft_outputs = drafts
context.ranking_declarations = rankings

View file

@ -133,56 +133,34 @@ def generate_output_xlsx(
# ---- Tab 2: Linguistic Summary ----
ws2 = wb.create_sheet("Linguistic Summary")
summary_data = summary or {}
ws2.column_dimensions["A"].width = 100
if linguistic_summary:
# Linguistic summary from single agent
ws2.column_dimensions["A"].width = 100
title_cell = ws2.cell(row=1, column=1, value="Linguistic Summary")
title_cell.font = Font(bold=True, size=14)
title_cell = ws2.cell(row=1, column=1, value="Linguistic Summary")
title_cell.font = Font(bold=True, size=14)
summary_cell = ws2.cell(row=3, column=1, value=linguistic_summary)
summary_cell.alignment = Alignment(wrap_text=True, vertical="top")
# Set row height based on content length
line_count = linguistic_summary.count("\n") + 1
ws2.row_dimensions[3].height = max(15, min(600, line_count * 15))
# Metadata footer
footer_row = 5 + line_count
footer_text = (
f"Job: {summary_data.get('job_id', '')} | "
f"Locale: {summary_data.get('locale_code', '')} | "
f"Generated: {summary_data.get('generated_at', '')}"
body = linguistic_summary.strip() if linguistic_summary else ""
if not body:
body = (
"No linguistic summary was generated for this job. "
"This can happen if the agent's output was truncated or "
"if no commentary was produced after the translation table."
)
footer_cell = ws2.cell(row=footer_row, column=1, value=footer_text)
footer_cell.font = Font(color="888888", size=9)
else:
# Fallback: metadata key/value summary
ws2.column_dimensions["A"].width = 25
ws2.column_dimensions["B"].width = 40
metadata_rows = [
("Job ID", summary_data.get("job_id", "")),
("Campaign", summary_data.get("campaign_name", "")),
("Locale", summary_data.get("locale_code", "")),
("Channel", summary_data.get("channel", "")),
("Programme", summary_data.get("programme", "")),
("Total Source Lines", summary_data.get("total_source_lines", 0)),
("Total Output Rows", summary_data.get("total_output_rows", 0)),
("High Confidence", summary_data.get("high_confidence", 0)),
("Moderate Confidence", summary_data.get("moderate_confidence", 0)),
("Low Confidence", summary_data.get("low_confidence", 0)),
("Total Tokens Used", summary_data.get("total_tokens", 0)),
("Estimated Cost (USD)", summary_data.get("estimated_cost", 0.0)),
("Agent Version", summary_data.get("agent_version", "")),
("Generated At", summary_data.get("generated_at", "")),
]
summary_cell = ws2.cell(row=3, column=1, value=body)
summary_cell.alignment = Alignment(wrap_text=True, vertical="top")
for row_idx, (label, value) in enumerate(metadata_rows, start=1):
label_cell = ws2.cell(row=row_idx, column=1, value=label)
label_cell.font = Font(bold=True)
ws2.cell(row=row_idx, column=2, value=str(value))
line_count = body.count("\n") + 1
ws2.row_dimensions[3].height = max(15, min(600, line_count * 15))
# Metadata footer (job id / locale / generated at)
footer_row = 5 + line_count
footer_text = (
f"Job: {summary_data.get('job_id', '')} | "
f"Locale: {summary_data.get('locale_code', '')} | "
f"Generated: {summary_data.get('generated_at', '')}"
)
footer_cell = ws2.cell(row=footer_row, column=1, value=footer_text)
footer_cell.font = Font(color="888888", size=9)
# Save
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

View file

@ -31,8 +31,13 @@ class JobService:
else:
job_type = JobType.main
# Default tm_channels to [channel] if not provided
tm_channels = data.tm_channels if data.tm_channels else [data.channel]
# Default tm_channels to [channel] only if user didn't send the field at all.
# An explicit empty list means "do not load any TMs" — respected as-is.
tm_channels = (
data.tm_channels
if data.tm_channels is not None
else [data.channel]
)
job = Job(
client_id=data.client_id,

View file

@ -1,18 +1,26 @@
import logging
import shutil
import tempfile
from pathlib import Path
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.models.feedback import Feedback
from app.models.job import LocaleInstance
from app.models.output import OutputRow
from app.models.source import SourceLine
from app.models.user import User
from app.schemas.output import (
OutputPreviewResponse,
OutputRowResponse,
SourceLinePreview,
)
logger = logging.getLogger(__name__)
class OutputService:
"""Service for assembling output preview data and triggering exports."""
@ -86,7 +94,13 @@ class OutputService:
job_id: UUID,
locale_code: str,
) -> str | None:
"""Trigger export generation for a locale and return the file path."""
"""Return path to the export xlsx, with any reviewer feedback applied.
If reviewers have left comments on output rows, this method copies
the original xlsx to a temp path, appends "Reviewer (Name): comment"
to the affected rationale cells, and returns the temp path. If no
feedback exists, returns the original path unchanged.
"""
result = await db.execute(
select(LocaleInstance)
.where(
@ -95,11 +109,81 @@ class OutputService:
)
)
instance = result.scalar_one_or_none()
if instance is None:
if instance is None or not instance.output_file_path:
return None
if instance.output_file_path:
return instance.output_file_path
original_path = instance.output_file_path
if not Path(original_path).exists():
return None
# Export would be triggered here; for now return None indicating no export yet
return None
# Pull all feedback for this instance's output rows
feedback_query = (
select(Feedback, OutputRow.row_order, User.name)
.join(OutputRow, Feedback.output_id == OutputRow.id)
.join(User, Feedback.user_id == User.id)
.where(OutputRow.instance_id == instance.id)
.order_by(OutputRow.row_order, Feedback.created_at)
)
feedback_rows = (await db.execute(feedback_query)).all()
if not feedback_rows:
return original_path
# Group by (row_order, option_column) for placement in the xlsx
from collections import defaultdict
grouped: dict[tuple[int, int], list[str]] = defaultdict(list)
for fb, row_order, user_name in feedback_rows:
if not fb.comment:
continue
label = f"Reviewer ({user_name}): {fb.comment}"
grouped[(row_order, fb.option_column)].append(label)
if not grouped:
return original_path
return self._render_export_with_feedback(original_path, grouped)
def _render_export_with_feedback(
    self,
    original_path: str,
    feedback_by_row: dict[tuple[int, int], list[str]],
) -> str:
    """Copy the export xlsx to a temp file and append reviewer comments.

    The original workbook is never modified. Comments are appended to the
    rationale cell of the option they refer to on the "Transcreation Output"
    sheet.

    Args:
        original_path: Path of the generated xlsx.
        feedback_by_row: Maps ``(row_order, option_column)`` to the comment
            lines to append. Entries whose option column is not 1, 2 or 3
            are ignored.

    Returns:
        Path of the annotated temp copy, or ``original_path`` when the copy
        cannot be opened or lacks the expected sheet.

    Raises:
        Exception: Any failure while copying, editing or saving is
            re-raised after the temp file has been removed.
    """
    from openpyxl import load_workbook
    from openpyxl.styles import Alignment

    # Tab 1 column indices for rationale of each option (matches excel_writer.OUTPUT_COLUMNS)
    # 1: EN_GB, 2: Copy Type, 3: Option 1, 4: BT 1, 5: Rationale 1,
    # 6: Option 2, 7: BT 2, 8: Rationale 2,
    # 9: Option 3, 10: BT 3, 11: Rationale 3
    RATIONALE_COL_BY_OPTION = {1: 5, 2: 8, 3: 11}

    # delete=False: the file must outlive this call so it can be served.
    # NOTE(review): nothing here removes the returned temp file afterwards;
    # presumably the download handler should — confirm.
    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as handle:
        tmp_path = handle.name

    try:
        shutil.copy2(original_path, tmp_path)

        try:
            wb = load_workbook(tmp_path)
            ws = wb["Transcreation Output"]
        except Exception as exc:
            # Best effort: an unreadable copy must not block the download.
            logger.warning(
                "Could not open xlsx for feedback injection (%s); "
                "returning original", exc,
            )
            # The unused temp copy would otherwise stay on disk forever.
            Path(tmp_path).unlink(missing_ok=True)
            return original_path

        for (row_order, option_col), comments in feedback_by_row.items():
            col_idx = RATIONALE_COL_BY_OPTION.get(option_col)
            if col_idx is None:
                continue
            # assumes row_order is 1-based so that data row N sits on sheet
            # row N + 1 below the header — TODO confirm against excel_writer
            xlsx_row = row_order + 1  # +1 for header
            cell = ws.cell(row=xlsx_row, column=col_idx)
            # str() guards against a non-text cell value breaking the concat.
            existing = str(cell.value) if cell.value else ""
            joined = "\n".join(comments)
            cell.value = f"{existing}\n\n{joined}" if existing else joined.lstrip()
            cell.alignment = Alignment(vertical="top", wrap_text=True)

        wb.save(tmp_path)
        wb.close()
    except Exception:
        # Do not leave a partial temp file behind when the export fails.
        Path(tmp_path).unlink(missing_ok=True)
        raise

    return tmp_path

View file

@ -171,16 +171,19 @@ class ReportService:
query = query.where(Job.created_at <= date_to)
result = await db.execute(query)
return [
{
out: list[dict[str, Any]] = []
for row in result.all():
count = row.count or 0
total_cost = float(row.total_cost or 0.0)
out.append({
"locale": row.locale_code,
"count": row.count,
"count": count,
"total_tokens": row.total_tokens or 0,
"total_cost": float(row.total_cost or 0.0),
"total_cost": total_cost,
"avg_cost_per_brief": round(total_cost / count, 4) if count else 0.0,
"avg_duration_minutes": round(float(row.avg_duration_seconds or 0) / 60, 1),
}
for row in result.all()
]
})
return out
async def get_jobs_over_time(
self,

View file

@ -50,12 +50,19 @@ def _resolve_file_manifest(
def _check(path: str) -> str | None:
return path if os.path.exists(path) else None
# Resolve TM file
# Resolve TM file. Try the legacy registry first; if the channel isn't
# listed, fall back to the generic pattern so newly-registered channels
# (e.g. PrimeCBM) work without code changes.
tm_files: list[str] = []
channel_lower = channel.lower() if channel else ""
tm_pattern = TM_CHANNEL_REGISTRY.get(channel_lower)
if tm_pattern:
tm_filename = tm_pattern.replace("{lc}", lc_lower)
elif channel:
tm_filename = f"flat_{channel}_{lc_lower}.json"
else:
tm_filename = ""
if tm_filename:
tm_path = f"{storage}/amazon/tm/{locale_code}/{tm_filename}"
if os.path.exists(tm_path):
tm_files.append(tm_path)

View file

@ -319,6 +319,52 @@ export default function ReportsPage() {
</CardContent>
</Card>
</div>
{/* Cost by Locale */}
<Card>
<CardHeader>
<CardTitle className="text-base">Cost by Locale</CardTitle>
</CardHeader>
<CardContent>
{localeStats.length > 0 ? (
<div className="overflow-x-auto">
<table className="w-full text-sm">
<thead>
<tr className="border-b border-gray-200 text-left text-xs uppercase tracking-wide text-gray-500">
<th className="py-2 pr-4">Locale</th>
<th className="py-2 pr-4 text-right">Briefs</th>
<th className="py-2 pr-4 text-right">Total Cost (USD)</th>
<th className="py-2 pr-4 text-right">Avg Cost / Brief</th>
<th className="py-2 pr-4 text-right">Total Tokens</th>
</tr>
</thead>
<tbody>
{localeStats.map((l) => (
<tr key={l.locale} className="border-b border-gray-100">
<td className="py-2 pr-4 font-mono">{l.locale}</td>
<td className="py-2 pr-4 text-right">{l.count}</td>
<td className="py-2 pr-4 text-right">${l.total_cost.toFixed(2)}</td>
<td className="py-2 pr-4 text-right">${(l.avg_cost_per_brief ?? 0).toFixed(4)}</td>
<td className="py-2 pr-4 text-right">{l.total_tokens.toLocaleString()}</td>
</tr>
))}
<tr className="font-semibold">
<td className="py-2 pr-4">Total</td>
<td className="py-2 pr-4 text-right">{localeStats.reduce((s, l) => s + l.count, 0)}</td>
<td className="py-2 pr-4 text-right">${localeStats.reduce((s, l) => s + l.total_cost, 0).toFixed(2)}</td>
<td className="py-2 pr-4 text-right"></td>
<td className="py-2 pr-4 text-right">{localeStats.reduce((s, l) => s + l.total_tokens, 0).toLocaleString()}</td>
</tr>
</tbody>
</table>
</div>
) : (
<p className="text-sm text-gray-400 text-center py-12">
No locale data for this period.
</p>
)}
</CardContent>
</Card>
</>
)}
</div>

View file

@ -8,7 +8,7 @@ import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { LocaleInstanceCard } from "@/components/jobs/MonitoringHUD/LocaleInstanceCard";
import { useJob } from "@/hooks/useJobs";
import { cancelJob, deleteJob } from "@/lib/api";
import { cancelJob, deleteJob, listSupplementary } from "@/lib/api";
import { useAuth } from "@/hooks/useAuth";
import {
Clock,
@ -41,6 +41,14 @@ export default function JobMonitoringPage() {
const { job, loading, error, refetch } = useJob(jobId);
const { user } = useAuth();
const [confirmDelete, setConfirmDelete] = useState(false);
const [suppFiles, setSuppFiles] = useState<string[]>([]);
useEffect(() => {
if (!jobId) return;
listSupplementary(jobId)
.then((res) => setSuppFiles(res.files))
.catch(() => setSuppFiles([]));
}, [jobId]);
const isAdmin = user?.role === "ADMIN";
@ -206,6 +214,64 @@ export default function JobMonitoringPage() {
</CardContent>
</Card>
{/* Job Inputs — what was sent to the agent */}
<Card>
<CardHeader className="pb-3">
<CardTitle className="text-base">Inputs sent to the agent</CardTitle>
</CardHeader>
<CardContent className="space-y-4">
<div>
<p className="text-xs font-semibold uppercase tracking-wide text-gray-500 mb-1">AI Model</p>
<p className="text-sm">
{job.llm_model === "claude-opus-4-6"
? "Claude Opus 4.6"
: job.llm_model === "claude-sonnet-4-6"
? "Claude Sonnet 4.6"
: job.llm_model || "Default"}
</p>
</div>
<div>
<p className="text-xs font-semibold uppercase tracking-wide text-gray-500 mb-1">TM Files Loaded</p>
{job.tm_channels && job.tm_channels.length > 0 ? (
<div className="flex flex-wrap gap-1">
{job.tm_channels.map((ch) => (
<Badge key={ch} variant="gray" className="text-xs">
{ch}
</Badge>
))}
</div>
) : (
<p className="text-sm text-gray-400">None pipeline ran without TM files</p>
)}
</div>
<div>
<p className="text-xs font-semibold uppercase tracking-wide text-gray-500 mb-1">Supplementary Files</p>
{suppFiles.length > 0 ? (
<ul className="text-sm space-y-0.5">
{suppFiles.map((f) => (
<li key={f} className="flex items-center gap-2 text-gray-700">
<FileText className="h-3.5 w-3.5 text-gray-400" />
<span className="font-mono text-xs">{f}</span>
</li>
))}
</ul>
) : (
<p className="text-sm text-gray-400">None</p>
)}
</div>
<div>
<p className="text-xs font-semibold uppercase tracking-wide text-gray-500 mb-1">Context Override</p>
{job.context_override ? (
<pre className="text-sm whitespace-pre-wrap bg-amber-50 border border-amber-200 rounded p-2 text-gray-700">
{job.context_override}
</pre>
) : (
<p className="text-sm text-gray-400">None</p>
)}
</div>
</CardContent>
</Card>
{/* Locale instances */}
<div>
<h3 className="text-lg font-bold text-amazon-text mb-4">

View file

@ -16,16 +16,16 @@ import {
} from "@/components/ui/select";
import type { JobFormData } from "@/app/jobs/new/page";
import type { Client } from "@/lib/types";
import { getClients } from "@/lib/api";
import { getClients, getTMChannels } from "@/lib/api";
const ALL_LOCALES = [
"de-DE", "fr-FR", "it-IT", "es-ES", "nl-NL",
"sv-SE", "pl-PL", "pt-PT", "de-AT", "fr-BE", "nl-BE", "ca-ES",
];
const CHANNELS = ["MASS", "VALUE", "ONSITE", "OUTBOUND", "BDA", "UEFA"];
const TM_CHANNELS = [
// Fallback list used until the registry-discovered list arrives. Keeps
// the wizard usable on first paint and in case the API call fails.
const DEFAULT_CHANNELS = [
"MASS", "VALUE", "ONSITE", "OUTBOUND", "UEFA", "BDA",
"DoubleDonut", "EUSelection", "PrimeDualBenefit",
"PrimeGourmetGuard", "PrimeMidfunnel", "PrimeSpeed", "TheKiss",
@ -55,6 +55,7 @@ interface StepConfigureProps {
export function StepConfigure({ data, onChange, onNext }: StepConfigureProps) {
const [clients, setClients] = useState<Client[]>([]);
const [discoveredChannels, setDiscoveredChannels] = useState<string[]>([]);
useEffect(() => {
getClients()
@ -75,6 +76,23 @@ export function StepConfigure({ data, onChange, onNext }: StepConfigureProps) {
.catch(() => {});
}, []); // eslint-disable-line react-hooks/exhaustive-deps
// Fetch the registry-discovered channel list whenever the client changes.
// New TM uploads (e.g. flat_PrimeCBM_de-de.json) appear here automatically.
useEffect(() => {
if (!data.client_id) return;
getTMChannels(data.client_id)
.then((channels) => {
const names = channels.map((c) => c.name);
setDiscoveredChannels(names);
})
.catch(() => setDiscoveredChannels([]));
}, [data.client_id]);
// Use registry channels when available; fall back to the static default
// list (covers the very-first-load and offline cases).
const channelOptions =
discoveredChannels.length > 0 ? discoveredChannels : DEFAULT_CHANNELS;
const toggleLocale = (locale: string) => {
if (data.locales.includes(locale)) {
onChange({ locales: data.locales.filter((l) => l !== locale) });
@ -93,7 +111,7 @@ export function StepConfigure({ data, onChange, onNext }: StepConfigureProps) {
onChange({ tm_channels: [...data.tm_channels, ch] });
}
};
const selectAllTM = () => onChange({ tm_channels: [...TM_CHANNELS] });
const selectAllTM = () => onChange({ tm_channels: [...channelOptions] });
const clearAllTM = () => onChange({ tm_channels: [] });
const isValid =
@ -182,18 +200,13 @@ export function StepConfigure({ data, onChange, onNext }: StepConfigureProps) {
<Label>Channel</Label>
<Select
value={data.channel}
onValueChange={(val) => {
const newTM = data.tm_channels.includes(val)
? data.tm_channels
: [...data.tm_channels, val];
onChange({ channel: val, tm_channels: newTM });
}}
onValueChange={(val) => onChange({ channel: val })}
>
<SelectTrigger>
<SelectValue placeholder="Select channel" />
</SelectTrigger>
<SelectContent>
{CHANNELS.map((ch) => (
{channelOptions.map((ch) => (
<SelectItem key={ch} value={ch}>
{ch}
</SelectItem>
@ -275,7 +288,7 @@ export function StepConfigure({ data, onChange, onNext }: StepConfigureProps) {
</div>
</div>
<div className="grid grid-cols-2 sm:grid-cols-4 gap-2">
{TM_CHANNELS.map((ch) => (
{channelOptions.map((ch) => (
<label
key={ch}
className="flex items-center gap-2 p-2 rounded-lg border border-gray-200 hover:border-amazon-teal/40 cursor-pointer transition-colors has-[:checked]:border-amazon-teal has-[:checked]:bg-amazon-teal/5"
@ -289,7 +302,7 @@ export function StepConfigure({ data, onChange, onNext }: StepConfigureProps) {
))}
</div>
<p className="text-xs text-gray-400">
Select which Translation Memory files to load. The campaign channel is auto-included.
Select which Translation Memory files to load. Leave empty to send no TMs (saves tokens) useful when supplying a cut-down TM via supplementary files.
</p>
</div>

View file

@ -51,7 +51,8 @@ export function StepReview({ data, onBack }: StepReviewProps) {
programme: data.programme.toLowerCase(),
channel: data.channel.toLowerCase(),
sub_channel: data.sub_channel ? data.sub_channel.toLowerCase() : null,
tm_channels: data.tm_channels.length > 0 ? data.tm_channels.map(c => c.toLowerCase()) : undefined,
// Always send an array (even empty) so the backend respects "no TMs" vs "default to channel"
tm_channels: data.tm_channels.map(c => c.toLowerCase()),
llm_model: data.llm_model || undefined,
locale_codes: data.locales,
context_prompt: data.context_override || undefined,
@ -171,7 +172,7 @@ export function StepReview({ data, onBack }: StepReviewProps) {
{ch}
</Badge>
)) : (
<span className="text-sm text-gray-400">Auto (campaign channel)</span>
<span className="text-sm text-gray-400">None pipeline will run without TM files</span>
)}
</div>
</div>

View file

@ -3,8 +3,7 @@
import React, { useEffect, useState } from "react";
import { usePathname, useRouter } from "next/navigation";
import { Sidebar } from "./Sidebar";
import { ChevronRight, Bell, Settings } from "lucide-react";
import { Button } from "@/components/ui/button";
import { ChevronRight } from "lucide-react";
import { isAuthenticated } from "@/lib/auth";
function getBreadcrumbs(pathname: string): { label: string; href: string }[] {
@ -121,16 +120,6 @@ export function AppShell({ children }: AppShellProps) {
</h1>
</div>
{/* Header actions */}
<div className="flex items-center gap-2">
<Button variant="ghost" size="icon" className="relative">
<Bell className="h-5 w-5 text-gray-500" />
<span className="absolute top-1.5 right-1.5 h-2 w-2 rounded-full bg-amazon-error" />
</Button>
<Button variant="ghost" size="icon">
<Settings className="h-5 w-5 text-gray-500" />
</Button>
</div>
</div>
</header>

View file

@ -10,7 +10,6 @@ import {
Activity,
Database,
BookOpen,
Building2,
BarChart3,
ScrollText,
Users,
@ -36,7 +35,10 @@ const navItems: NavItem[] = [
{ label: "Monitoring", href: "/dashboard?view=monitoring", icon: Activity },
{ label: "TM Registry", href: "/admin/files/tm", icon: Database },
{ label: "Reference Library", href: "/admin/files/reference", icon: BookOpen },
{ label: "Clients & Voice", href: "/admin/clients", icon: Building2 },
// Clients & Voice hidden from nav (page still reachable by direct URL).
// For the Amazon-only deployment, voice info lives in the Reference Library;
// re-enable when the platform is adapted for non-Amazon clients.
// { label: "Clients & Voice", href: "/admin/clients", icon: Building2 },
{ label: "Analytics", href: "/admin/reports", icon: BarChart3 },
{ label: "Audit Trail", href: "/admin/logs", icon: ScrollText },
{ label: "User Management", href: "/admin/users", icon: Users, adminOnly: true },

View file

@ -1,17 +1,23 @@
"use client";
import { useState, useEffect, useCallback } from "react";
import { useState, useEffect, useCallback, useRef } from "react";
import type { Job, JobFilters, PaginatedResponse } from "@/lib/types";
import { getJobs, getJob } from "@/lib/api";
const POLL_INTERVAL_MS = 5000;
const ACTIVE_STATUSES = new Set(["QUEUED", "RUNNING", "DRAFT"]);
export function useJobs(filters?: JobFilters) {
const [jobs, setJobs] = useState<Job[]>([]);
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
const [total, setTotal] = useState(0);
const isFetchingRef = useRef(false);
const fetchJobs = useCallback(async () => {
setLoading(true);
const fetchJobs = useCallback(async (silent = false) => {
if (isFetchingRef.current) return;
isFetchingRef.current = true;
if (!silent) setLoading(true);
setError(null);
try {
const response: PaginatedResponse<Job> = await getJobs(filters);
@ -19,10 +25,13 @@ export function useJobs(filters?: JobFilters) {
setTotal(response.total);
} catch {
setError("Failed to load jobs");
setJobs([]);
setTotal(0);
if (!silent) {
setJobs([]);
setTotal(0);
}
} finally {
setLoading(false);
if (!silent) setLoading(false);
isFetchingRef.current = false;
}
}, [filters]);
@ -30,6 +39,14 @@ export function useJobs(filters?: JobFilters) {
fetchJobs();
}, [fetchJobs]);
// Poll while any job is in an active state
useEffect(() => {
const hasActive = jobs.some((j) => ACTIVE_STATUSES.has(j.status));
if (!hasActive) return;
const id = setInterval(() => fetchJobs(true), POLL_INTERVAL_MS);
return () => clearInterval(id);
}, [jobs, fetchJobs]);
return { jobs, loading, error, total, refetch: fetchJobs };
}
@ -37,19 +54,23 @@ export function useJob(jobId: string) {
const [job, setJob] = useState<Job | null>(null);
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
const isFetchingRef = useRef(false);
const fetchJob = useCallback(async () => {
const fetchJob = useCallback(async (silent = false) => {
if (!jobId) return;
setLoading(true);
if (isFetchingRef.current) return;
isFetchingRef.current = true;
if (!silent) setLoading(true);
setError(null);
try {
const data = await getJob(jobId);
setJob(data);
} catch {
setError("Failed to load job");
setJob(null);
if (!silent) setJob(null);
} finally {
setLoading(false);
if (!silent) setLoading(false);
isFetchingRef.current = false;
}
}, [jobId]);
@ -57,5 +78,17 @@ export function useJob(jobId: string) {
fetchJob();
}, [fetchJob]);
// Poll while the job or any of its locales is active
useEffect(() => {
if (!job) return;
const jobActive = ACTIVE_STATUSES.has(job.status);
const localeActive = job.locale_instances?.some(
(li) => li.status !== "COMPLETED" && li.status !== "ERROR"
);
if (!jobActive && !localeActive) return;
const id = setInterval(() => fetchJob(true), POLL_INTERVAL_MS);
return () => clearInterval(id);
}, [job, fetchJob]);
return { job, loading, error, refetch: fetchJob };
}

View file

@ -156,6 +156,8 @@ function mapJobResponse(data: any): Job {
programme: (data.programme?.toUpperCase() || "RETAIL") as Job["programme"],
channel: (data.channel?.toUpperCase() || "") as Job["channel"],
sub_channel: data.sub_channel ? (data.sub_channel.toUpperCase() as Job["sub_channel"]) : null,
tm_channels: Array.isArray(data.tm_channels) ? data.tm_channels : undefined,
llm_model: data.llm_model || undefined,
job_type: (data.job_type?.toUpperCase() || "MAIN") as Job["job_type"],
locales: instances.map((li: LocaleInstance) => li.locale_code),
status: mapJobStatus(data.status) as Job["status"],
@ -276,6 +278,13 @@ export async function uploadSupplementary(
return response.data;
}
/**
 * List the supplementary files recorded for a job.
 *
 * Issues a GET request to `/jobs/{jobId}/supplementary` and hands back the
 * response body unchanged.
 *
 * @param jobId - Identifier of the job; interpolated into the request path as-is.
 * @returns The response payload, typed as `{ files: string[] }`
 *          (presumably file names; confirm against the backend endpoint).
 */
export async function listSupplementary(
  jobId: string
): Promise<{ files: string[] }> {
  const endpoint = `/jobs/${jobId}/supplementary`;
  const { data } = await api.get(endpoint);
  return data;
}
export async function launchJob(jobId: string): Promise<Job> {
const response = await api.post(`/jobs/${jobId}/launch`);
return mapJobResponse(response.data);
@ -521,6 +530,21 @@ export async function getTMFiles(clientId: string): Promise<TMFile[]> {
return response.data;
}
/** Shape of one entry in the `channels` array returned by `getTMChannels`. */
export interface TMChannelInfo {
// Channel name.
name: string;
// Locales listed for this channel; presumably the locale codes that have a
// TM file registered (TODO confirm against the backend response).
locales: string[];
}
/**
 * Fetch the Translation Memory channels available for a client.
 *
 * Sends a GET request to `/files/tm/channels` with `client_id` as a query
 * parameter and unwraps the `channels` field of the response body.
 *
 * @param clientId - Client whose channels are requested.
 * @returns The `channels` array from the response, or an empty array when
 *          that field is missing or otherwise falsy.
 */
export async function getTMChannels(
  clientId: string
): Promise<TMChannelInfo[]> {
  const requestConfig = { params: { client_id: clientId } };
  const { data } = await api.get<{ channels: TMChannelInfo[] }>(
    "/files/tm/channels",
    requestConfig
  );
  // Keep the `||` fallback so any falsy value, not only null/undefined, yields [].
  return data.channels || [];
}
export async function uploadTMFile(
file: File,
clientId: string,
@ -593,6 +617,7 @@ export interface LocaleStat {
count: number;
total_tokens: number;
total_cost: number;
avg_cost_per_brief: number;
avg_duration_minutes: number;
}

View file

@ -101,6 +101,8 @@ export interface Job {
programme: Programme;
channel: Channel;
sub_channel?: SubChannel | null;
tm_channels?: string[];
llm_model?: string;
job_type: JobType;
locales: string[];
status: JobStatus;