amazon-transcreation/backend/app/pipeline/modules/excel_writer.py
DJP 9825b0497c Round 2 feedback: parser fix, dynamic max_tokens, polling, TM auto-discovery, reviewer comments in export
A1 Export columns shifted (critical):
- V25 LLM occasionally emits 12/13-col tables with Copy Type/Char Limit prefix
- Parser now anchors on "Option 1" header position; robust to any prefix shift
- Verified with 23/23 unit tests covering 11/12/13-col variants
- Source-line block in prompt no longer uses pipe separators (defence in depth)

A2 Linguistic summary fallback:
- Drop the metadata key/value table fallback on Tab 2
- Show "No linguistic summary was generated" when the agent didn't produce one

A3 Dashboard stuck on "Running":
- useJobs / useJob now poll every 5s while any job/locale is in an active state
- Stops polling once everything is COMPLETED or ERROR

B1 TM auto-config: respect empty selection
- Send no TM files when user unchecks all (was auto-adding campaign channel)
- Backend distinguishes empty list vs missing field

B2 Auto-discover channels from TM registry:
- New GET /api/v1/files/tm/channels endpoint reads distinct channels from registry
- Frontend StepConfigure fetches channels per client; falls back to static list
- Pipeline TM resolution falls back to flat_<Channel>_<lc>.json pattern for any
  registered channel (no hardcoded map needed for new channels like PrimeCBM)

B3 Job inputs visible on monitoring:
- New "Inputs sent to the agent" card on /jobs/[id] showing AI model, TM files,
  supplementary file list, and context override
- New GET /api/v1/jobs/{id}/supplementary endpoint listing on-disk supplementary files

C1 Context cap (large briefs truncating):
- max_tokens scales with source line count (8k/16k/32k/64k by tier)
- 172-line briefs now have ~64k output budget instead of fixed 16k

D1 Reviewer comments in xlsx export:
- Export endpoint now copies xlsx to temp path on download, queries Feedback
  joined with User, and appends "Reviewer (Name): comment" to the rationale
  cells of options that have feedback
- Original generated file remains untouched

D2 Hide Clients & Voice from sidebar (page still reachable by URL)
D3 Remove dead notifications + settings icons from header
D4 Cost by Locale table added to Analytics with total + avg cost per brief

Makefile seed target now also runs register_storage_files so TM registry is
populated from disk on first setup (deploy.sh already does this via --init).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-04 16:12:47 -04:00

190 lines
6.2 KiB
Python

"""Generate output xlsx files with structured output and summary tabs.
Tab 1: 11-column output table
Tab 2: Linguistic Summary (agent's free-text summary plus a metadata footer)
Column widths and formatting per specification.
"""
from pathlib import Path
from typing import Any
from openpyxl import Workbook
from openpyxl.styles import Alignment, Font, PatternFill
from openpyxl.utils import get_column_letter
from app.pipeline.modules.line_break_normaliser import normalise_for_excel
# Tab 1 layout as (header text, column width) pairs, in left-to-right order:
# two source columns followed by an Option / Back-translation / Rationale
# triplet for each of the three options.
OUTPUT_COLUMNS = [
    ("EN_GB", 40),
    ("Copy Type", 15),
    *(
        (f"{label} {option_no}", width)
        for option_no in (1, 2, 3)
        for label, width in (
            ("Option", 40),
            ("Back-translation", 40),
            ("Rationale", 35),
        )
    ),
]
# Header row styling: bold size-11 text in FFFFFF on a solid 232F3E fill,
# centred both ways with wrapping enabled.
HEADER_FONT = Font(
    color="FFFFFF",
    size=11,
    bold=True,
)
HEADER_FILL = PatternFill(
    fill_type="solid",
    start_color="232F3E",
    end_color="232F3E",
)
HEADER_ALIGNMENT = Alignment(
    wrap_text=True,
    horizontal="center",
    vertical="center",
)

# Data cell styling: wrapped text anchored to the top of the cell.
DATA_ALIGNMENT = Alignment(wrap_text=True, vertical="top")
def generate_output_xlsx(
    output_path: str,
    source_lines: list[dict[str, Any]],
    output_rows: list[dict[str, Any]],
    summary: dict[str, Any] | None = None,
    linguistic_summary: str = "",
) -> str:
    """Generate the output xlsx file.

    The workbook has two sheets: "Transcreation Output" (one row per entry in
    ``output_rows``, laid out per ``OUTPUT_COLUMNS``) and "Linguistic Summary".

    Args:
        output_path: Path where the xlsx should be saved. Missing parent
            directories are created.
        source_lines: List of source line dicts (en_gb, copy_type, etc.).
        output_rows: List of output row dicts with options, backtranslations,
            rationales.
        summary: Optional metadata dict. Only ``job_id``, ``locale_code`` and
            ``generated_at`` are read, for the footer line on Tab 2.
        linguistic_summary: Free-text linguistic summary from the agent, shown
            on Tab 2. When empty or whitespace-only, a fixed "No linguistic
            summary was generated" notice is written instead; there is no
            metadata-table fallback.

    Returns:
        ``output_path``, exactly as it was passed in.
    """
    wb = Workbook()

    # ---- Tab 1: Output Table ----
    ws1 = wb.active
    ws1.title = "Transcreation Output"
    _write_output_sheet(ws1, source_lines, output_rows)

    # ---- Tab 2: Linguistic Summary ----
    ws2 = wb.create_sheet("Linguistic Summary")
    _write_summary_sheet(ws2, linguistic_summary, summary or {})

    # Save
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    wb.save(output_path)
    wb.close()
    return output_path


def _write_output_sheet(
    ws: Any,
    source_lines: list[dict[str, Any]],
    output_rows: list[dict[str, Any]],
) -> None:
    """Write the styled header row and one data row per output row to Tab 1."""
    for col_idx, (header, width) in enumerate(OUTPUT_COLUMNS, start=1):
        cell = ws.cell(row=1, column=col_idx, value=header)
        cell.font = HEADER_FONT
        cell.fill = HEADER_FILL
        cell.alignment = HEADER_ALIGNMENT
        ws.column_dimensions[get_column_letter(col_idx)].width = width

    for row_idx, output_row in enumerate(output_rows, start=2):
        source_line = _find_source_line(source_lines, output_row)

        # Values in OUTPUT_COLUMNS order. Copy text goes through
        # normalise_for_excel; copy type and rationales are written as-is.
        values = [
            normalise_for_excel(source_line.get("en_gb", "")),
            source_line.get("copy_type", ""),
        ]
        for option_no in (1, 2, 3):
            values.append(
                normalise_for_excel(output_row.get(f"option_{option_no}", ""))
            )
            values.append(
                normalise_for_excel(
                    output_row.get(f"backtranslation_{option_no}", "")
                )
            )
            values.append(output_row.get(f"rationale_{option_no}", ""))

        for col_idx, value in enumerate(values, start=1):
            ws.cell(
                row=row_idx, column=col_idx, value=value
            ).alignment = DATA_ALIGNMENT


def _write_summary_sheet(
    ws: Any,
    linguistic_summary: str,
    summary_data: dict[str, Any],
) -> None:
    """Write the title, summary body (or fallback notice) and footer to Tab 2."""
    ws.column_dimensions["A"].width = 100

    title_cell = ws.cell(row=1, column=1, value="Linguistic Summary")
    title_cell.font = Font(bold=True, size=14)

    body = linguistic_summary.strip() if linguistic_summary else ""
    if not body:
        body = (
            "No linguistic summary was generated for this job. "
            "This can happen if the agent's output was truncated or "
            "if no commentary was produced after the translation table."
        )

    summary_cell = ws.cell(row=3, column=1, value=body)
    summary_cell.alignment = Alignment(wrap_text=True, vertical="top")

    # The whole body lives in one cell, so the row is sized from the number of
    # explicit lines at 15 points each. Excel's documented maximum row height
    # is 409 points, so the height is clamped to that limit.
    line_count = body.count("\n") + 1
    excel_max_row_height = 409
    ws.row_dimensions[3].height = max(
        15, min(excel_max_row_height, line_count * 15)
    )

    # Metadata footer (job id / locale / generated at)
    footer_row = 5 + line_count
    footer_text = (
        f"Job: {summary_data.get('job_id', '')} | "
        f"Locale: {summary_data.get('locale_code', '')} | "
        f"Generated: {summary_data.get('generated_at', '')}"
    )
    footer_cell = ws.cell(row=footer_row, column=1, value=footer_text)
    footer_cell.font = Font(color="888888", size=9)
def _find_source_line(
source_lines: list[dict[str, Any]], output_row: dict[str, Any]
) -> dict[str, Any]:
"""Find the source line matching an output row by row_order or line_id."""
row_order = output_row.get("row_order")
line_id = output_row.get("line_id")
for sl in source_lines:
if line_id and sl.get("id") == line_id:
return sl
if row_order is not None and sl.get("row_order") == row_order:
return sl
# Fallback: match by index
if row_order is not None and 0 < row_order <= len(source_lines):
return source_lines[row_order - 1]
return {}