amazon-transcreation/backend/app/pipeline/modules/excel_writer.py
DJP 98fa16bfc3 feat: complete Phase 1-2 scaffold — backend, frontend, pipeline skeleton
Full-stack Amazon AI Transcreation Platform with:
- FastAPI backend (async, PostgreSQL, Redis, Celery) with 11 DB tables
- JWT auth (SSO-ready abstract provider pattern)
- 6-agent pipeline orchestrator with deterministic modules
- Next.js 14 frontend with Amazon branding (Ember fonts, orange/dark theme)
- Job wizard, monitoring HUD, output review, admin screens
- 154 TM/reference files imported, 12 locales configured
- Docker Compose for all services

Agents 2-5 (TM retrieval, ranker, transcreator, compliance) are stubs
pending Phase 3 LLM integration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-10 12:31:43 -04:00

184 lines
6.1 KiB
Python

"""Generate output xlsx files with structured output and summary tabs.
Tab 1: 11-column output table
Tab 2: Transcreation Summary
Column widths and formatting per specification.
"""
from pathlib import Path
from typing import Any
from openpyxl import Workbook
from openpyxl.styles import Alignment, Font, PatternFill
from openpyxl.utils import get_column_letter
from app.pipeline.modules.line_break_normaliser import normalise_for_excel
# Tab 1 layout: one (header text, column width) pair per column, in
# left-to-right sheet order. generate_output_xlsx writes the header text into
# row 1 and assigns the number to that column's width.
OUTPUT_COLUMNS = (
    # Source copy
    [("EN_GB", 40), ("Copy Type", 15)]
    # First transcreation option
    + [("Option 1", 40), ("Back-translation 1", 40), ("Rationale 1", 35)]
    # Second transcreation option
    + [("Option 2", 40), ("Back-translation 2", 40), ("Rationale 2", 35)]
    # Third transcreation option
    + [("Option 3", 40), ("Back-translation 3", 40), ("Rationale 3", 35)]
)
# Header row styling: bold white 11pt text on a solid dark blue-grey (232F3E)
# fill, centred both ways, with wrapping enabled.
HEADER_FONT = Font(
    color="FFFFFF",
    size=11,
    bold=True,
)
HEADER_FILL = PatternFill(
    fill_type="solid",
    start_color="232F3E",
    end_color="232F3E",
)
HEADER_ALIGNMENT = Alignment(
    wrap_text=True,
    horizontal="center",
    vertical="center",
)

# Data cell styling: wrapped text anchored to the top of the cell.
DATA_ALIGNMENT = Alignment(
    wrap_text=True,
    vertical="top",
)
def generate_output_xlsx(
    output_path: str,
    source_lines: list[dict[str, Any]],
    output_rows: list[dict[str, Any]],
    summary: dict[str, Any] | None = None,
) -> str:
    """Build the two-tab transcreation workbook and save it to *output_path*.

    Tab 1 ("Transcreation Output") receives a styled header row taken from
    OUTPUT_COLUMNS, then one row per entry of *output_rows*. Tab 2
    ("Transcreation Summary") receives one bold label / value pair per
    summary field.

    Args:
        output_path: Where the xlsx is saved. Missing parent directories are
            created before saving.
        source_lines: Source line dicts. For each output row the matching
            entry (see _find_source_line) supplies "en_gb" and "copy_type".
        output_rows: Output row dicts supplying "option_N",
            "backtranslation_N" and "rationale_N" for N = 1..3. Missing keys
            are written as empty strings.
        summary: Optional values for Tab 2. Missing keys fall back to the
            per-field defaults below; every value is written via str().

    Returns:
        *output_path*, exactly as it was passed in.
    """
    workbook = Workbook()

    # ---- Tab 1: Output Table ----
    output_sheet = workbook.active
    output_sheet.title = "Transcreation Output"

    # Header row plus column widths.
    for position, (title, col_width) in enumerate(OUTPUT_COLUMNS, start=1):
        header_cell = output_sheet.cell(row=1, column=position, value=title)
        header_cell.font = HEADER_FONT
        header_cell.fill = HEADER_FILL
        header_cell.alignment = HEADER_ALIGNMENT
        column_letter = get_column_letter(position)
        output_sheet.column_dimensions[column_letter].width = col_width

    # One entry per Tab 1 column, in sheet order:
    # (read from the source line?, dict key, run through normalise_for_excel?)
    row_fields = (
        (True, "en_gb", True),
        (True, "copy_type", False),
        (False, "option_1", True),
        (False, "backtranslation_1", True),
        (False, "rationale_1", False),
        (False, "option_2", True),
        (False, "backtranslation_2", True),
        (False, "rationale_2", False),
        (False, "option_3", True),
        (False, "backtranslation_3", True),
        (False, "rationale_3", False),
    )

    # Data starts on sheet row 2, directly below the header.
    for sheet_row, record in enumerate(output_rows, start=2):
        matched_source = _find_source_line(source_lines, record)
        for sheet_col, (from_source, key, normalise) in enumerate(
            row_fields, start=1
        ):
            holder = matched_source if from_source else record
            content = holder.get(key, "")
            if normalise:
                content = normalise_for_excel(content)
            data_cell = output_sheet.cell(
                row=sheet_row, column=sheet_col, value=content
            )
            data_cell.alignment = DATA_ALIGNMENT

    # ---- Tab 2: Transcreation Summary ----
    summary_sheet = workbook.create_sheet("Transcreation Summary")
    details = summary or {}

    # (label shown in column A, key looked up in the summary, default value)
    summary_fields = (
        ("Job ID", "job_id", ""),
        ("Campaign", "campaign_name", ""),
        ("Locale", "locale_code", ""),
        ("Channel", "channel", ""),
        ("Programme", "programme", ""),
        ("Total Source Lines", "total_source_lines", 0),
        ("Total Output Rows", "total_output_rows", 0),
        ("High Confidence", "high_confidence", 0),
        ("Moderate Confidence", "moderate_confidence", 0),
        ("Low Confidence", "low_confidence", 0),
        ("Total Tokens Used", "total_tokens", 0),
        ("Estimated Cost (USD)", "estimated_cost", 0.0),
        ("Agent Version", "agent_version", ""),
        ("Generated At", "generated_at", ""),
    )

    summary_sheet.column_dimensions["A"].width = 25
    summary_sheet.column_dimensions["B"].width = 40

    for line_no, (label, key, fallback) in enumerate(summary_fields, start=1):
        summary_sheet.cell(row=line_no, column=1, value=label).font = Font(bold=True)
        shown = str(details.get(key, fallback))
        summary_sheet.cell(row=line_no, column=2, value=shown)

    # Save
    target_dir = Path(output_path).parent
    target_dir.mkdir(parents=True, exist_ok=True)
    workbook.save(output_path)
    workbook.close()
    return output_path
def _find_source_line(
    source_lines: list[dict[str, Any]], output_row: dict[str, Any]
) -> dict[str, Any]:
    """Return the source line that belongs to *output_row*.

    Matching is attempted in order of decreasing precision, and each step
    scans the whole list before the next one is tried:

    1. ``output_row["line_id"]`` equals a source line's ``"id"``.
    2. ``output_row["row_order"]`` equals a source line's ``"row_order"``.
    3. ``row_order`` used as a 1-based position in *source_lines*.

    Args:
        source_lines: Candidate source line dicts.
        output_row: Output row dict; only "line_id" and "row_order" are read.

    Returns:
        The matching dict from *source_lines* (the same object, not a copy),
        or an empty dict when nothing matches.
    """
    # Pass 1: exact id match. Done as its own pass so that an earlier element
    # which merely shares the row_order cannot shadow the precise id match.
    # `is not None` (rather than truthiness) keeps an id of 0 usable.
    line_id = output_row.get("line_id")
    if line_id is not None:
        for candidate in source_lines:
            if candidate.get("id") == line_id:
                return candidate

    row_order = output_row.get("row_order")
    if row_order is None:
        return {}

    # Pass 2: explicit row_order recorded on the source line.
    for candidate in source_lines:
        if candidate.get("row_order") == row_order:
            return candidate

    # Pass 3: positional fallback, treating row_order as a 1-based index.
    # Non-integer values cannot index a list, so they count as "no match"
    # instead of raising TypeError.
    if isinstance(row_order, int) and 0 < row_order <= len(source_lines):
        return source_lines[row_order - 1]
    return {}