Full-stack Amazon AI Transcreation Platform with: - FastAPI backend (async, PostgreSQL, Redis, Celery) with 11 DB tables - JWT auth (SSO-ready abstract provider pattern) - 6-agent pipeline orchestrator with deterministic modules - Next.js 14 frontend with Amazon branding (Ember fonts, orange/dark theme) - Job wizard, monitoring HUD, output review, admin screens - 154 TM/reference files imported, 12 locales configured - Docker Compose for all services Agents 2-5 (TM retrieval, ranker, transcreator, compliance) are stubs pending Phase 3 LLM integration. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
184 lines
6.1 KiB
Python
184 lines
6.1 KiB
Python
"""Generate output xlsx files with structured output and summary tabs.
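
Tab 1: 11-column output table (source copy and copy type, followed by three
    options, each with its back-translation and rationale)
Tab 2: Transcreation Summary (label/value pairs describing the job)

Column widths and formatting per specification.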
"""
|
|
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from openpyxl import Workbook
|
|
from openpyxl.styles import Alignment, Font, PatternFill
|
|
from openpyxl.utils import get_column_letter
|
|
|
|
from app.pipeline.modules.line_break_normaliser import normalise_for_excel
|
|
|
|
# Tab 1 layout: (header text, column width) pairs, listed left to right.
# The position in this list is the 1-based worksheet column the header lands in.
OUTPUT_COLUMNS: list[tuple[str, int]] = [
    # Source copy
    ("EN_GB", 40), ("Copy Type", 15),
    # First transcreation option
    ("Option 1", 40), ("Back-translation 1", 40), ("Rationale 1", 35),
    # Second transcreation option
    ("Option 2", 40), ("Back-translation 2", 40), ("Rationale 2", 35),
    # Third transcreation option
    ("Option 3", 40), ("Back-translation 3", 40), ("Rationale 3", 35),
]
|
|
|
|
# Header row styling: bold white 11pt text, centred both ways and wrapped,
# on a solid dark (232F3E) fill.
HEADER_FONT = Font(color="FFFFFF", size=11, bold=True)
HEADER_FILL = PatternFill(fill_type="solid", start_color="232F3E", end_color="232F3E")
HEADER_ALIGNMENT = Alignment(wrap_text=True, horizontal="center", vertical="center")

# Data cell styling: text is wrapped and anchored to the top of the cell.
DATA_ALIGNMENT = Alignment(wrap_text=True, vertical="top")
|
|
|
|
|
|
def generate_output_xlsx(
    output_path: str,
    source_lines: list[dict[str, Any]],
    output_rows: list[dict[str, Any]],
    summary: dict[str, Any] | None = None,
) -> str:
    """Build the two-tab transcreation workbook and save it to disk.

    Tab 1 ("Transcreation Output") receives a styled header row taken from
    ``OUTPUT_COLUMNS`` followed by one row per entry in ``output_rows``.
    Tab 2 ("Transcreation Summary") receives label/value pairs read from
    ``summary``.

    Args:
        output_path: Path the xlsx is saved to. Missing parent directories
            are created before saving.
        source_lines: Source line dicts. The ``en_gb`` and ``copy_type``
            entries of the line matched to each output row fill columns 1-2.
        output_rows: Output row dicts providing ``option_N``,
            ``backtranslation_N`` and ``rationale_N`` for N in 1..3.
        summary: Optional summary data for Tab 2. Missing keys fall back to
            an empty string or zero.

    Returns:
        ``output_path``, exactly as it was passed in.
    """
    workbook = Workbook()

    # ---- Tab 1: Output Table ----
    output_sheet = workbook.active
    output_sheet.title = "Transcreation Output"

    # Header row: styled cell plus the configured width for each column.
    for col, (heading, col_width) in enumerate(OUTPUT_COLUMNS, start=1):
        header_cell = output_sheet.cell(row=1, column=col, value=heading)
        header_cell.font = HEADER_FONT
        header_cell.fill = HEADER_FILL
        header_cell.alignment = HEADER_ALIGNMENT
        output_sheet.column_dimensions[get_column_letter(col)].width = col_width

    # Keys read from each output row, one triple per option, in column order.
    option_keys = (
        ("option_1", "backtranslation_1", "rationale_1"),
        ("option_2", "backtranslation_2", "rationale_2"),
        ("option_3", "backtranslation_3", "rationale_3"),
    )

    # Data rows start at row 2, directly below the header.
    for row_number, out_row in enumerate(output_rows, start=2):
        matched_source = _find_source_line(source_lines, out_row)

        # Copy text goes through normalise_for_excel; copy type and
        # rationales are written as-is.
        row_values = [
            normalise_for_excel(matched_source.get("en_gb", "")),
            matched_source.get("copy_type", ""),
        ]
        for option_key, backtranslation_key, rationale_key in option_keys:
            row_values.append(normalise_for_excel(out_row.get(option_key, "")))
            row_values.append(
                normalise_for_excel(out_row.get(backtranslation_key, ""))
            )
            row_values.append(out_row.get(rationale_key, ""))

        for col, cell_value in enumerate(row_values, start=1):
            data_cell = output_sheet.cell(
                row=row_number, column=col, value=cell_value
            )
            data_cell.alignment = DATA_ALIGNMENT

    # ---- Tab 2: Transcreation Summary ----
    summary_sheet = workbook.create_sheet("Transcreation Summary")

    details = summary or {}
    # (label shown in column A, key looked up in the summary, default value)
    summary_fields = (
        ("Job ID", "job_id", ""),
        ("Campaign", "campaign_name", ""),
        ("Locale", "locale_code", ""),
        ("Channel", "channel", ""),
        ("Programme", "programme", ""),
        ("Total Source Lines", "total_source_lines", 0),
        ("Total Output Rows", "total_output_rows", 0),
        ("High Confidence", "high_confidence", 0),
        ("Moderate Confidence", "moderate_confidence", 0),
        ("Low Confidence", "low_confidence", 0),
        ("Total Tokens Used", "total_tokens", 0),
        ("Estimated Cost (USD)", "estimated_cost", 0.0),
        ("Agent Version", "agent_version", ""),
        ("Generated At", "generated_at", ""),
    )

    summary_sheet.column_dimensions["A"].width = 25
    summary_sheet.column_dimensions["B"].width = 40

    for row_number, (label, key, default) in enumerate(summary_fields, start=1):
        summary_sheet.cell(row=row_number, column=1, value=label).font = Font(
            bold=True
        )
        # Every summary value is written as text, whatever its original type.
        summary_sheet.cell(
            row=row_number, column=2, value=str(details.get(key, default))
        )

    # Save
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    workbook.save(output_path)
    workbook.close()

    return output_path
|
|
|
|
|
|
def _find_source_line(
|
|
source_lines: list[dict[str, Any]], output_row: dict[str, Any]
|
|
) -> dict[str, Any]:
|
|
"""Find the source line matching an output row by row_order or line_id."""
|
|
row_order = output_row.get("row_order")
|
|
line_id = output_row.get("line_id")
|
|
|
|
for sl in source_lines:
|
|
if line_id and sl.get("id") == line_id:
|
|
return sl
|
|
if row_order is not None and sl.get("row_order") == row_order:
|
|
return sl
|
|
|
|
# Fallback: match by index
|
|
if row_order is not None and 0 < row_order <= len(source_lines):
|
|
return source_lines[row_order - 1]
|
|
|
|
return {}
|