amazon-transcreation/backend/app/pipeline/agents/agent_6_formatter.py
DJP 98fa16bfc3 feat: complete Phase 1-2 scaffold — backend, frontend, pipeline skeleton
Full-stack Amazon AI Transcreation Platform with:
- FastAPI backend (async, PostgreSQL, Redis, Celery) with 11 DB tables
- JWT auth (SSO-ready abstract provider pattern)
- 6-agent pipeline orchestrator with deterministic modules
- Next.js 14 frontend with Amazon branding (Ember fonts, orange/dark theme)
- Job wizard, monitoring HUD, output review, admin screens
- 154 TM/reference files imported, 12 locales configured
- Docker Compose for all services

Agents 2-5 (TM retrieval, ranker, transcreator, compliance) are stubs
pending Phase 3 LLM integration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-10 12:31:43 -04:00

112 lines
4 KiB
Python

"""Agent 6: Formatter
Generates the output xlsx file and builds output row data.
This agent is deterministic (no LLM call).
"""
from datetime import datetime, timezone
from typing import Any
from uuid import uuid4
from app.pipeline.agents.base import BaseAgent
from app.pipeline.contracts import PipelineContext
from app.pipeline.modules.excel_writer import generate_output_xlsx
from app.config import settings
class Agent6Formatter(BaseAgent):
    """Format pipeline output into an xlsx workbook.

    This agent is deterministic: it never calls an LLM. The prompt/parse
    hooks therefore return empty placeholders (presumably they exist only
    to satisfy the ``BaseAgent`` interface -- confirm against the base class).
    """

    name = "agent_6_formatter"
    description = "Generates output xlsx and structured output rows"

    def __init__(self, output_dir: str | None = None) -> None:
        """Create the formatter.

        Args:
            output_dir: Root directory under which job output is written.
                Falls back to ``settings.STORAGE_ROOT`` when omitted or empty.
        """
        self.output_dir = output_dir or settings.STORAGE_ROOT

    def get_system_prompt(self) -> str:
        """Return an empty prompt; this agent makes no LLM call."""
        return ""

    def build_user_message(self, context: PipelineContext) -> str:
        """Return an empty message; this agent makes no LLM call."""
        return ""

    def parse_response(self, response: str, context: PipelineContext) -> Any:
        """Return ``None``; there is no LLM response to parse."""
        return None

    async def run(self, context: PipelineContext) -> PipelineContext:
        """Generate the output xlsx for the job described by ``context``.

        The workbook is produced by ``generate_output_xlsx`` at
        ``<output_dir>/jobs/<job_id>/output/<locale>_<job_id>_output.xlsx``.

        Args:
            context: Pipeline state; ``job_params``, ``source_lines``,
                ``draft_outputs`` and ``ranking_declarations`` are read.

        Returns:
            The same ``context`` object, unmodified. The output path is not
            recorded on it; per the original design, output rows for
            database persistence are built by the orchestrator.
        """
        job_id = context.job_params.job_id
        locale_code = context.job_params.locale_code

        source_rows = self._build_source_rows(context)
        output_rows = self._build_output_rows(context)
        tier_counts = self._count_confidence_tiers(context)
        summary = self._build_summary(
            context, len(output_rows), tier_counts
        )

        # NOTE(review): this call is synchronous inside an async method; if
        # it performs blocking file I/O it will hold the event loop. Confirm
        # whether that matters for the caller before offloading it.
        generate_output_xlsx(
            output_path=self._output_path(job_id, locale_code),
            source_lines=source_rows,
            output_rows=output_rows,
            summary=summary,
        )
        return context

    @staticmethod
    def _build_source_rows(context: PipelineContext) -> list[dict[str, Any]]:
        """Flatten source lines into the dicts passed to the excel writer."""
        return [
            {
                "row_order": line.row_order,
                "en_gb": line.en_gb,
                # A missing copy type is written as an empty string.
                "copy_type": line.copy_type or "",
            }
            for line in context.source_lines
        ]

    @staticmethod
    def _build_output_rows(context: PipelineContext) -> list[dict[str, Any]]:
        """Flatten draft outputs into one dict per row.

        The ``*_1`` keys are always present (empty strings when option 1 is
        missing); the ``*_2`` / ``*_3`` keys are added only when that option
        exists.
        """
        rows: list[dict[str, Any]] = []
        # NOTE(review): row_order is the 1-based position of the draft, not
        # the source line's own row_order -- confirm the two always align.
        for position, draft in enumerate(context.draft_outputs, start=1):
            first = draft.option_1
            row: dict[str, Any] = {
                "row_order": position,
                "line_id": draft.line_id,
                "option_1": first.text if first else "",
                "backtranslation_1": first.backtranslation if first else "",
                "rationale_1": first.rationale if first else "",
            }
            for index, option in ((2, draft.option_2), (3, draft.option_3)):
                if option:
                    row[f"option_{index}"] = option.text
                    row[f"backtranslation_{index}"] = option.backtranslation
                    row[f"rationale_{index}"] = option.rationale
            rows.append(row)
        return rows

    @staticmethod
    def _count_confidence_tiers(context: PipelineContext) -> dict[str, int]:
        """Tally ranking declarations per confidence tier.

        Only the tiers ``high``, ``moderate`` and ``low`` are counted; any
        other tier value is ignored.
        """
        counts = {"high": 0, "moderate": 0, "low": 0}
        for declaration in context.ranking_declarations:
            tier = declaration.confidence_tier
            if tier in counts:
                counts[tier] += 1
        return counts

    @staticmethod
    def _build_summary(
        context: PipelineContext,
        total_output_rows: int,
        tier_counts: dict[str, int],
    ) -> dict[str, Any]:
        """Assemble the summary dict passed to the excel writer."""
        params = context.job_params
        return {
            "job_id": params.job_id,
            "campaign_name": params.campaign_name,
            "locale_code": params.locale_code,
            "channel": params.channel,
            "programme": params.programme,
            "total_source_lines": len(context.source_lines),
            "total_output_rows": total_output_rows,
            "high_confidence": tier_counts["high"],
            "moderate_confidence": tier_counts["moderate"],
            "low_confidence": tier_counts["low"],
            # Token and cost figures are hard-coded to zero here.
            "total_tokens": 0,
            "estimated_cost": 0.0,
            "agent_version": "1.0.0",
            # Timezone-aware UTC timestamp in ISO 8601 form.
            "generated_at": datetime.now(timezone.utc).isoformat(),
        }

    def _output_path(self, job_id: Any, locale_code: Any) -> str:
        """Return the xlsx destination path for a job/locale pair."""
        return (
            f"{self.output_dir}/jobs/{job_id}/output/"
            f"{locale_code}_{job_id}_output.xlsx"
        )