# amazon-transcreation/backend/app/pipeline/agents/agent_6_formatter.py

"""Agent 6: Formatter

Generates the output xlsx file and builds output row data.
This agent is deterministic (no LLM call).
"""

from datetime import datetime, timezone
from typing import Any
from uuid import uuid4

from app.pipeline.agents.base import BaseAgent
from app.pipeline.contracts import PipelineContext
from app.pipeline.modules.excel_writer import generate_output_xlsx
from app.config import settings


class Agent6Formatter(BaseAgent):
    """Deterministic final stage that writes the job's output xlsx file.

    No LLM is called by this agent, so the prompt/response methods return
    empty values; all of the work happens in ``run``, which assembles plain
    dicts from the pipeline context and passes them to
    ``generate_output_xlsx``.
    """

    name = "agent_6_formatter"
    description = "Generates output xlsx and structured output rows"

    def __init__(self, output_dir: str | None = None) -> None:
        """Remember the root directory under which job output is written.

        Args:
            output_dir: Root directory for generated files. Any falsy value
                (``None`` or an empty string) selects
                ``settings.STORAGE_ROOT`` instead.
        """
        if output_dir:
            self.output_dir = output_dir
        else:
            self.output_dir = settings.STORAGE_ROOT

    def get_system_prompt(self) -> str:
        """Return an empty system prompt; this agent makes no LLM call."""
        return ""

    def build_user_message(self, context: PipelineContext) -> str:
        """Return an empty user message; this agent makes no LLM call."""
        return ""

    def parse_response(self, response: str, context: PipelineContext) -> Any:
        """Return ``None``; there is no LLM response to parse."""
        return None

    async def run(self, context: PipelineContext) -> PipelineContext:
        """Build the workbook inputs from ``context`` and write the xlsx file.

        Three pieces of data are assembled and passed to
        ``generate_output_xlsx``: one dict per source line, one dict per
        draft output (the option 2 / option 3 keys are present only when
        that option is truthy), and a summary dict holding job metadata and
        per-tier confidence counts.

        The file is written to
        ``<output_dir>/jobs/<job_id>/output/<locale_code>_<job_id>_output.xlsx``.
        The path is not stored on the context. Output rows for database
        persistence are built by the orchestrator, not here.

        Args:
            context: Pipeline state providing ``job_params``,
                ``source_lines``, ``draft_outputs`` and
                ``ranking_declarations``.

        Returns:
            The same ``context`` object that was passed in.
        """
        params = context.job_params
        job_id = params.job_id
        locale_code = params.locale_code

        # Source sheet: a missing copy type is written as an empty string.
        source_lines_data = [
            {
                "row_order": line.row_order,
                "en_gb": line.en_gb,
                "copy_type": line.copy_type or "",
            }
            for line in context.source_lines
        ]

        # Output sheet: rows are numbered from 1 in draft order.
        output_rows_data = []
        for position, draft in enumerate(context.draft_outputs, start=1):
            row: dict[str, Any] = {
                "row_order": position,
                "line_id": draft.line_id,
            }

            # Option 1 columns are always present, blank when it is missing.
            primary = draft.option_1
            if primary:
                row.update(
                    {
                        "option_1": primary.text,
                        "backtranslation_1": primary.backtranslation,
                        "rationale_1": primary.rationale,
                    }
                )
            else:
                row.update(
                    {
                        "option_1": "",
                        "backtranslation_1": "",
                        "rationale_1": "",
                    }
                )

            # Options 2 and 3 only contribute keys when they exist.
            secondary = draft.option_2
            if secondary:
                row.update(
                    {
                        "option_2": secondary.text,
                        "backtranslation_2": secondary.backtranslation,
                        "rationale_2": secondary.rationale,
                    }
                )

            tertiary = draft.option_3
            if tertiary:
                row.update(
                    {
                        "option_3": tertiary.text,
                        "backtranslation_3": tertiary.backtranslation,
                        "rationale_3": tertiary.rationale,
                    }
                )

            output_rows_data.append(row)

        # Tally rankings per confidence tier; unrecognised tiers are ignored.
        compliance_counts = dict.fromkeys(("high", "moderate", "low"), 0)
        for ranking in context.ranking_declarations:
            tier = ranking.confidence_tier
            if tier not in compliance_counts:
                continue
            compliance_counts[tier] += 1

        # Token and cost figures are fixed placeholder values here.
        summary = {
            "job_id": job_id,
            "campaign_name": params.campaign_name,
            "locale_code": locale_code,
            "channel": params.channel,
            "programme": params.programme,
            "total_source_lines": len(context.source_lines),
            "total_output_rows": len(output_rows_data),
            "high_confidence": compliance_counts["high"],
            "moderate_confidence": compliance_counts["moderate"],
            "low_confidence": compliance_counts["low"],
            "total_tokens": 0,
            "estimated_cost": 0.0,
            "agent_version": "1.0.0",
            "generated_at": datetime.now(tz=timezone.utc).isoformat(),
        }

        # Destination: a per-job output folder, file named by locale and job.
        job_output_dir = f"{self.output_dir}/jobs/{job_id}/output"
        file_name = f"{locale_code}_{job_id}_output.xlsx"
        output_path = f"{job_output_dir}/{file_name}"

        generate_output_xlsx(
            output_path=output_path,
            source_lines=source_lines_data,
            output_rows=output_rows_data,
            summary=summary,
        )

        return context