amazon-transcreation/backend/app/pipeline/contracts.py
DJP 5e0a148b96 feat: add token usage tracking, feedback highlighting, cost on cards, help page
- Wire token usage from LLM agents through pipeline context to DB and frontend
- Agents 2 and 4 accumulate input/output tokens and cost into PipelineContext
- job_tasks.py saves token totals to locale instance after pipeline completion
- Monitoring cards show total tokens and estimated cost instead of broken 0/0
- Make feedback highlighting bolder: colored card borders, stronger button states
- Add estimated cost display to dashboard job cards
- Add Help page with full documentation and link in sidebar navigation
- Comprehensive README with ASCII architecture diagrams

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-10 16:47:36 -04:00

144 lines
3.7 KiB
Python

"""Pipeline data contracts - Pydantic models for inter-agent communication."""
from typing import Any
from pydantic import BaseModel
class TMEntry(BaseModel):
    """One record of a Translation Memory (TM).

    ``seg_key``, ``date``, ``en``, ``lc`` and ``tx`` must be supplied;
    ``nt``, ``channel`` and ``sub_channel`` fall back to the empty string.
    """

    # Pydantic setting: permits validating the model from an object that
    # exposes these names as attributes, not only from a mapping.
    model_config = {"from_attributes": True}

    # NOTE(review): the short names are not explained in this module.
    # Presumably en = English source, lc = locale code, tx = translated
    # text, nt = note -- confirm against the TM loader.
    seg_key: str
    date: str
    en: str
    lc: str
    tx: str
    nt: str = ""
    channel: str = ""
    sub_channel: str = ""

    # NOTE(review): because of the leading underscore, Pydantic v2 treats
    # this as a private attribute rather than a validated / serialised
    # field -- confirm that is the intent.
    _text: str = ""
class SourceLineContract(BaseModel):
    """One source line as parsed out of the input xlsx.

    ``line_id``, ``row_order`` and ``en_gb`` are mandatory. The remaining
    descriptive fields are optional and default to ``None``, except
    ``is_display_format`` which defaults to ``False``.
    """

    # Mandatory identity / content of the line.
    line_id: str
    row_order: int
    en_gb: str

    # Optional metadata accompanying the line.
    copy_type: str | None = None
    creative_guidance: str | None = None
    visual_ref: str | None = None
    # Carried as a string (or None), not as an integer.
    char_limit: str | None = None
    is_display_format: bool = False
class FileManifest(BaseModel):
    """Inventory of every file loaded for a job.

    No field is required: ``tm_files`` starts as an empty list and each
    single-file slot starts as ``None``.
    """

    # Translation Memory files (zero or more).
    tm_files: list[str] = []

    # Single-file slots; ``None`` when no value was recorded.
    glossary_file: str | None = None
    blacklist_file: str | None = None
    tov_global_file: str | None = None
    tov_supplement_file: str | None = None
    locale_considerations_file: str | None = None
    date_pct_formats_file: str | None = None
class JobParams(BaseModel):
    """The parameters that describe one transcreation job.

    Everything is required except ``sub_channel`` and ``context_prompt``,
    which default to ``None``.
    """

    # Identifiers.
    job_id: str
    client_id: str
    locale_code: str

    # Placement of the copy; the sub-channel may be omitted.
    channel: str
    sub_channel: str | None = None

    # Campaign description.
    programme: str
    campaign_name: str
    context_prompt: str | None = None
class ParsedJob(BaseModel):
    """What Agent 1 (Validator) emits: validated job parameters plus source.

    All three members are required.
    """

    job_params: JobParams
    source_lines: list[SourceLineContract]
    file_manifest: FileManifest
class ConfirmedMatch(BaseModel):
    """A TM match that has been confirmed for a source line.

    ``seg_key``, ``pass_found``, ``date``, ``en`` and ``tx`` are required;
    the other fields carry defaults (empty string / ``False``).
    """

    seg_key: str
    # NOTE(review): presumably the number of the sweep pass that produced
    # this match -- confirm against the TM sweep agent.
    pass_found: int
    date: str
    en: str
    tx: str

    # Optional details.
    nt: str = ""
    channel: str = ""
    sub_channel: str = ""
    is_cross_channel: bool = False
class TMSweepResult(BaseModel):
    """Outcome of the TM sweep for one source line.

    Only ``line_id`` is required. By default there are no confirmed
    matches, pass 4 is not marked as triggered and has no result, and
    ``no_match`` is ``False``.
    """

    line_id: str
    confirmed_matches: list[ConfirmedMatch] = []

    # Pass-4 bookkeeping: a flag plus an optional single match.
    pass_4_triggered: bool = False
    pass_4_result: ConfirmedMatch | None = None

    no_match: bool = False
class RankingDeclaration(BaseModel):
    """The ranking decision recorded for one source line.

    Only ``line_id`` is required; the defaults describe a declaration with
    no winner, no runner-ups, ``"low"`` confidence and three options.
    """

    line_id: str

    # Selected match (if any) and the alternatives that lost to it.
    winning_entry: ConfirmedMatch | None = None
    runner_ups: list[ConfirmedMatch] = []

    # Free-form tier label; this model does not restrict its values.
    confidence_tier: str = "low"
    option_count: int = 3
    is_new_creative_line: bool = False
    notes: str = ""
class DraftOption(BaseModel):
    """One candidate transcreation draft.

    All three strings are required.
    """

    text: str
    backtranslation: str
    rationale: str
class DraftOutput(BaseModel):
    """The drafted transcreation options for one source line.

    ``line_id`` and ``option_1`` are required; the second and third
    options may be absent (``None``), and both lists default to empty.
    """

    line_id: str

    # Up to three options; only the first is mandatory.
    option_1: DraftOption
    option_2: DraftOption | None = None
    option_3: DraftOption | None = None

    tm_entries_cited: list[str] = []
    adaptations_applied: list[str] = []
class ComplianceViolation(BaseModel):
    """One violation reported by the compliance check.

    ``type``, ``option_affected`` and ``description`` are required;
    ``severity`` defaults to ``"warning"``.
    """

    type: str
    # NOTE(review): presumably the 1-based number of the draft option
    # concerned -- confirm against the compliance agent.
    option_affected: int
    description: str
    # Free-form severity label; this model does not restrict its values.
    severity: str = "warning"
class ComplianceResult(BaseModel):
    """The compliance verdict for one source line.

    ``line_id`` and ``passed`` are required; ``violations`` defaults to an
    empty list and ``character_counts`` to an empty mapping.
    """

    line_id: str
    passed: bool
    violations: list[ComplianceViolation] = []
    # String keys mapped to integer counts.
    character_counts: dict[str, int] = {}
class PipelineContext(BaseModel):
    """The complete pipeline state handed from one agent to the next.

    Only ``job_params`` is required. Every other member starts empty
    (lists), as a default ``FileManifest``, or at zero (token counters).
    """

    job_params: JobParams

    # Job inputs.
    source_lines: list[SourceLineContract] = []
    file_manifest: FileManifest = FileManifest()

    # Per-line result collections, one list per result type.
    tm_sweep_results: list[TMSweepResult] = []
    ranking_declarations: list[RankingDeclaration] = []
    draft_outputs: list[DraftOutput] = []
    compliance_results: list[ComplianceResult] = []

    # Running token-usage totals; agents update these after every LLM call.
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_estimated_cost: float = 0.0