diff --git a/README.md b/README.md index e742f90..0a06713 100644 --- a/README.md +++ b/README.md @@ -45,13 +45,9 @@ An AI-powered transcreation platform that adapts Amazon marketing copy across 12 │ ┌────────────────────▼───────────────────────┐ │ │ │ Pipeline Orchestrator │ │ │ │ │ │ - │ │ VALIDATE ► TM_RETRIEVE ► RANK │ │ - │ │ │ │ │ │ │ - │ │ ▼ ▼ ▼ │ │ - │ │ TRANSCREATE ◄── COMPLY ──► FORMAT │ │ - │ │ │ (retry x3) │ │ │ - │ │ ▼ ▼ │ │ - │ │ DONE │ │ + │ │ VALIDATE ► SINGLE_AGENT ► FORMAT ► DONE │ │ + │ │ │ │ + │ │ (Single LLM call with full V25 prompt) │ │ │ └─────────────────────────────────────────────┘ │ └──────────┬──────────────────────────┬─────────────┘ │ │ @@ -67,8 +63,8 @@ An AI-powered transcreation platform that adapts Amazon marketing copy across 12 │ Claude API │ │ File Storage │ │ (Anthropic) │ │ │ │ │ │ /storage/amazon/ │ - │ Agents 2 & 4 │ │ tm/ (JSONL) │ - │ make LLM calls │ │ ref/ (JSON) │ + │ Single agent │ │ tm/ (JSONL) │ + │ (1 LLM call) │ │ ref/ (JSON) │ └─────────────────┘ └─────────────────────┘ ``` @@ -99,12 +95,11 @@ An AI-powered transcreation platform that adapts Amazon marketing copy across 12 │ │ │ │ │ 4. Monitor Progress │ ┌───────▼────────┐ │ │ (polls every 3 sec) │ │ Agent Pipeline │ │ - │ ◄─── 25% Matching TM ─────│ │ │──── LLM ────►│ - │ ◄─── 50% Translating ─────│ │ 6 stages per │◄── matches ──│ - │ ◄─── 65% Batch 2/4 ───────│ │ locale │──── LLM ────►│ - │ ◄─── 80% Batch 4/4 ───────│ │ │◄── drafts ───│ - │ ◄─── 90% Formatting ──────│ └───────┬─────────┘ │ - │ ◄── 100% Complete ────────│ │ │ + │ ◄─── 10% Loading Files ───│ │ │ │ + │ ◄─── 20% Transcreating ───│ │ Single agent │──── LLM ────►│ + │ ◄─── 90% Formatting ──────│ │ per locale │◄── table ────│ + │ ◄── 100% Complete ────────│ │ │ │ + │ │ └───────┬─────────┘ │ │ │ ┌───────▼────────┐ │ │ 5. 
Review Output │ │ Output saved │ │ │ (per-locale, per-line │ │ to DB + xlsx │ │ @@ -120,66 +115,56 @@ An AI-powered transcreation platform that adapts Amazon marketing copy across 12 ### What Happens When You Launch a Job -1. **Job created** with campaign name, programme (Retail/Prime/Brand), channel (Value/Mass/Onsite/Outbound), and target locales +1. **Job created** with campaign name, programme (Retail/Prime/Brand), channel, multiple TM files, and target locales (all 12 selectable in a single flat list) 2. **Source file uploaded** - an xlsx with English (en_GB) marketing copy, character limits, copy types, and creative guidance 3. **Launch** dispatches one Celery task per locale - up to 4 run in parallel -4. Each locale runs through the **6-agent pipeline** (see below) +4. Each locale runs through the **single-agent pipeline** — one LLM call with the full V25 prompt (see below) 5. Real-time **progress updates** are stored in the database and polled by the frontend every 3 seconds 6. On completion, output is viewable in the **review interface** with confidence badges, backtranslations, and rationale -7. **Export** downloads a formatted xlsx file +7. **Export** downloads a formatted xlsx (Tab 1: output table, Tab 2: linguistic summary) --- ## The Agent Pipeline -Each locale goes through 6 sequential agents. The pipeline includes a compliance retry loop (max 3 attempts). +### Single-Agent Pipeline (Default) + +The platform uses a **single consolidated LLM call** with the complete V25 Agent Instructions JSON as the system prompt. This replaces the earlier 6-agent sequential pipeline and produces better results by preserving inter-step context (TM reasoning, ranking rationale, cultural nuance) within a single prompt. ``` ┌─────────────────────────────────────────────────────────────────────────┐ │ PER-LOCALE PIPELINE │ │ │ │ ┌──────────────┐ Deterministic. 
Parses xlsx, loads glossary, │ -│ │ AGENT 1 │ blacklist, TOV, locale considerations, and │ -│ │ Validator │ date/percent format files. Builds PipelineContext. │ -│ │ [no LLM] │ ~0.1 seconds │ -│ └──────┬───────┘ │ +│ │ VALIDATE │ blacklist, TOV, locale considerations, and │ +│ │ [no LLM] │ date/percent format files. Builds PipelineContext. │ +│ └──────┬───────┘ ~0.1 seconds │ │ │ 10% │ -│ ┌──────▼───────┐ Loads TM file (~128 entries for de-DE Value). │ -│ │ AGENT 2 │ Sends ALL source lines + ALL TM entries to │ -│ │ TM Retrieval│ Claude in one call. LLM finds semantic matches │ -│ │ [LLM call] │ (exact / high / medium similarity). │ -│ └──────┬───────┘ ~45 seconds, ~$0.13 │ -│ │ 25% │ -│ ┌──────▼───────┐ Deterministic. Scores each match by channel │ -│ │ AGENT 3 │ fit, sub-channel fit, and recency (year). │ -│ │ Ranker │ Assigns confidence: HIGH (1 opt), MODERATE │ -│ │ [no LLM] │ (2 opts), LOW (3 opts). │ -│ └──────┬───────┘ ~0.01 seconds │ -│ │ 40% │ -│ ┌──────▼───────┐ Core creative agent. Processes source lines in │ -│ │ AGENT 4 │ batches of 15. System prompt includes voice │ -│ │ Transcreator│ profile, glossary, blacklist, TOV guidelines. │ -│ │ [LLM call] │ Generates options with backtranslations and │ -│ │ x4 batches │ rationale. Cites TM entries where applicable. │ -│ └──────┬───────┘ ~4 min (4 batches), ~$0.36 │ -│ │ 50-80% │ -│ ┌──────▼───────┐ Deterministic. 3 checks: │ -│ │ AGENT 5 │ 1. Character count vs char_limit │ -│ │ Compliance │ 2. Blacklist term scanning │ -│ │ [no LLM] │ 3. Domain check (Amazon.co.uk in non-en_GB) │ -│ └──────┬───────┘ If errors: retry from Agent 4 (max 3x) │ -│ │ 82% ~0.01 seconds │ -│ ┌──────▼───────┐ │ -│ │ AGENT 6 │ Deterministic. Generates output xlsx │ -│ │ Formatter │ (Tab 1: output table, Tab 2: summary). │ -│ │ [no LLM] │ Saves output rows to database. │ +│ ┌──────▼───────┐ Single LLM call using V25 Agent Instructions. │ +│ │ SINGLE │ System prompt: full V25 JSON (899 lines). 
│ +│ │ AGENT │ User message: job params, ALL source lines, │ +│ │ [1 LLM call]│ ALL TM entries (multiple channels), ALL reference │ +│ │ │ files (glossary, blacklist, TOV, locale rules). │ +│ │ │ │ +│ │ │ The agent handles TM matching, ranking, │ +│ │ │ transcreation, and compliance in one pass. │ +│ │ │ Outputs a markdown table + linguistic summary. │ +│ └──────┬───────┘ ~2-4 min, ~$0.30-0.50 │ +│ │ 20-90% │ +│ ┌──────▼───────┐ Deterministic. Generates output xlsx: │ +│ │ FORMAT │ Tab 1: 11-column output table │ +│ │ [no LLM] │ Tab 2: Linguistic summary from the agent │ │ └──────┬───────┘ ~0.1 seconds │ │ │ 100% │ │ ▼ │ -│ DONE (~5.5 min total, ~$0.49 per locale) │ +│ DONE (~2-4 min total per locale) │ └─────────────────────────────────────────────────────────────────────────┘ ``` +### Legacy 6-Agent Pipeline (Feature Flag) + +The original 6-agent sequential pipeline is preserved behind a feature flag (`USE_SINGLE_AGENT=false`). It runs: VALIDATE → TM_RETRIEVE → RANK → TRANSCREATE → COMPLY (retry x3) → FORMAT → DONE. This path makes 2+ LLM calls (TM retrieval + transcreation in batches) and takes longer (~5.5 min per locale). 
+ ### Confidence Tiers and Option Counts ``` @@ -212,7 +197,7 @@ The pipeline uses 9 pure-Python modules (no LLM) for specific tasks: | `date_format_validator` | Validate date/percent formats per locale | | `domain_substitutor` | Amazon.co.uk to locale-specific domain mapping | | `line_break_normaliser` | Handle `\n` for TM queries vs Excel output | -| `excel_writer` | Generate formatted xlsx with output + summary tabs | +| `excel_writer` | Generate formatted xlsx (Tab 1: output table, Tab 2: linguistic summary) | --- @@ -315,7 +300,8 @@ All configuration is via environment variables in `.env`: | `JWT_ALGORITHM` | `HS256` | JWT signing algorithm | | `JWT_EXPIRY_HOURS` | `8` | Access token expiry in hours | | `STORAGE_ROOT` | `/storage` | Root path for file storage | -| `LLM_MODEL` | `claude-sonnet-4-6` | Claude model to use for agents | +| `LLM_MODEL` | `claude-sonnet-4-6` | Default Claude model (overridden per-job via UI: `claude-sonnet-4-6` or `claude-opus-4-6`) | +| `USE_SINGLE_AGENT` | `true` | Use single-agent pipeline (`true`) or legacy 6-agent (`false`) | --- @@ -325,10 +311,12 @@ All configuration is via environment variables in `.env`: storage/amazon/ ├── tm/ # Translation Memory files (JSONL) │ ├── de-DE/ -│ │ ├── flat_value_de-de.json # Value channel TM (~128 entries) │ │ ├── flat_MASS_de-de.json # Mass channel TM +│ │ ├── flat_value_de-de.json # Value channel TM │ │ ├── flat_Onsite_de-de.json # Onsite channel TM -│ │ └── flat_Outbound_de-de.json # Outbound channel TM +│ │ ├── flat_Outbound_de-de.json # Outbound channel TM +│ │ ├── flat_UEFA_de-de.json # UEFA channel TM +│ │ └── ... # + BDA, DoubleDonut, EUSelection, etc. │ ├── fr-FR/ │ │ └── ... │ └── ... (12 locale directories) @@ -387,12 +375,23 @@ Each line is a JSON object. Two formats are supported: ### Channels & TM Files +Jobs can select **multiple TM channels** to load into the agent's context. 
The campaign channel is auto-selected, and users can add additional TM files for cross-channel reference (e.g. MASS as a fallback alongside the primary channel). + | Channel | TM File Pattern | |---------|----------------| -| Value | `flat_value_{lc}.json` | | Mass | `flat_MASS_{lc}.json` | +| Value | `flat_value_{lc}.json` | | Onsite | `flat_Onsite_{lc}.json` | | Outbound | `flat_Outbound_{lc}.json` | +| UEFA | `flat_UEFA_{lc}.json` | +| BDA | `flat_BDA_{lc}.json` | +| DoubleDonut | `flat_DoubleDonut_{lc}.json` | +| EUSelection | `flat_EUSelection_{lc}.json` | +| PrimeDualBenefit | `flat_PrimeDualBenefit_{lc}.json` | +| PrimeGourmetGuard | `flat_PrimeGourmetGuard_{lc}.json` | +| PrimeMidfunnel | `flat_PrimeMidfunnel_{lc}.json` | +| PrimeSpeed | `flat_PrimeSpeed_{lc}.json` | +| TheKiss | `flat_TheKiss_{lc}.json` | ### Programmes & Voice Profiles @@ -483,9 +482,10 @@ Each line is a JSON object. Two formats are supported: │ campaign_name│ │ source_lines │ │ programme │ │──────────────────│ │ channel │ │ id (PK) │ -│ status │◄────│ job_id (FK) │ -│ job_type │ │ en_gb │ -└──────┬───────┘ │ copy_type │ +│ tm_channels │ │ job_id (FK) │ +│ status │◄────│ en_gb │ +│ job_type │ │ copy_type │ +└──────┬───────┘ │ char_limit │ │ │ char_limit │ │ │ is_display_format│ ┌──────▼───────────┐ └──────────────────┘ @@ -537,8 +537,9 @@ Each line is a JSON object. Two formats are supported: - **Client** - Select the client (e.g. Amazon) - **Campaign Name** - Name of the campaign (e.g. "DDA 26 BFW") - **Programme** - Retail, Prime, or Brand (determines voice profile) - - **Channel** - Value, Mass, Onsite, or Outbound (determines TM file) - - **Locales** - Select one or more target locales + - **Channel** - Campaign channel (e.g. 
Value, Mass, Onsite, Outbound) + - **TM Files** - Select one or more TM channels to load (campaign channel auto-selected; add MASS as fallback or other channels for cross-reference) + - **Locales** - All 12 locales in a single flat grid (main and derived locales are auto-classified — no separate "Job Type" selection needed) 3. Upload the **source xlsx** file with columns: - `EN_GB` (required) - English source copy - `Copy Type` - Type of copy (headline, body, CTA, script, etc.) @@ -552,7 +553,7 @@ Each line is a JSON object. Two formats are supported: Once launched, the job monitoring page shows real-time updates: - Per-locale progress bars (0-100%) -- Current stage: Loading Files > Matching TM > Ranking > Translating (batch X/Y) > Reviewing > Formatting > Complete +- Current stage: Loading Files > Transcreating > Formatting Output > Complete - Token usage and elapsed time - Error details if any locale fails @@ -568,7 +569,7 @@ Click **Preview** on a completed locale to open the review interface: - Every option includes a **backtranslation** and **character count** - Expandable **rationale** explains the translation choices and TM citations - Feedback buttons: **Approve**, **Needs Revision**, or add a **Comment** -- **Export** button downloads the formatted xlsx +- **Export** button downloads the formatted xlsx (Tab 1: output table, Tab 2: linguistic summary explaining the agent's approach and cultural choices) ### Admin Features @@ -600,9 +601,15 @@ amazon-transcreation/ │ │ ├── schemas/ # Pydantic request/response models │ │ ├── services/ # Business logic layer │ │ ├── pipeline/ -│ │ │ ├── orchestrator.py # State machine (6 stages + retry) +│ │ │ ├── orchestrator.py # State machine (single-agent or legacy 6-agent) │ │ │ ├── contracts.py # Inter-agent Pydantic models -│ │ │ ├── agents/ # 6 pipeline agents +│ │ │ ├── agents/ +│ │ │ │ ├── agent_single.py # Consolidated single-agent (V25 prompt) +│ │ │ │ ├── agent_1_validator.py # Deterministic file/input validation 
+│ │ │ │ ├── agent_6_formatter.py # Excel output generation +│ │ │ │ ├── agent_2-5_*.py # Legacy agents (behind feature flag) +│ │ │ │ └── prompts/ +│ │ │ │ └── v25_instructions.json # V25 Agent Instructions (system prompt) │ │ │ └── modules/ # 9 deterministic modules │ │ ├── tasks/ # Celery task definitions │ │ ├── llm/ # Anthropic SDK wrapper + retry @@ -635,8 +642,8 @@ docker compose exec backend python -m pytest tests/ -v 1. Create TM files in `storage/amazon/tm/{locale_code}/` 2. Create reference files in the appropriate `storage/amazon/ref/` subdirectories -3. Add the locale to the frontend locale selector -4. Add the locale display name to `LOCALE_NAMES` in `agent_4_transcreator.py` +3. Add the locale code to `ALL_LOCALES` in `frontend/src/components/jobs/JobWizard/StepConfigure.tsx` +4. If it's a derived locale, add it to `DERIVED_LOCALE_CODES` in `backend/app/services/job_service.py` --- @@ -668,11 +675,9 @@ docker compose exec backend python -m pytest tests/ -v ### Cost Estimation -For a typical 53-line source brief: +For a typical 53-line source brief (single-agent pipeline): | | Per Locale | 12 Locales | |---|-----------|------------| -| Agent 2 (TM Retrieval) | ~$0.13 | ~$1.56 | -| Agent 4 (Transcreation) | ~$0.36 | ~$4.32 | -| **Total** | **~$0.49** | **~$5.88** | -| Processing time | ~5.5 min | ~5.5 min (parallel) | +| Single Agent (V25) | ~$0.30-0.50 | ~$3.60-6.00 | +| Processing time | ~2-4 min | ~2-4 min (parallel) | diff --git a/backend/alembic/versions/b2c3d4e5f6a7_add_llm_model_to_jobs.py b/backend/alembic/versions/b2c3d4e5f6a7_add_llm_model_to_jobs.py new file mode 100644 index 0000000..997c66a --- /dev/null +++ b/backend/alembic/versions/b2c3d4e5f6a7_add_llm_model_to_jobs.py @@ -0,0 +1,26 @@ +"""Add llm_model to jobs + +Revision ID: b2c3d4e5f6a7 +Revises: a1b2c3d4e5f6 +Create Date: 2026-04-14 12:00:00.000000 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by 
Alembic. +revision: str = "b2c3d4e5f6a7" +down_revision: Union[str, None] = "a1b2c3d4e5f6" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column("jobs", sa.Column("llm_model", sa.String(100), nullable=True)) + + +def downgrade() -> None: + op.drop_column("jobs", "llm_model") diff --git a/backend/app/llm/client.py b/backend/app/llm/client.py index ae7c15b..1ef687b 100644 --- a/backend/app/llm/client.py +++ b/backend/app/llm/client.py @@ -10,9 +10,15 @@ from app.config import settings logger = logging.getLogger(__name__) -# Cost per token (approximate, varies by model) -COST_PER_INPUT_TOKEN = 3.0 / 1_000_000 # $3 per 1M input tokens -COST_PER_OUTPUT_TOKEN = 15.0 / 1_000_000 # $15 per 1M output tokens +# Cost per token by model (approximate) +MODEL_COSTS: dict[str, tuple[float, float]] = { + # (input_cost_per_token, output_cost_per_token) + "claude-sonnet-4-6": (3.0 / 1_000_000, 15.0 / 1_000_000), + "claude-opus-4-6": (15.0 / 1_000_000, 75.0 / 1_000_000), +} +# Default fallback (Sonnet pricing) +COST_PER_INPUT_TOKEN = 3.0 / 1_000_000 +COST_PER_OUTPUT_TOKEN = 15.0 / 1_000_000 class LLMClient: @@ -81,9 +87,12 @@ class LLMClient: input_tokens = response.usage.input_tokens output_tokens = response.usage.output_tokens total_tokens = input_tokens + output_tokens + input_rate, output_rate = MODEL_COSTS.get( + self.model, (COST_PER_INPUT_TOKEN, COST_PER_OUTPUT_TOKEN) + ) estimated_cost = ( - input_tokens * COST_PER_INPUT_TOKEN - + output_tokens * COST_PER_OUTPUT_TOKEN + input_tokens * input_rate + + output_tokens * output_rate ) usage = { diff --git a/backend/app/models/job.py b/backend/app/models/job.py index 5aef8c1..94b5048 100644 --- a/backend/app/models/job.py +++ b/backend/app/models/job.py @@ -72,6 +72,7 @@ class Job(Base, TimestampMixin): channel: Mapped[str] = mapped_column(String(100), nullable=False) sub_channel: Mapped[str | None] = mapped_column(String(100), 
nullable=True) tm_channels: Mapped[list | None] = mapped_column(JSON, nullable=True) + llm_model: Mapped[str | None] = mapped_column(String(100), nullable=True) context_prompt: Mapped[str | None] = mapped_column(Text, nullable=True) job_type: Mapped[JobType] = mapped_column( Enum(JobType, name="job_type", create_constraint=True), diff --git a/backend/app/pipeline/agents/agent_single.py b/backend/app/pipeline/agents/agent_single.py index 153c90e..c944964 100644 --- a/backend/app/pipeline/agents/agent_single.py +++ b/backend/app/pipeline/agents/agent_single.py @@ -141,12 +141,14 @@ def _format_ref_data_for_prompt(ref_data: dict[str, Any]) -> str: # Markdown table parser # --------------------------------------------------------------------------- -def _parse_markdown_table(response_text: str) -> tuple[list[dict[str, Any]], str]: +def _parse_markdown_table(response_text: str) -> tuple[list[list[str]], str]: """Parse the V25 markdown table output into structured rows. Returns: Tuple of (parsed_rows, linguistic_summary). - Each row is a dict with keys matching the output columns. + Each row is a list of cell strings in column order. + We use lists (not dicts) because the V25 table has duplicate + column names (Backtranslation x3, Rationale x3). linguistic_summary is any text after the table. 
""" lines = response_text.split("\n") @@ -174,64 +176,63 @@ def _parse_markdown_table(response_text: str) -> tuple[list[dict[str, Any]], str logger.warning("No markdown table found in response") return [], response_text.strip() - # Parse header row - header_line = table_lines[0] - headers = [h.strip() for h in header_line.split("|") if h.strip()] + def _split_row(line: str) -> list[str]: + """Split a markdown table row, preserving empty cells.""" + # Strip leading/trailing pipe and split + stripped = line.strip() + if stripped.startswith("|"): + stripped = stripped[1:] + if stripped.endswith("|"): + stripped = stripped[:-1] + return [c.strip() for c in stripped.split("|")] - # Skip separator line (---|---|---) - data_lines = [] - for tl in table_lines[1:]: - cells = [c.strip() for c in tl.split("|") if c.strip()] - # Skip separator rows - if cells and all(re.match(r"^[-:]+$", c) for c in cells): + # Skip header row and separator line, collect data rows + data_lines: list[list[str]] = [] + for tl in table_lines[1:]: # skip header + cells = _split_row(tl) + # Skip separator rows (---|---|---) + if cells and all(re.match(r"^[-:]+$", c) for c in cells if c): continue - if cells: + if any(c for c in cells): # at least one non-empty cell data_lines.append(cells) - # Map rows to dicts - rows: list[dict[str, Any]] = [] - for cells in data_lines: - row: dict[str, Any] = {} - for i, header in enumerate(headers): - row[header.lower().strip()] = cells[i] if i < len(cells) else "" - rows.append(row) - linguistic_summary = "\n".join(post_table_lines).strip() - return rows, linguistic_summary + return data_lines, linguistic_summary def _rows_to_draft_outputs( - rows: list[dict[str, Any]], + rows: list[list[str]], source_lines: list[Any], ) -> tuple[list[DraftOutput], list[RankingDeclaration]]: """Convert parsed table rows into DraftOutput and RankingDeclaration objects. 
- The V25 table has columns: - Locale | Source | Option 1 | Backtranslation | Rationale | Option 2 | Backtranslation | Rationale | Option 3 | Backtranslation | Rationale + The V25 table has columns (by position): + 0: Locale | 1: Source | 2: Option 1 | 3: Backtranslation | 4: Rationale | + 5: Option 2 | 6: Backtranslation | 7: Rationale | + 8: Option 3 | 9: Backtranslation | 10: Rationale + + Rows are passed as lists of cell strings (not dicts) because the V25 + table has duplicate column names that would collide in a dict. """ drafts: list[DraftOutput] = [] rankings: list[RankingDeclaration] = [] - # Try to match by column position since headers may vary - for i, row in enumerate(rows): - vals = list(row.values()) - # Expected: locale, source, opt1, bt1, rat1, opt2, bt2, rat2, opt3, bt3, rat3 - # Minimum useful: 5 values (locale, source, opt1, bt1, rat1) - - if len(vals) < 5: - logger.warning("Row %d has too few columns (%d), skipping", i, len(vals)) + for i, cells in enumerate(rows): + # Expected 11 columns; minimum useful: 5 (locale, source, opt1, bt1, rat1) + if len(cells) < 5: + logger.warning("Row %d has too few columns (%d), skipping", i, len(cells)) continue # Extract values by position - opt1_text = vals[2] if len(vals) > 2 else "" - bt1 = vals[3] if len(vals) > 3 else "" - rat1 = vals[4] if len(vals) > 4 else "" - opt2_text = vals[5] if len(vals) > 5 else "" - bt2 = vals[6] if len(vals) > 6 else "" - rat2 = vals[7] if len(vals) > 7 else "" - opt3_text = vals[8] if len(vals) > 8 else "" - bt3 = vals[9] if len(vals) > 9 else "" - rat3 = vals[10] if len(vals) > 10 else "" + opt1_text = cells[2] if len(cells) > 2 else "" + bt1 = cells[3] if len(cells) > 3 else "" + rat1 = cells[4] if len(cells) > 4 else "" + opt2_text = cells[5] if len(cells) > 5 else "" + bt2 = cells[6] if len(cells) > 6 else "" + rat2 = cells[7] if len(cells) > 7 else "" + opt3_text = cells[8] if len(cells) > 8 else "" + bt3 = cells[9] if len(cells) > 9 else "" + rat3 = cells[10] if 
len(cells) > 10 else "" # Clean up
tags to newlines def _clean(text: str) -> str: @@ -376,7 +377,7 @@ class AgentSingle(BaseAgent): # ── Call LLM ───────────────────────────────────────────────── system_prompt = self.get_system_prompt() - llm = LLMClient() + llm = LLMClient(model=context.job_params.llm_model or None) logger.info( "Sending to LLM: system_prompt=%d chars, user_message=%d chars", @@ -408,6 +409,9 @@ class AgentSingle(BaseAgent): if not rows: logger.error("No rows parsed from response. Raw response:\n%s", response_text[:2000]) + elif rows: + # Log first row column count for debugging + logger.info("First row has %d columns: %s", len(rows[0]), [c[:30] for c in rows[0]]) # Convert to structured outputs drafts, rankings = _rows_to_draft_outputs(rows, context.source_lines) diff --git a/backend/app/pipeline/contracts.py b/backend/app/pipeline/contracts.py index f262ce3..c3261e0 100644 --- a/backend/app/pipeline/contracts.py +++ b/backend/app/pipeline/contracts.py @@ -54,6 +54,7 @@ class JobParams(BaseModel): campaign_name: str context_prompt: str | None = None tm_channels: list[str] = [] + llm_model: str | None = None class ParsedJob(BaseModel): diff --git a/backend/app/schemas/job.py b/backend/app/schemas/job.py index bfd71f2..45ce7cf 100644 --- a/backend/app/schemas/job.py +++ b/backend/app/schemas/job.py @@ -19,6 +19,7 @@ class JobCreate(BaseModel): job_ref: str | None = None locale_codes: list[str] = [] tm_channels: list[str] | None = None + llm_model: str | None = None model_config = {"from_attributes": True} @@ -73,6 +74,7 @@ class JobResponse(BaseModel): total_token_usage: int = 0 total_estimated_cost: float = 0.0 tm_channels: list[str] | None = None + llm_model: str | None = None locale_instances: list[LocaleInstanceResponse] = [] created_at: datetime updated_at: datetime diff --git a/backend/app/services/job_service.py b/backend/app/services/job_service.py index 0e50923..e67963e 100644 --- a/backend/app/services/job_service.py +++ b/backend/app/services/job_service.py @@ -46,6 +46,7 
@@ class JobService: parent_job_id=data.parent_job_id, job_ref=data.job_ref, tm_channels=tm_channels, + llm_model=data.llm_model, status=JobStatus.created, ) db.add(job) diff --git a/backend/app/tasks/job_tasks.py b/backend/app/tasks/job_tasks.py index ce10539..7c212df 100644 --- a/backend/app/tasks/job_tasks.py +++ b/backend/app/tasks/job_tasks.py @@ -232,6 +232,7 @@ def process_locale_instance(self, job_id: str, locale_code: str) -> dict: "campaign_name": job.campaign_name, "context_prompt": job.context_prompt, "tm_channels": job.tm_channels or [job.channel], + "llm_model": job.llm_model, } # Resolve file manifest for this locale diff --git a/frontend/src/app/help/page.tsx b/frontend/src/app/help/page.tsx index fd8a33a..4a473d2 100644 --- a/frontend/src/app/help/page.tsx +++ b/frontend/src/app/help/page.tsx @@ -84,16 +84,16 @@ export default function HelpPage() {
  • You create a job with campaign details, target locales, and a programme (Retail/Prime/Brand)
  • You upload a source xlsx file containing the English copy
  • You launch the job — each locale is processed in parallel (up to 4 at a time)
  • -
  • The AI pipeline runs 6 agents per locale: validation, TM matching, ranking, transcreation, compliance checks, and formatting
  • +
  • The AI pipeline runs a single consolidated agent per locale that handles TM matching, ranking, transcreation, and compliance in one pass
  • You review the output with confidence tiers, backtranslations, and rationale
  • You approve, request revisions, or add comments on each line
  • -
  • You export the final output as a formatted xlsx file
  • +
  • You export the final output as a formatted xlsx (Tab 1: output table, Tab 2: linguistic summary)
  • Processing time and cost

    - A typical 53-line brief takes about 5-6 minutes per locale and - costs approximately $0.50 per locale. Multiple locales run - simultaneously, so a 12-locale job completes in about 6 minutes total. + A typical 53-line brief takes about 2-4 minutes per locale and + costs approximately $0.30-0.50 per locale (depending on model). Up to 4 locales run + simultaneously, so a 12-locale job completes in about 6-12 minutes total.

    @@ -123,15 +123,23 @@ export default function HelpPage() { Channel - Value, Mass, Onsite, or Outbound — this determines which TM file is loaded + Campaign channel (e.g. Mass, Value, Onsite, Outbound, UEFA, BDA) Sub-channel Optional (e.g. Radio, OLV, Display) + + AI Model + Claude Sonnet 4.6 (fast, cost-effective) or Claude Opus 4.6 (highest quality) + + + TM Files + Select one or more TM channels to load. The campaign channel is auto-selected; add others (e.g. MASS as fallback) for cross-channel reference + Locales - Select one or more target locales + All 12 locales in a single flat grid — main and derived locales are auto-classified @@ -203,32 +211,17 @@ export default function HelpPage() { 10% Loading Files - Validating inputs and loading reference files + Validating inputs and loading TM + reference files - 25% - Matching TM - AI scanning Translation Memory for semantic matches - - - 40% - Ranking Matches - Scoring and ranking TM matches by quality - - - 50-80% - Translating (batch X/Y) - AI generating translations in batches of 15 lines - - - 82% - Reviewing - Checking character limits, blacklist, and domain rules + 20-90% + Transcreating + Single AI agent processing all lines — TM matching, ranking, translation, and compliance in one pass 90% - Formatting - Generating the output xlsx file + Formatting Output + Generating the output xlsx file (Tab 1: output table, Tab 2: linguistic summary) 100% @@ -407,8 +400,9 @@ export default function HelpPage() {

    - TM files are organized by channel (Value, Mass, Onsite, Outbound) and locale. - The system automatically loads the correct files based on your job configuration. + TM files are organized by channel (13 channels including Mass, Value, Onsite, Outbound, + UEFA, BDA, and more) and locale. You can select multiple TM channels per job — the system + loads all selected TM files into the agent's context for cross-channel reference.

    Admins can upload and manage these files in Admin > TM Files and{" "} diff --git a/frontend/src/app/jobs/new/page.tsx b/frontend/src/app/jobs/new/page.tsx index 8544241..0cbf2cd 100644 --- a/frontend/src/app/jobs/new/page.tsx +++ b/frontend/src/app/jobs/new/page.tsx @@ -16,6 +16,7 @@ export interface JobFormData { channel: string; sub_channel: string; tm_channels: string[]; + llm_model: string; locales: string[]; source_file: File | null; supplementary_files: File[]; @@ -31,6 +32,7 @@ const initialFormData: JobFormData = { channel: "", sub_channel: "", tm_channels: [], + llm_model: "claude-sonnet-4-6", locales: [], source_file: null, supplementary_files: [], diff --git a/frontend/src/components/jobs/JobWizard/StepConfigure.tsx b/frontend/src/components/jobs/JobWizard/StepConfigure.tsx index aff0154..6b27d3e 100644 --- a/frontend/src/components/jobs/JobWizard/StepConfigure.tsx +++ b/frontend/src/components/jobs/JobWizard/StepConfigure.tsx @@ -31,6 +31,11 @@ const TM_CHANNELS = [ "PrimeGourmetGuard", "PrimeMidfunnel", "PrimeSpeed", "TheKiss", ]; +const LLM_MODELS = [ + { value: "claude-sonnet-4-6", label: "Claude Sonnet 4.6", description: "Fast, cost-effective" }, + { value: "claude-opus-4-6", label: "Claude Opus 4.6", description: "Highest quality" }, +]; + const SUB_CHANNELS = [ "TV_OLV", "RADIO", @@ -219,6 +224,30 @@ export function StepConfigure({ data, onChange, onNext }: StepConfigureProps) { + {/* Model Selection */} +

    + + +

    + Sonnet is faster and cheaper. Opus produces the highest quality output. +

    +
    + {/* TM File Selection */}
    diff --git a/frontend/src/components/jobs/JobWizard/StepReview.tsx b/frontend/src/components/jobs/JobWizard/StepReview.tsx index c4e0d95..bfa44fb 100644 --- a/frontend/src/components/jobs/JobWizard/StepReview.tsx +++ b/frontend/src/components/jobs/JobWizard/StepReview.tsx @@ -52,6 +52,7 @@ export function StepReview({ data, onBack }: StepReviewProps) { channel: data.channel.toLowerCase(), sub_channel: data.sub_channel ? data.sub_channel.toLowerCase() : null, tm_channels: data.tm_channels.length > 0 ? data.tm_channels.map(c => c.toLowerCase()) : undefined, + llm_model: data.llm_model || undefined, locale_codes: data.locales, context_prompt: data.context_override || undefined, }); @@ -152,6 +153,14 @@ export function StepReview({ data, onBack }: StepReviewProps) { {data.sub_channel || "None"}

    +
    +

    + AI Model +

    +

    + {data.llm_model === "claude-opus-4-6" ? "Claude Opus 4.6" : "Claude Sonnet 4.6"} +

    +

    TM Files diff --git a/frontend/src/lib/types.ts b/frontend/src/lib/types.ts index bca03c2..18444c0 100644 --- a/frontend/src/lib/types.ts +++ b/frontend/src/lib/types.ts @@ -218,6 +218,7 @@ export interface CreateJobRequest { channel: string; sub_channel?: string | null; tm_channels?: string[]; + llm_model?: string; locale_codes: string[]; context_prompt?: string; }