amazon-transcreation/backend/app/services/output_service.py
DJP 9825b0497c Round 2 feedback: parser fix, dynamic max_tokens, polling, TM auto-discovery, reviewer comments in export
A1 Export columns shifted (critical):
- V25 LLM occasionally emits 12/13-col tables with Copy Type/Char Limit prefix
- Parser now anchors on "Option 1" header position; robust to any prefix shift
- Verified with 23/23 unit tests covering 11/12/13-col variants
- Source-line block in prompt no longer uses pipe separators (defence in depth)

A2 Linguistic summary fallback:
- Drop the metadata key/value table fallback on Tab 2
- Show "No linguistic summary was generated" when the agent didn't produce one

A3 Dashboard stuck on "Running":
- useJobs / useJob now poll every 5s while any job/locale is in an active state
- Stops polling once everything is COMPLETED or ERROR

B1 TM auto-config: respect empty selection
- Send no TM files when user unchecks all (was auto-adding campaign channel)
- Backend distinguishes empty list vs missing field

B2 Auto-discover channels from TM registry:
- New GET /api/v1/files/tm/channels endpoint reads distinct channels from registry
- Frontend StepConfigure fetches channels per client; falls back to static list
- Pipeline TM resolution falls back to flat_<Channel>_<lc>.json pattern for any
  registered channel (no hardcoded map needed for new channels like PrimeCBM)

B3 Job inputs visible on monitoring:
- New "Inputs sent to the agent" card on /jobs/[id] showing AI model, TM files,
  supplementary file list, and context override
- New GET /api/v1/jobs/{id}/supplementary endpoint listing on-disk supplementary files

C1 Context cap (large briefs truncating):
- max_tokens scales with source line count (8k/16k/32k/64k by tier)
- 172-line briefs now have ~64k output budget instead of fixed 16k

D1 Reviewer comments in xlsx export:
- Export endpoint now copies xlsx to temp path on download, queries Feedback
  joined with User, and appends "Reviewer (Name): comment" to the rationale
  cells of options that have feedback
- Original generated file remains untouched

D2 Hide Clients & Voice from sidebar (page still reachable by URL)
D3 Remove dead notifications + settings icons from header
D4 Cost by Locale table added to Analytics with total + avg cost per brief

Makefile seed target now also runs register_storage_files so TM registry is
populated from disk on first setup (deploy.sh already does this via --init).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-04 16:12:47 -04:00

189 lines
6.3 KiB
Python

import asyncio
import logging
import shutil
import tempfile
from collections import defaultdict
from pathlib import Path
from uuid import UUID

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload

from app.models.feedback import Feedback
from app.models.job import LocaleInstance
from app.models.output import OutputRow
from app.models.source import SourceLine
from app.models.user import User
from app.schemas.output import (
    OutputPreviewResponse,
    OutputRowResponse,
    SourceLinePreview,
)
logger = logging.getLogger(__name__)
class OutputService:
    """Service for assembling output preview data and triggering exports."""

    # Worksheet of the generated workbook that receives reviewer comments.
    _OUTPUT_SHEET = "Transcreation Output"

    # Option number -> 1-based column index of that option's rationale cell.
    # Layout (meant to match excel_writer.OUTPUT_COLUMNS):
    #   1: EN_GB, 2: Copy Type,
    #   3: Option 1, 4: BT 1, 5: Rationale 1,
    #   6: Option 2, 7: BT 2, 8: Rationale 2,
    #   9: Option 3, 10: BT 3, 11: Rationale 3
    _RATIONALE_COL_BY_OPTION = {1: 5, 2: 8, 3: 11}

    @staticmethod
    async def _get_instance(
        db: AsyncSession,
        job_id: UUID,
        locale_code: str,
    ) -> LocaleInstance | None:
        """Return the locale instance for (job_id, locale_code), or None."""
        result = await db.execute(
            select(LocaleInstance).where(
                LocaleInstance.job_id == job_id,
                LocaleInstance.locale_code == locale_code,
            )
        )
        return result.scalar_one_or_none()

    async def get_preview(
        self,
        db: AsyncSession,
        job_id: UUID,
        locale_code: str,
    ) -> OutputPreviewResponse | None:
        """Assemble output preview data for a specific locale instance.

        Returns:
            The preview (source lines, output rows with their feedback, and
            the row count), or None when no locale instance matches
            ``job_id`` / ``locale_code``.
        """
        instance = await self._get_instance(db, job_id, locale_code)
        if instance is None:
            return None

        # Source lines belong to the job (not the locale), in row order.
        source_result = await db.execute(
            select(SourceLine)
            .where(SourceLine.job_id == job_id)
            .order_by(SourceLine.row_order)
        )
        source_lines = [
            SourceLinePreview.model_validate(sl)
            for sl in source_result.scalars().all()
        ]

        # Feedback is loaded eagerly so that validating each row into the
        # response schema does not need a lazy load on the async session.
        output_result = await db.execute(
            select(OutputRow)
            .where(OutputRow.instance_id == instance.id)
            .order_by(OutputRow.row_order)
            .options(selectinload(OutputRow.feedback))
        )
        output_rows = [
            OutputRowResponse.model_validate(row)
            for row in output_result.scalars().all()
        ]

        return OutputPreviewResponse(
            locale_code=locale_code,
            instance_id=instance.id,
            source_lines=source_lines,
            output_rows=output_rows,
            total_rows=len(output_rows),
        )

    async def get_output_rows(
        self,
        db: AsyncSession,
        instance_id: UUID,
    ) -> list[OutputRow]:
        """Get all output rows for a locale instance, ordered by row_order."""
        result = await db.execute(
            select(OutputRow)
            .where(OutputRow.instance_id == instance_id)
            .order_by(OutputRow.row_order)
        )
        return list(result.scalars().all())

    async def trigger_export(
        self,
        db: AsyncSession,
        job_id: UUID,
        locale_code: str,
    ) -> str | None:
        """Return path to the export xlsx, with any reviewer feedback applied.

        If reviewers have left comments on output rows, the original xlsx is
        copied to a temp path, "Reviewer (Name): comment" is appended to the
        affected rationale cells, and the temp path is returned. If there is
        no feedback with a comment, the original path is returned unchanged.

        Returns:
            A filesystem path, or None when the locale instance does not
            exist, has no output file recorded, or the file is missing on
            disk.

        Note:
            A returned temp file is not deleted by this service; the caller
            is presumably expected to remove it after serving the download.
        """
        instance = await self._get_instance(db, job_id, locale_code)
        if instance is None or not instance.output_file_path:
            return None

        original_path = instance.output_file_path
        if not Path(original_path).exists():
            return None

        # Inner joins: only feedback attached to this instance's rows and
        # written by an existing user is considered. Ordering keeps comments
        # chronological within each cell.
        feedback_query = (
            select(Feedback, OutputRow.row_order, User.name)
            .join(OutputRow, Feedback.output_id == OutputRow.id)
            .join(User, Feedback.user_id == User.id)
            .where(OutputRow.instance_id == instance.id)
            .order_by(OutputRow.row_order, Feedback.created_at)
        )
        feedback_rows = (await db.execute(feedback_query)).all()

        grouped = self._group_feedback(feedback_rows)
        if not grouped:
            return original_path

        # openpyxl load/save is blocking file I/O plus CPU work; run it in a
        # worker thread so the event loop keeps serving other requests.
        return await asyncio.to_thread(
            self._render_export_with_feedback, original_path, grouped
        )

    @staticmethod
    def _group_feedback(feedback_rows) -> dict[tuple[int, int], list[str]]:
        """Group comment labels by (row_order, option_column).

        ``feedback_rows`` is an iterable of ``(feedback, row_order,
        user_name)`` tuples. Entries whose comment is empty or None are
        skipped. Input order is preserved within each group.
        """
        grouped: dict[tuple[int, int], list[str]] = defaultdict(list)
        for fb, row_order, user_name in feedback_rows:
            if not fb.comment:
                continue
            grouped[(row_order, fb.option_column)].append(
                f"Reviewer ({user_name}): {fb.comment}"
            )
        return dict(grouped)

    @staticmethod
    def _append_comments(existing: object, comments: list[str]) -> str:
        """Return the cell text with reviewer comments appended.

        Comments are joined one per line and separated from any existing
        text by a blank line. A non-string cell value is converted to text
        instead of raising TypeError on concatenation.
        """
        block = "\n".join(comments)
        text = "" if existing is None else str(existing)
        if not text:
            return block.lstrip()
        return f"{text}\n\n{block}"

    def _render_export_with_feedback(
        self,
        original_path: str,
        feedback_by_row: dict[tuple[int, int], list[str]],
    ) -> str:
        """Copy the xlsx to a temp path and append reviewer comments to
        the rationale cells.

        Returns:
            The temp path on success. If the copy cannot be opened as a
            workbook or lacks the output sheet, a warning is logged and
            ``original_path`` is returned instead (best effort).

        Raises:
            Any error from copying or saving propagates. In every
            non-success case the temp file is removed before returning or
            raising, so failed exports do not accumulate in the temp dir.
        """
        # Imported lazily, as in the original, so openpyxl is only loaded
        # when an export actually needs rewriting.
        from openpyxl import load_workbook
        from openpyxl.styles import Alignment

        # delete=False: the file must outlive this call so it can be served.
        tmp = tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False)
        tmp.close()

        wb = None
        keep_tmp = False
        try:
            shutil.copy2(original_path, tmp.name)
            try:
                wb = load_workbook(tmp.name)
                ws = wb[self._OUTPUT_SHEET]
            except Exception as exc:
                # Deliberate best-effort boundary: an unreadable workbook or
                # a missing sheet must not break the download.
                logger.warning(
                    "Could not open xlsx for feedback injection (%s); "
                    "returning original", exc,
                )
                return original_path

            for (row_order, option_col), comments in feedback_by_row.items():
                col_idx = self._RATIONALE_COL_BY_OPTION.get(option_col)
                if col_idx is None:
                    # Feedback not tied to option 1-3 has no rationale cell.
                    continue
                # NOTE(review): +1 skips the header row, which is only right
                # if row_order is 1-based - confirm against excel_writer.
                xlsx_row = row_order + 1
                cell = ws.cell(row=xlsx_row, column=col_idx)
                cell.value = self._append_comments(cell.value, comments)
                cell.alignment = Alignment(vertical="top", wrap_text=True)

            wb.save(tmp.name)
            keep_tmp = True
            return tmp.name
        finally:
            if wb is not None:
                wb.close()
            if not keep_tmp:
                Path(tmp.name).unlink(missing_ok=True)