amazon-transcreation/backend/app/api/v1/files.py
DJP 9825b0497c Round 2 feedback: parser fix, dynamic max_tokens, polling, TM auto-discovery, reviewer comments in export
A1 Export columns shifted (critical):
- V25 LLM occasionally emits 12/13-col tables with Copy Type/Char Limit prefix
- Parser now anchors on "Option 1" header position; robust to any prefix shift
- Verified with 23/23 unit tests covering 11/12/13-col variants
- Source-line block in prompt no longer uses pipe separators (defence in depth)

A2 Linguistic summary fallback:
- Drop the metadata key/value table fallback on Tab 2
- Show "No linguistic summary was generated" when the agent didn't produce one

A3 Dashboard stuck on "Running":
- useJobs / useJob now poll every 5s while any job/locale is in an active state
- Stops polling once everything is COMPLETED or ERROR

B1 TM auto-config: respect empty selection
- Send no TM files when user unchecks all (was auto-adding campaign channel)
- Backend distinguishes empty list vs missing field

B2 Auto-discover channels from TM registry:
- New GET /api/v1/files/tm/channels endpoint reads distinct channels from registry
- Frontend StepConfigure fetches channels per client; falls back to static list
- Pipeline TM resolution falls back to flat_<Channel>_<lc>.json pattern for any
  registered channel (no hardcoded map needed for new channels like PrimeCBM)

B3 Job inputs visible on monitoring:
- New "Inputs sent to the agent" card on /jobs/[id] showing AI model, TM files,
  supplementary file list, and context override
- New GET /api/v1/jobs/{id}/supplementary endpoint listing on-disk supplementary files

C1 Context cap (large briefs truncating):
- max_tokens scales with source line count (8k/16k/32k/64k by tier)
- 172-line briefs now have ~64k output budget instead of fixed 16k

D1 Reviewer comments in xlsx export:
- Export endpoint now copies xlsx to temp path on download, queries Feedback
  joined with User, and appends "Reviewer (Name): comment" to the rationale
  cells of options that have feedback
- Original generated file remains untouched

D2 Hide Clients & Voice from sidebar (page still reachable by URL)
D3 Remove dead notifications + settings icons from header
D4 Cost by Locale table added to Analytics with total + avg cost per brief

Makefile seed target now also runs register_storage_files so TM registry is
populated from disk on first setup (deploy.sh already does this via --init).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-04 16:12:47 -04:00

238 lines
8.1 KiB
Python

from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, Query, UploadFile, File, status
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession
from app.dependencies import get_current_user, get_db
from app.models.files import ReferenceFileType
from app.schemas.files import (
FileUploadResponse,
ReferenceFileResponse,
TMFileResponse,
)
from app.services.audit_service import AuditService
from app.services.file_service import FileService
# Router for the file endpoints; every route declared below is served under
# the "/files" prefix and tagged "files".
router = APIRouter(prefix="/files", tags=["files"])
# Module-level service instances shared by all handlers in this module.
file_service = FileService()
audit_service = AuditService()
# ---- TM Files ----
@router.post("/tm", response_model=FileUploadResponse, status_code=status.HTTP_201_CREATED)
async def upload_tm_file(
    client_id: UUID = Query(...),
    locale_code: str = Query(...),
    channel: str = Query(...),
    file: UploadFile = File(...),
    db: AsyncSession = Depends(get_db),
    current_user: dict = Depends(get_current_user),
) -> FileUploadResponse:
    """Store an uploaded Translation Memory file and audit the upload.

    The upload is rejected with HTTP 400 when it has no filename or when
    ``file_service.validate_file_extension`` does not accept it for the
    ``.jsonl`` / ``.json`` extensions. Otherwise the file is handed to
    ``file_service.upload_tm_file``, an ``upload_tm`` audit entry is
    written, and the session is committed.

    Returns:
        A ``FileUploadResponse`` carrying the new record's id, filename,
        stored path and a message reporting the segment count.

    Raises:
        HTTPException: 400 for a missing filename or a rejected extension.
    """
    accepted_extensions = [".jsonl", ".json"]

    # Guard clauses: reject unusable uploads before touching storage or DB.
    if not file.filename:
        raise HTTPException(status_code=400, detail="File must have a filename")
    if not file_service.validate_file_extension(file.filename, accepted_extensions):
        raise HTTPException(status_code=400, detail="Only .jsonl/.json files accepted")

    uploader_id = current_user["user_id"]
    tm = await file_service.upload_tm_file(
        db,
        client_id,
        locale_code,
        channel,
        file.file,
        file.filename,
        uploaded_by=uploader_id,
    )

    await audit_service.log(
        db,
        action="upload_tm",
        entity_type="tm_file",
        entity_id=str(tm.id),
        user_id=uploader_id,
        details={
            "filename": tm.filename,
            "locale": locale_code,
            "channel": channel,
            "segments": tm.segment_count,
        },
    )
    # The audit entry and the upload are committed together.
    await db.commit()

    summary = f"Uploaded TM file with {tm.segment_count} segments"
    return FileUploadResponse(
        id=tm.id,
        filename=tm.filename,
        file_path=tm.file_path,
        message=summary,
    )
@router.get("/tm", response_model=list[TMFileResponse])
async def list_tm_files(
    client_id: UUID = Query(...),
    locale_code: str | None = Query(None),
    channel: str | None = Query(None),
    db: AsyncSession = Depends(get_db),
    current_user: dict = Depends(get_current_user),
) -> list[TMFileResponse]:
    """Return the TM files registered for ``client_id``.

    ``locale_code`` and ``channel`` are optional and are forwarded as-is to
    ``file_service.list_tm_files``. Each returned record is converted with
    ``TMFileResponse.model_validate``. ``current_user`` is resolved through
    the ``get_current_user`` dependency but not otherwise used here.
    """
    records = await file_service.list_tm_files(db, client_id, locale_code, channel)
    return list(map(TMFileResponse.model_validate, records))
@router.get("/tm/channels")
async def list_tm_channels(
    client_id: UUID = Query(...),
    db: AsyncSession = Depends(get_db),
    current_user: dict = Depends(get_current_user),
) -> dict:
    """Return distinct channel names found in the TM registry, with the
    locales each channel is available for.

    Used by the New Job wizard to populate the Channel and TM Files
    selectors dynamically — adding a new TM file (e.g. flat_PrimeCBM_de-de.json)
    causes the channel to appear here without code changes.

    Returns:
        ``{"channels": [{"name": <channel>, "locales": [<locale>, ...]}, ...]}``
        with channels ordered case-insensitively by name and each locale
        list sorted. Rows with an empty/NULL channel are ignored.
    """
    from sqlalchemy import select
    from app.models.files import TMFileRegistry

    # DISTINCT lets the database collapse duplicate (channel, locale) pairs
    # instead of shipping one row per registered file.
    result = await db.execute(
        select(TMFileRegistry.channel, TMFileRegistry.locale_code)
        .where(TMFileRegistry.client_id == client_id)
        .distinct()
    )

    by_channel: dict[str, set[str]] = {}
    for ch, lc in result.all():
        if not ch:
            continue
        locales = by_channel.setdefault(ch, set())
        # NOTE(review): whether locale_code is nullable is not visible here.
        # A NULL mixed with strings would make sorted() raise TypeError, so
        # NULL locales are skipped while the channel itself is still listed.
        if lc is not None:
            locales.add(lc)

    channels = [
        {"name": name, "locales": sorted(by_channel[name])}
        for name in sorted(by_channel, key=lambda n: n.lower())
    ]
    return {"channels": channels}
@router.get("/tm/{file_id}/download")
async def download_tm_file(
    file_id: UUID,
    db: AsyncSession = Depends(get_db),
    current_user: dict = Depends(get_current_user),
) -> FileResponse:
    """Send the stored TM file identified by ``file_id``.

    The registry row is looked up by primary id, then its ``file_path`` is
    resolved through ``file_service.get_file_path``.

    Raises:
        HTTPException: 404 when no registry row matches, or when
            ``get_file_path`` returns ``None`` for the stored path.
    """
    from sqlalchemy import select
    from app.models.files import TMFileRegistry

    lookup = select(TMFileRegistry).where(TMFileRegistry.id == file_id)
    record = (await db.execute(lookup)).scalar_one_or_none()
    if record is None:
        raise HTTPException(status_code=404, detail="TM file not found")

    disk_path = file_service.get_file_path(record.file_path)
    if disk_path is None:
        raise HTTPException(status_code=404, detail="File not found on disk")

    return FileResponse(path=str(disk_path), filename=record.filename)
@router.delete("/tm/{file_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_tm_file(
    file_id: UUID,
    db: AsyncSession = Depends(get_db),
    current_user: dict = Depends(get_current_user),
) -> None:
    """Remove the TM file identified by ``file_id`` and audit the removal.

    Deletion is delegated to ``file_service.delete_tm_file``; on success a
    ``delete_tm`` audit entry is written and the session is committed.

    Raises:
        HTTPException: 404 when the service reports nothing was deleted.
    """
    was_deleted = await file_service.delete_tm_file(db, file_id)
    if not was_deleted:
        raise HTTPException(status_code=404, detail="TM file not found")

    await audit_service.log(
        db,
        action="delete_tm",
        entity_type="tm_file",
        entity_id=str(file_id),
        user_id=current_user["user_id"],
    )
    await db.commit()
# ---- Reference Files ----
@router.post(
    "/reference",
    response_model=FileUploadResponse,
    status_code=status.HTTP_201_CREATED,
)
async def upload_reference_file(
    client_id: UUID = Query(...),
    file_type: ReferenceFileType = Query(...),
    locale_scope: str = Query(...),
    file: UploadFile = File(...),
    db: AsyncSession = Depends(get_db),
    current_user: dict = Depends(get_current_user),
) -> FileUploadResponse:
    """Store an uploaded reference file (glossary, blacklist, TOV, etc.).

    The upload is handed to ``file_service.upload_reference_file``, an
    ``upload_reference`` audit entry is written, and the session is
    committed. No extension check is performed in this handler.

    Returns:
        A ``FileUploadResponse`` with the new record's id, filename, stored
        path and a message naming the reference file type.

    Raises:
        HTTPException: 400 when the upload has no filename.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="File must have a filename")

    uploader_id = current_user["user_id"]
    ref = await file_service.upload_reference_file(
        db,
        client_id,
        file_type,
        locale_scope,
        file.file,
        file.filename,
        uploaded_by=uploader_id,
    )

    await audit_service.log(
        db,
        action="upload_reference",
        entity_type="reference_file",
        entity_id=str(ref.id),
        user_id=uploader_id,
        details={
            "filename": ref.filename,
            "file_type": file_type.value,
            "locale_scope": locale_scope,
        },
    )
    # The audit entry and the upload are committed together.
    await db.commit()

    summary = f"Uploaded {file_type.value} reference file"
    return FileUploadResponse(
        id=ref.id,
        filename=ref.filename,
        file_path=ref.file_path,
        message=summary,
    )
@router.get("/reference", response_model=list[ReferenceFileResponse])
async def list_reference_files(
    client_id: UUID = Query(...),
    file_type: ReferenceFileType | None = Query(None),
    locale_scope: str | None = Query(None),
    db: AsyncSession = Depends(get_db),
    current_user: dict = Depends(get_current_user),
) -> list[ReferenceFileResponse]:
    """Return the reference files registered for ``client_id``.

    ``file_type`` and ``locale_scope`` are optional and are forwarded as-is
    to ``file_service.list_reference_files``. Each returned record is
    converted with ``ReferenceFileResponse.model_validate``.
    """
    records = await file_service.list_reference_files(
        db, client_id, file_type, locale_scope
    )
    return list(map(ReferenceFileResponse.model_validate, records))
@router.get("/reference/{file_id}/download")
async def download_reference_file(
    file_id: UUID,
    db: AsyncSession = Depends(get_db),
    current_user: dict = Depends(get_current_user),
) -> FileResponse:
    """Send the stored reference file identified by ``file_id``.

    The ``ReferenceFile`` row is looked up by id, then its ``file_path`` is
    resolved through ``file_service.get_file_path``.

    Raises:
        HTTPException: 404 when no row matches, or when ``get_file_path``
            returns ``None`` for the stored path.
    """
    from sqlalchemy import select
    from app.models.files import ReferenceFile

    lookup = select(ReferenceFile).where(ReferenceFile.id == file_id)
    record = (await db.execute(lookup)).scalar_one_or_none()
    if record is None:
        raise HTTPException(status_code=404, detail="Reference file not found")

    disk_path = file_service.get_file_path(record.file_path)
    if disk_path is None:
        raise HTTPException(status_code=404, detail="File not found on disk")

    return FileResponse(path=str(disk_path), filename=record.filename)
@router.delete("/reference/{file_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_reference_file(
    file_id: UUID,
    db: AsyncSession = Depends(get_db),
    current_user: dict = Depends(get_current_user),
) -> None:
    """Remove the reference file identified by ``file_id`` and audit it.

    Deletion is delegated to ``file_service.delete_reference_file``; on
    success a ``delete_reference`` audit entry is written and the session
    is committed.

    Raises:
        HTTPException: 404 when the service reports nothing was deleted.
    """
    was_deleted = await file_service.delete_reference_file(db, file_id)
    if not was_deleted:
        raise HTTPException(status_code=404, detail="Reference file not found")

    await audit_service.log(
        db,
        action="delete_reference",
        entity_type="reference_file",
        entity_id=str(file_id),
        user_id=current_user["user_id"],
    )
    await db.commit()