amazon-transcreation/backend/app/services/file_service.py
DJP 98fa16bfc3 feat: complete Phase 1-2 scaffold — backend, frontend, pipeline skeleton
Full-stack Amazon AI Transcreation Platform with:
- FastAPI backend (async, PostgreSQL, Redis, Celery) with 11 DB tables
- JWT auth (SSO-ready abstract provider pattern)
- 6-agent pipeline orchestrator with deterministic modules
- Next.js 14 frontend with Amazon branding (Ember fonts, orange/dark theme)
- Job wizard, monitoring HUD, output review, admin screens
- 154 TM/reference files imported, 12 locales configured
- Docker Compose for all services

Agents 2-5 (TM retrieval, ranker, transcreator, compliance) are stubs
pending Phase 3 LLM integration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-10 12:31:43 -04:00

234 lines
7.5 KiB
Python

import os
import shutil
from pathlib import Path
from typing import BinaryIO
from uuid import UUID, uuid4
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.models.files import ReferenceFile, ReferenceFileType, TMFileRegistry
from app.models.source import SourceLine
from app.pipeline.modules.source_file_parser import parse_source_file
class FileService:
    """Service for file upload, download, path resolution, and storage management.

    Files are written beneath ``settings.STORAGE_ROOT``. TM and reference
    uploads are additionally recorded through the ``AsyncSession`` handed to
    each method. The methods only ``flush()`` that session and never commit.

    NOTE: disk access uses blocking calls (``open``, ``shutil.copyfileobj``)
    inside ``async def`` methods, so large uploads occupy the calling thread
    for the duration of the copy.
    """

    def __init__(self) -> None:
        self.storage_root = Path(settings.STORAGE_ROOT)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _resolve_path(self, *parts: str) -> Path:
        """Resolve a storage path and ensure parent directories exist.

        Args:
            *parts: Path components relative to the storage root. The last
                component is typically a caller-supplied filename.

        Returns:
            ``storage_root`` joined with ``parts`` (not normalised, so the
            value stored in the database keeps the same form as before).

        Raises:
            ValueError: If any component is anchored (absolute or
                drive-qualified) or contains a ``..`` segment. Either would
                let the joined path leave its intended directory;
                ``joinpath`` silently discards everything before an absolute
                component.
        """
        for part in parts:
            candidate = Path(part)
            if candidate.anchor or ".." in candidate.parts:
                raise ValueError(f"Unsafe path component: {part!r}")

        path = self.storage_root.joinpath(*parts)
        path.parent.mkdir(parents=True, exist_ok=True)
        return path

    def _save_upload(self, file: BinaryIO, *parts: str) -> Path:
        """Copy the binary stream ``file`` to the storage path built from ``parts``."""
        destination = self._resolve_path(*parts)
        with open(destination, "wb") as out:
            shutil.copyfileobj(file, out)
        return destination

    @staticmethod
    def _count_segments(file_path: Path) -> int:
        """Return the number of non-blank lines in a text file.

        The file is decoded as UTF-8 explicitly so the count does not depend
        on the platform's default encoding.
        """
        with open(file_path, "r", encoding="utf-8") as handle:
            return sum(1 for line in handle if line.strip())

    async def _delete_registered_file(
        self,
        db: AsyncSession,
        model: type[TMFileRegistry] | type[ReferenceFile],
        file_id: UUID,
    ) -> bool:
        """Delete the ``model`` row with ``file_id`` and its file on disk.

        Returns ``False`` when no such row exists, ``True`` otherwise.
        """
        result = await db.execute(select(model).where(model.id == file_id))
        record = result.scalar_one_or_none()
        if record is None:
            return False

        # Capture the path before the instance is marked deleted.
        stored_path = Path(record.file_path)

        # Flush the row deletion first: if the database rejects it, the file
        # is still on disk and the registry entry stays valid.
        await db.delete(record)
        await db.flush()

        # missing_ok avoids the exists()/remove() race and tolerates a file
        # that has already been removed.
        stored_path.unlink(missing_ok=True)
        return True

    # ------------------------------------------------------------------
    # Uploads
    # ------------------------------------------------------------------

    async def upload_source_file(
        self,
        db: AsyncSession,
        job_id: UUID,
        file: BinaryIO,
        filename: str,
    ) -> list[SourceLine]:
        """Upload and parse a source xlsx file, creating SourceLine records.

        The file is saved under ``jobs/<job_id>/source/``, parsed with
        ``parse_source_file``, and every existing ``SourceLine`` of the job
        is replaced by the freshly parsed rows (``row_order`` starts at 1).

        Raises:
            ValueError: If ``filename`` is an unsafe path component.
            KeyError: If a parsed row has no ``"en_gb"`` entry.
        """
        file_path = self._save_upload(
            file, "jobs", str(job_id), "source", filename
        )

        # Parse before touching the database so a parse failure leaves the
        # job's existing source lines untouched.
        parsed_lines = parse_source_file(str(file_path))

        existing = await db.execute(
            select(SourceLine).where(SourceLine.job_id == job_id)
        )
        for stale_line in existing.scalars().all():
            await db.delete(stale_line)
        # SQLAlchemy emits INSERTs before DELETEs within a single flush, so
        # push the deletions out first. This matters if SourceLine carries a
        # uniqueness constraint such as (job_id, row_order) -- the model is
        # not visible from this file, so treat that as an assumption.
        await db.flush()

        source_lines = [
            SourceLine(
                job_id=job_id,
                row_order=position,
                en_gb=row["en_gb"],
                copy_type=row.get("copy_type"),
                creative_guidance=row.get("creative_guidance"),
                visual_ref=row.get("visual_ref"),
                char_limit=row.get("char_limit"),
                is_display_format=row.get("is_display_format", False),
            )
            for position, row in enumerate(parsed_lines, start=1)
        ]
        db.add_all(source_lines)
        await db.flush()
        return source_lines

    async def upload_supplementary_file(
        self,
        db: AsyncSession,
        job_id: UUID,
        file: BinaryIO,
        filename: str,
    ) -> str:
        """Upload a supplementary file (TM, glossary, etc.) for a job.

        The file is saved under ``jobs/<job_id>/supplementary/``. ``db`` is
        accepted for signature parity with the other upload methods but is
        not used.

        Returns:
            The stored file path as a string.

        Raises:
            ValueError: If ``filename`` is an unsafe path component.
        """
        file_path = self._save_upload(
            file, "jobs", str(job_id), "supplementary", filename
        )
        return str(file_path)

    async def upload_tm_file(
        self,
        db: AsyncSession,
        client_id: UUID,
        locale_code: str,
        channel: str,
        file: BinaryIO,
        filename: str,
        uploaded_by: UUID | None = None,
    ) -> TMFileRegistry:
        """Upload a TM file and create a registry entry.

        The file is saved under ``clients/<client_id>/tm/<locale_code>/`` and
        its non-blank lines are counted as segments (one per JSONL line).

        Raises:
            ValueError: If ``locale_code`` or ``filename`` is an unsafe path
                component.
            UnicodeDecodeError: If the uploaded file is not valid UTF-8.
        """
        file_path = self._save_upload(
            file, "clients", str(client_id), "tm", locale_code, filename
        )

        tm_file = TMFileRegistry(
            client_id=client_id,
            locale_code=locale_code,
            channel=channel,
            filename=filename,
            file_path=str(file_path),
            segment_count=self._count_segments(file_path),
            uploaded_by=uploaded_by,
        )
        db.add(tm_file)
        await db.flush()
        return tm_file

    async def upload_reference_file(
        self,
        db: AsyncSession,
        client_id: UUID,
        file_type: ReferenceFileType,
        locale_scope: str,
        file: BinaryIO,
        filename: str,
        uploaded_by: UUID | None = None,
    ) -> ReferenceFile:
        """Upload a reference file and create a registry entry.

        The file is saved under
        ``clients/<client_id>/reference/<file_type.value>/``.

        Raises:
            ValueError: If ``filename`` is an unsafe path component.
        """
        file_path = self._save_upload(
            file,
            "clients",
            str(client_id),
            "reference",
            file_type.value,
            filename,
        )

        ref_file = ReferenceFile(
            client_id=client_id,
            file_type=file_type,
            locale_scope=locale_scope,
            filename=filename,
            file_path=str(file_path),
            uploaded_by=uploaded_by,
        )
        db.add(ref_file)
        await db.flush()
        return ref_file

    # ------------------------------------------------------------------
    # Listings
    # ------------------------------------------------------------------

    async def list_tm_files(
        self,
        db: AsyncSession,
        client_id: UUID,
        locale_code: str | None = None,
        channel: str | None = None,
    ) -> list[TMFileRegistry]:
        """List TM files for a client with optional filters.

        Falsy ``locale_code`` / ``channel`` values disable the respective
        filter. Results are ordered by ``uploaded_at``, newest first.
        """
        query = select(TMFileRegistry).where(TMFileRegistry.client_id == client_id)
        if locale_code:
            query = query.where(TMFileRegistry.locale_code == locale_code)
        if channel:
            query = query.where(TMFileRegistry.channel == channel)

        result = await db.execute(query.order_by(TMFileRegistry.uploaded_at.desc()))
        return list(result.scalars().all())

    async def list_reference_files(
        self,
        db: AsyncSession,
        client_id: UUID,
        file_type: ReferenceFileType | None = None,
        locale_scope: str | None = None,
    ) -> list[ReferenceFile]:
        """List reference files for a client with optional filters.

        Falsy ``file_type`` / ``locale_scope`` values disable the respective
        filter. Results are ordered by ``uploaded_at``, newest first.
        """
        query = select(ReferenceFile).where(ReferenceFile.client_id == client_id)
        if file_type:
            query = query.where(ReferenceFile.file_type == file_type)
        if locale_scope:
            query = query.where(ReferenceFile.locale_scope == locale_scope)

        result = await db.execute(query.order_by(ReferenceFile.uploaded_at.desc()))
        return list(result.scalars().all())

    def get_file_path(self, stored_path: str) -> Path | None:
        """Resolve a stored file path and verify it exists.

        Returns:
            ``Path(stored_path)`` if something exists at that location,
            otherwise ``None``.
        """
        path = Path(stored_path)
        return path if path.exists() else None

    # ------------------------------------------------------------------
    # Deletions
    # ------------------------------------------------------------------

    async def delete_tm_file(
        self, db: AsyncSession, file_id: UUID
    ) -> bool:
        """Delete a TM file from storage and database.

        Returns:
            ``False`` if no registry row has ``file_id``, ``True`` once the
            row deletion has been flushed and the file removed.
        """
        return await self._delete_registered_file(db, TMFileRegistry, file_id)

    async def delete_reference_file(
        self, db: AsyncSession, file_id: UUID
    ) -> bool:
        """Delete a reference file from storage and database.

        Returns:
            ``False`` if no registry row has ``file_id``, ``True`` once the
            row deletion has been flushed and the file removed.
        """
        return await self._delete_registered_file(db, ReferenceFile, file_id)

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------

    def validate_file_extension(
        self, filename: str, allowed_extensions: list[str]
    ) -> bool:
        """Validate that a file has an allowed extension.

        The comparison is case-insensitive on both sides, and entries of
        ``allowed_extensions`` may be given with or without the leading dot
        (``".xlsx"``, ``"xlsx"`` and ``".XLSX"`` are equivalent). Only the
        final suffix of ``filename`` is considered.
        """
        ext = Path(filename).suffix.lower()

        allowed: set[str] = set()
        for candidate in allowed_extensions:
            candidate = candidate.lower()
            # An empty entry is kept as-is: it matches files with no suffix.
            if candidate and not candidate.startswith("."):
                candidate = f".{candidate}"
            allowed.add(candidate)

        return ext in allowed