Full-stack Amazon AI Transcreation Platform with:
- FastAPI backend (async, PostgreSQL, Redis, Celery) with 11 DB tables
- JWT auth (SSO-ready abstract provider pattern)
- 6-agent pipeline orchestrator with deterministic modules
- Next.js 14 frontend with Amazon branding (Ember fonts, orange/dark theme)
- Job wizard, monitoring HUD, output review, admin screens
- 154 TM/reference files imported, 12 locales configured
- Docker Compose for all services

Agents 2-5 (TM retrieval, ranker, transcreator, compliance) are stubs pending Phase 3 LLM integration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
234 lines
7.5 KiB
Python
234 lines
7.5 KiB
Python
import os
|
|
import shutil
|
|
from pathlib import Path
|
|
from typing import BinaryIO
|
|
from uuid import UUID, uuid4
|
|
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.config import settings
|
|
from app.models.files import ReferenceFile, ReferenceFileType, TMFileRegistry
|
|
from app.models.source import SourceLine
|
|
from app.pipeline.modules.source_file_parser import parse_source_file
|
|
|
|
|
|
class FileService:
    """Service for file upload, download, path resolution, and storage management.

    Files are written beneath ``settings.STORAGE_ROOT``. Caller-supplied
    filenames are reduced to their final path component, and every resolved
    path is checked to stay inside the storage root before anything is
    written. Methods that touch the database only ``flush`` the session;
    none of them commit.
    """

    def __init__(self) -> None:
        self.storage_root = Path(settings.STORAGE_ROOT)

    @staticmethod
    def _safe_filename(filename: str) -> str:
        """Return only the final path component of a caller-supplied filename.

        Both ``/`` and ``\\`` are treated as separators so that names such as
        ``../../x`` or ``C:\\dir\\x`` cannot steer the write into another
        directory.

        Raises:
            ValueError: If nothing usable remains (empty name, ``.`` or ``..``).
        """
        name = Path(filename.replace("\\", "/")).name
        if name in ("", ".", ".."):
            raise ValueError(f"Invalid filename: {filename!r}")
        return name

    def _resolve_path(self, *parts: str) -> Path:
        """Resolve a storage path and ensure parent directories exist.

        Args:
            *parts: Path segments appended to the storage root.

        Returns:
            The joined path (not normalised), exactly as it will be stored.

        Raises:
            ValueError: If the joined path would fall outside the storage root,
                e.g. because a segment is absolute or contains ``..``.
        """
        path = self.storage_root.joinpath(*parts)
        # Lexical check (no symlink resolution) so symlinked storage layouts
        # keep working; it must run before any directory is created.
        root = os.path.abspath(self.storage_root)
        if not Path(os.path.abspath(path)).is_relative_to(root):
            raise ValueError(f"Path escapes storage root: {path}")
        path.parent.mkdir(parents=True, exist_ok=True)
        return path

    def _store(self, file: BinaryIO, *parts: str) -> Path:
        """Copy the ``file`` stream to the storage path built from ``parts``."""
        destination = self._resolve_path(*parts)
        with open(destination, "wb") as out:
            shutil.copyfileobj(file, out)
        return destination

    async def upload_source_file(
        self,
        db: AsyncSession,
        job_id: UUID,
        file: BinaryIO,
        filename: str,
    ) -> list[SourceLine]:
        """Upload and parse a source xlsx file, creating SourceLine records.

        The file is saved under ``jobs/<job_id>/source/``, parsed with
        ``parse_source_file``, and any SourceLine rows already attached to the
        job are deleted before the new ones are added.

        Args:
            db: Session used for the delete/add; flushed but not committed.
            job_id: Job the source file belongs to.
            file: Binary stream with the upload contents.
            filename: Caller-supplied name; only its basename is used.

        Returns:
            The new SourceLine objects, with ``row_order`` starting at 1.

        Raises:
            ValueError: If ``filename`` has no usable basename.
        """
        file_path = self._store(
            file, "jobs", str(job_id), "source", self._safe_filename(filename)
        )

        # Parse before touching the database so a parser failure leaves the
        # job's existing source lines in place.
        parsed_lines = parse_source_file(str(file_path))

        # Delete existing source lines for this job
        existing = await db.execute(
            select(SourceLine).where(SourceLine.job_id == job_id)
        )
        for stale_line in existing.scalars().all():
            await db.delete(stale_line)

        # Create new source lines; "en_gb" is required, other keys are optional.
        source_lines = []
        for row_order, row in enumerate(parsed_lines, start=1):
            source_line = SourceLine(
                job_id=job_id,
                row_order=row_order,
                en_gb=row["en_gb"],
                copy_type=row.get("copy_type"),
                creative_guidance=row.get("creative_guidance"),
                visual_ref=row.get("visual_ref"),
                char_limit=row.get("char_limit"),
                is_display_format=row.get("is_display_format", False),
            )
            db.add(source_line)
            source_lines.append(source_line)

        await db.flush()
        return source_lines

    async def upload_supplementary_file(
        self,
        db: AsyncSession,
        job_id: UUID,
        file: BinaryIO,
        filename: str,
    ) -> str:
        """Upload a supplementary file (TM, glossary, etc.) for a job.

        The file is saved under ``jobs/<job_id>/supplementary/``. ``db`` is
        accepted for signature parity with the other uploads and is not used.

        Returns:
            The stored file path as a string.

        Raises:
            ValueError: If ``filename`` has no usable basename.
        """
        file_path = self._store(
            file,
            "jobs",
            str(job_id),
            "supplementary",
            self._safe_filename(filename),
        )
        return str(file_path)

    async def upload_tm_file(
        self,
        db: AsyncSession,
        client_id: UUID,
        locale_code: str,
        channel: str,
        file: BinaryIO,
        filename: str,
        uploaded_by: UUID | None = None,
    ) -> TMFileRegistry:
        """Upload a TM file and create a registry entry.

        The file is saved under ``clients/<client_id>/tm/<locale_code>/`` and
        its non-blank lines are counted as segments (one per JSONL line).

        Returns:
            The new TMFileRegistry row, added and flushed but not committed.

        Raises:
            ValueError: If ``filename`` has no usable basename or the resulting
                path would leave the storage root.
        """
        safe_name = self._safe_filename(filename)
        file_path = self._store(
            file, "clients", str(client_id), "tm", locale_code, safe_name
        )

        # Count segments (lines in JSONL). Decode explicitly so the count does
        # not depend on the host locale, and never fail on a stray bad byte.
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            segment_count = sum(1 for line in f if line.strip())

        tm_file = TMFileRegistry(
            client_id=client_id,
            locale_code=locale_code,
            channel=channel,
            filename=safe_name,
            file_path=str(file_path),
            segment_count=segment_count,
            uploaded_by=uploaded_by,
        )
        db.add(tm_file)
        await db.flush()
        return tm_file

    async def upload_reference_file(
        self,
        db: AsyncSession,
        client_id: UUID,
        file_type: ReferenceFileType,
        locale_scope: str,
        file: BinaryIO,
        filename: str,
        uploaded_by: UUID | None = None,
    ) -> ReferenceFile:
        """Upload a reference file and create a registry entry.

        The file is saved under
        ``clients/<client_id>/reference/<file_type.value>/``.

        Returns:
            The new ReferenceFile row, added and flushed but not committed.

        Raises:
            ValueError: If ``filename`` has no usable basename.
        """
        safe_name = self._safe_filename(filename)
        file_path = self._store(
            file, "clients", str(client_id), "reference", file_type.value, safe_name
        )

        ref_file = ReferenceFile(
            client_id=client_id,
            file_type=file_type,
            locale_scope=locale_scope,
            filename=safe_name,
            file_path=str(file_path),
            uploaded_by=uploaded_by,
        )
        db.add(ref_file)
        await db.flush()
        return ref_file

    async def list_tm_files(
        self,
        db: AsyncSession,
        client_id: UUID,
        locale_code: str | None = None,
        channel: str | None = None,
    ) -> list[TMFileRegistry]:
        """List TM files for a client with optional filters.

        ``locale_code`` and ``channel`` are applied only when non-empty.
        Results are ordered by ``uploaded_at`` descending.
        """
        query = select(TMFileRegistry).where(TMFileRegistry.client_id == client_id)
        if locale_code:
            query = query.where(TMFileRegistry.locale_code == locale_code)
        if channel:
            query = query.where(TMFileRegistry.channel == channel)

        result = await db.execute(query.order_by(TMFileRegistry.uploaded_at.desc()))
        return list(result.scalars().all())

    async def list_reference_files(
        self,
        db: AsyncSession,
        client_id: UUID,
        file_type: ReferenceFileType | None = None,
        locale_scope: str | None = None,
    ) -> list[ReferenceFile]:
        """List reference files for a client with optional filters.

        ``file_type`` is applied whenever it is given; ``locale_scope`` only
        when non-empty. Results are ordered by ``uploaded_at`` descending.
        """
        query = select(ReferenceFile).where(ReferenceFile.client_id == client_id)
        # Compare against None so a falsy enum member is still a valid filter.
        if file_type is not None:
            query = query.where(ReferenceFile.file_type == file_type)
        if locale_scope:
            query = query.where(ReferenceFile.locale_scope == locale_scope)

        result = await db.execute(query.order_by(ReferenceFile.uploaded_at.desc()))
        return list(result.scalars().all())

    def get_file_path(self, stored_path: str) -> Path | None:
        """Return ``stored_path`` as a Path if it exists on disk, else None."""
        path = Path(stored_path)
        return path if path.exists() else None

    async def _delete_registered_file(
        self,
        db: AsyncSession,
        model: type[TMFileRegistry] | type[ReferenceFile],
        file_id: UUID,
    ) -> bool:
        """Delete the ``model`` row with ``file_id`` and its file on disk.

        Returns False when no such row exists, True otherwise.
        """
        result = await db.execute(select(model).where(model.id == file_id))
        record = result.scalar_one_or_none()
        if record is None:
            return False

        stored_path = Path(record.file_path)
        # Flush the row deletion first: if the database rejects it, the file
        # is still on disk and the registry entry stays consistent.
        await db.delete(record)
        await db.flush()
        # missing_ok avoids the exists()/remove() race when the file is gone.
        stored_path.unlink(missing_ok=True)
        return True

    async def delete_tm_file(
        self, db: AsyncSession, file_id: UUID
    ) -> bool:
        """Delete a TM file from storage and database.

        Returns:
            True if the registry row existed and was deleted, False otherwise.
        """
        return await self._delete_registered_file(db, TMFileRegistry, file_id)

    async def delete_reference_file(
        self, db: AsyncSession, file_id: UUID
    ) -> bool:
        """Delete a reference file from storage and database.

        Returns:
            True if the registry row existed and was deleted, False otherwise.
        """
        return await self._delete_registered_file(db, ReferenceFile, file_id)

    def validate_file_extension(
        self, filename: str, allowed_extensions: list[str]
    ) -> bool:
        """Validate that a file has an allowed extension.

        The comparison is case-insensitive on both sides, and allowed entries
        may be written with or without the leading dot (``".xlsx"`` or
        ``"xlsx"``). An empty-string entry matches files with no extension.
        """
        ext = Path(filename).suffix.lower()
        normalized = set()
        for allowed in allowed_extensions:
            allowed = allowed.lower()
            if allowed and not allowed.startswith("."):
                allowed = "." + allowed
            normalized.add(allowed)
        return ext in normalized
|