amazon-transcreation/backend/app/models/job.py
DJP d5fa4e49f7 Fix markdown table parser losing backtranslations/rationales, add model selection, update help page
The V25 table has duplicate column names (Backtranslation x3, Rationale x3).
The dict-based parser collapsed these — only the last value survived (Option 3's
"N/A"), causing all BT/rationale fields to be "N/A" in the output Excel.

Fixed by switching to positional list-based parsing instead of dicts.

Also adds per-job model selection (Sonnet 4.6 / Opus 4.6) through the full
stack: DB column, API schema, job wizard UI dropdown, pipeline contracts, and
LLM client with model-aware cost tracking. Includes Alembic migration.

Updated help page and README to reflect single-agent pipeline, multi-TM
selection, flat locale grid, model selector, and linguistic summary.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-14 12:40:17 -04:00

147 lines
5 KiB
Python

import enum
import uuid
from datetime import datetime
from sqlalchemy import (
JSON,
DateTime,
Enum,
Float,
ForeignKey,
Integer,
String,
Text,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.models.base import Base, TimestampMixin, generate_uuid
class Programme(str, enum.Enum):
    """Closed set of programme identifiers a job can be assigned to.

    Members are also ``str`` instances, so each one compares equal to its
    own lowercase value (e.g. ``Programme.retail == "retail"``).
    """

    retail = "retail"
    prime = "prime"
    brand = "brand"
class JobType(str, enum.Enum):
    """Kind of job: either ``main`` or ``derived``.

    Members double as plain strings equal to their value.
    """

    main = "main"
    derived = "derived"
class JobStatus(str, enum.Enum):
    """Status values that a job record can carry.

    Members double as plain strings equal to their value. The declaration
    order below is the iteration order of the enum.
    """

    created = "created"
    validating = "validating"
    queued = "queued"
    running = "running"
    partial_complete = "partial_complete"
    complete = "complete"
    error = "error"
    exported = "exported"
class LocaleType(str, enum.Enum):
    """Kind of locale instance: either ``main`` or ``derived``.

    Members double as plain strings equal to their value.
    """

    main = "main"
    derived = "derived"
class LocaleStatus(str, enum.Enum):
    """Status values that a single locale instance can carry.

    Members double as plain strings equal to their value.
    """

    queued = "queued"
    running = "running"
    complete = "complete"
    error = "error"
class Job(Base, TimestampMixin):
    """ORM model mapped to the ``jobs`` table.

    A job belongs to one client, records the user that created it, and owns
    collections of ``LocaleInstance`` and ``SourceLine`` rows. A job may
    optionally point at another job through ``parent_job_id``.

    NOTE(review): ``TimestampMixin`` is defined elsewhere; whatever columns
    it contributes are not visible in this module.
    """

    __tablename__ = "jobs"

    # ---- Identity and ownership -------------------------------------------
    id: Mapped[uuid.UUID] = mapped_column(primary_key=True, default=generate_uuid)
    # ON DELETE CASCADE: the database removes jobs when their client row goes.
    client_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("clients.id", ondelete="CASCADE"),
        nullable=False,
    )
    created_by: Mapped[uuid.UUID] = mapped_column(ForeignKey("users.id"), nullable=False)

    # ---- Descriptive fields -----------------------------------------------
    job_ref: Mapped[str | None] = mapped_column(
        String(100),
        nullable=True,
    )
    campaign_name: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
    )
    programme: Mapped[Programme] = mapped_column(
        Enum(Programme, name="programme_type", create_constraint=True), nullable=False
    )
    channel: Mapped[str] = mapped_column(
        String(100),
        nullable=False,
    )
    sub_channel: Mapped[str | None] = mapped_column(
        String(100),
        nullable=True,
    )
    # Stored as JSON. Presumably the list of selected TM channels -- confirm
    # against the code that writes it.
    tm_channels: Mapped[list | None] = mapped_column(
        JSON,
        nullable=True,
    )
    # Optional model identifier stored per job; NULL when none was recorded.
    llm_model: Mapped[str | None] = mapped_column(
        String(100),
        nullable=True,
    )
    context_prompt: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )

    # ---- Job hierarchy and lifecycle --------------------------------------
    job_type: Mapped[JobType] = mapped_column(
        Enum(JobType, name="job_type", create_constraint=True), default=JobType.main, nullable=False
    )
    # Self-referential foreign key; NULL when the job has no parent.
    parent_job_id: Mapped[uuid.UUID | None] = mapped_column(ForeignKey("jobs.id"), nullable=True)
    status: Mapped[JobStatus] = mapped_column(
        Enum(JobStatus, name="job_status", create_constraint=True), default=JobStatus.created, nullable=False
    )

    # ---- Usage accounting -------------------------------------------------
    total_token_usage: Mapped[int] = mapped_column(
        Integer,
        default=0,
    )
    total_estimated_cost: Mapped[float] = mapped_column(
        Float,
        default=0.0,
    )

    # ---- Relationships ----------------------------------------------------
    client = relationship(
        "Client",
        back_populates="jobs",
    )
    creator = relationship(
        "User",
        back_populates="jobs_created",
    )
    # remote_side marks Job.id as the "one" side, so this attribute resolves
    # to the parent row rather than to a collection of children.
    parent_job = relationship(
        "Job",
        remote_side="Job.id",
        lazy="selectin",
    )
    # Both child collections use select-in eager loading and are deleted
    # together with the job (including orphans removed from the collection).
    locale_instances = relationship(
        "LocaleInstance",
        back_populates="job",
        lazy="selectin",
        cascade="all, delete-orphan",
    )
    source_lines = relationship(
        "SourceLine",
        back_populates="job",
        lazy="selectin",
        cascade="all, delete-orphan",
    )
class LocaleInstance(Base, TimestampMixin):
    """ORM model mapped to the ``locale_instances`` table.

    Each row belongs to exactly one ``Job`` (via ``job_id``) and carries a
    locale code, its own status, timing, usage counters and progress, plus
    collections of ``OutputRow`` and ``TokenUsageLog`` rows.

    NOTE(review): ``TimestampMixin`` is defined elsewhere; whatever columns
    it contributes are not visible in this module.
    """

    __tablename__ = "locale_instances"

    # ---- Identity ---------------------------------------------------------
    id: Mapped[uuid.UUID] = mapped_column(primary_key=True, default=generate_uuid)
    # ON DELETE CASCADE: the database removes instances when their job goes.
    job_id: Mapped[uuid.UUID] = mapped_column(
        ForeignKey("jobs.id", ondelete="CASCADE"),
        nullable=False,
    )
    locale_code: Mapped[str] = mapped_column(
        String(10),
        nullable=False,
    )
    locale_type: Mapped[LocaleType] = mapped_column(
        Enum(LocaleType, name="locale_type", create_constraint=True), nullable=False
    )

    # ---- Lifecycle --------------------------------------------------------
    status: Mapped[LocaleStatus] = mapped_column(
        Enum(LocaleStatus, name="locale_status", create_constraint=True), default=LocaleStatus.queued, nullable=False
    )
    # Timezone-aware timestamps; NULL until set.
    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)

    # ---- Usage accounting -------------------------------------------------
    token_usage: Mapped[int] = mapped_column(
        Integer,
        default=0,
    )
    estimated_cost: Mapped[float] = mapped_column(
        Float,
        default=0.0,
    )

    # ---- Outputs and diagnostics ------------------------------------------
    output_file_path: Mapped[str | None] = mapped_column(
        String(500),
        nullable=True,
    )
    error_log: Mapped[str | None] = mapped_column(
        Text,
        nullable=True,
    )
    # JSON documents; their exact schema is not visible here -- presumably a
    # record of which TM / reference files were loaded (confirm with writer).
    tm_files_loaded: Mapped[dict | None] = mapped_column(
        JSON,
        nullable=True,
    )
    ref_files_loaded: Mapped[dict | None] = mapped_column(
        JSON,
        nullable=True,
    )
    agent_version: Mapped[str | None] = mapped_column(
        String(50),
        nullable=True,
    )
    # Has both a Python-side default and a server-side default of "0".
    progress: Mapped[float] = mapped_column(
        Float,
        default=0.0,
        nullable=False,
        server_default="0",
    )
    current_stage: Mapped[str | None] = mapped_column(
        String(100),
        nullable=True,
    )

    # ---- Relationships ----------------------------------------------------
    job = relationship(
        "Job",
        back_populates="locale_instances",
    )
    # Both child collections use select-in eager loading and are deleted
    # together with the instance (including orphans removed from the list).
    output_rows = relationship(
        "OutputRow",
        back_populates="instance",
        lazy="selectin",
        cascade="all, delete-orphan",
    )
    token_usage_logs = relationship(
        "TokenUsageLog",
        back_populates="instance",
        lazy="selectin",
        cascade="all, delete-orphan",
    )