290 lines
12 KiB
Python
290 lines
12 KiB
Python
from datetime import datetime
|
|
from enum import Enum
|
|
from typing import Any, Literal
|
|
|
|
from pydantic import BaseModel, Field, constr
|
|
|
|
# Allowed values for JobFailure.step: the pipeline stage a failure is attributed to.
FailureStep = Literal[
    "ingestion",
    "ai_processing",
    "translation",
    "tts",
    "render",
]
|
|
|
|
|
|
class JobStatus(str, Enum):
    """Status values a job can take.

    Members are ``str`` subclasses, so each compares equal to its lowercase
    string value.
    """

    CREATED = "created"
    INGESTING = "ingesting"
    AI_PROCESSING = "ai_processing"
    PENDING_QC = "pending_qc"
    # For English source videos
    APPROVED_ENGLISH = "approved_english"
    # For non-English source videos
    APPROVED_SOURCE = "approved_source"
    REJECTED = "rejected"
    QC_FEEDBACK = "qc_feedback"
    TRANSLATING = "translating"
    TTS_GENERATING = "tts_generating"
    # legacy: use PROCESSING_FAILED + failure.step="tts" for new failures
    TTS_FAILED = "tts_failed"
    # Accessible video rendering in progress
    RENDERING_VIDEO = "rendering_video"
    # legacy: use PROCESSING_FAILED + failure.step="render" for new failures
    RENDER_FAILED = "render_failed"
    # unified failure status; see Job.failure for step details
    PROCESSING_FAILED = "processing_failed"
    # Re-rendering accessible video during QC review
    RENDERING_QC = "rendering_qc"
    PENDING_FINAL_REVIEW = "pending_final_review"
    COMPLETED = "completed"

    @classmethod
    def is_approved(cls, status: str) -> bool:
        """Tell whether ``status`` indicates source approval (any language).

        Args:
            status: A status string (or a ``JobStatus`` member, which compares
                equal to its string value).

        Returns:
            True for the APPROVED_ENGLISH or APPROVED_SOURCE value, else False.
        """
        approved_values = (cls.APPROVED_ENGLISH.value, cls.APPROVED_SOURCE.value)
        return status in approved_values
|
|
|
|
|
|
class JobFailure(BaseModel):
    """Structured record describing a failed processing step."""

    # Stage the failure is attributed to; restricted to the FailureStep literals.
    step: FailureStep
    type: str
    message: str
    # Treated as retriable unless explicitly set to False.
    retriable: bool = True
    occurred_at: datetime
    retry_count: int = 0
|
|
|
|
|
|
class Source(BaseModel):
    """Description of a job's source video file."""

    filename: str
    original_filename: str | None = None
    gcs_uri: str
    duration_s: float | None = None
    # Final source language (from detection or explicit).
    # Declared as a plain ``str`` constrained through ``Field`` rather than a
    # ``constr(...)`` call in the annotation: the call form is not a valid type
    # expression for static checkers and is discouraged by pydantic v2. The
    # length limits (2-10 characters) and the "en" default are unchanged.
    language: str = Field(default="en", min_length=2, max_length=10)
    # User-provided hint for non-English videos
    language_hint: str | None = None
    # AI-detected language from Gemini
    detected_language: str | None = None
|
|
|
|
|
|
class TTSPreferences(BaseModel):
    """Voice, model and style options used for audio description TTS generation."""

    provider: Literal["gemini", "google", "elevenlabs"] = "gemini"
    # Default Gemini voice
    default_voice: str = "Kore"
    # Per-language voice overrides, e.g. {"en": "Kore", "es": "Aoede"}
    voices_per_language: dict[str, str] = {}

    # --- TTS quality and style settings ---
    # flash = fast/cheap, pro = higher quality
    model: Literal["flash", "pro"] = "flash"
    # Speech rate multiplier; validated to lie within 0.5-2.0
    speed: float = Field(default=1.0, ge=0.5, le=2.0)
    style_preset: Literal[
        "neutral",
        "calm",
        "energetic",
        "professional",
        "warm",
        "documentary",
        "custom",
    ] = "neutral"
    # Used when style_preset is "custom"
    custom_style_prompt: str | None = None

    # --- ElevenLabs-specific settings ---
    # NOTE(review): both values are documented as 0.0-1.0 but no bound is
    # enforced by this model — confirm whether validation happens elsewhere.
    # 0.0-1.0, default 0.5 when used
    stability: float | None = None
    # 0.0-1.0, default 0.5 when used
    similarity_boost: float | None = None
|
|
|
|
|
|
class RequestedOutputs(BaseModel):
    """Flags and options selecting which deliverables are requested for a job."""

    captions_vtt: bool = True
    audio_description_vtt: bool = True
    audio_description_mp3: bool = True
    # Rendered video with embedded audio descriptions
    accessible_video_mp4: bool = False
    # User-selected method
    accessible_video_method: Literal["overlay", "pause_insert"] | None = None
    # SDH (Subtitles for Deaf and Hard of Hearing) captions with speaker
    # labels, sound effects, music notation
    sdh_vtt: bool = False
    languages: list[str] = []
    transcreation: list[str] = []
    tts_preferences: TTSPreferences | None = None
    translation_mode: Literal["traditional", "video_native"] = "video_native"
|
|
|
|
|
|
class PausePointData(BaseModel):
    """Timing data for one pause point, used when editing accessible video in QC.

    All ``*_ms`` fields are expressed in milliseconds.
    """

    # AD cue index this pause point belongs to
    cue_index: int
    # Rendered timeline position (ms) - for UI display
    original_ms: float
    # Source video cut point (ms) - for re-rendering (None = use original_ms)
    source_ms: float | None = None
    # User-adjusted timestamp (ms), None = use original
    adjusted_ms: float | None = None
    # Minimum allowed value (end of previous AD segment)
    min_bound_ms: float
    # Maximum allowed value (start of next AD segment)
    max_bound_ms: float
|
|
|
|
|
|
class VideoSegmentMetadata(BaseModel):
    """Describes one video segment lying between pause points."""

    # 0-based segment index
    segment_index: int
    # Start timestamp in source video (ms)
    start_ms: float
    # End timestamp in source video (ms)
    end_ms: float
    # GCS path to segment MP4
    gcs_uri: str
    # Actual segment duration (ms)
    duration_ms: float
    # True if this is a freeze frame segment with AD audio
    is_freeze_frame: bool = False
    # AD cue index (only for freeze frame segments)
    cue_index: int | None = None
|
|
|
|
|
|
class TTSRegenerationRequest(BaseModel):
    """A queued request, raised during QC, to regenerate TTS for a single cue."""

    cue_index: int
    requested_at: datetime
    # If provided, use this text instead of current VTT
    new_text: str | None = None
    # New requests start out as "pending".
    status: Literal["pending", "processing", "completed", "failed"] = "pending"
    error_message: str | None = None
|
|
|
|
|
|
class AccessibleVideoEditState(BaseModel):
    """State of an accessible video that can be edited while it is under QC review."""

    pause_points: list[PausePointData] = []
    video_segments: list[VideoSegmentMetadata] = []
    tts_regeneration_queue: list[TTSRegenerationRequest] = []
    last_render_at: datetime | None = None
    # Default: off (user enables if cue positions changed)
    whisper_refine_enabled: bool = False
|
|
|
|
|
|
class LangOutput(BaseModel):
    """Locations and metadata of generated artefacts.

    Every field is optional and defaults to ``None``.
    """

    captions_vtt_gcs: str | None = None
    # SDH-format captions (speaker labels, sound effects, music)
    sdh_captions_vtt_gcs: str | None = None
    ad_vtt_gcs: str | None = None
    ad_mp3_gcs: str | None = None

    # --- Accessible video outputs ---
    # Rendered accessible MP4
    accessible_video_gcs: str | None = None
    accessible_video_method: Literal["overlay", "pause_insert"] | None = None
    # Re-timed captions for pause-insert method
    retimed_captions_vtt_gcs: str | None = None
    # GCS path prefix for per-cue MP3 segments
    ad_cues_gcs_prefix: str | None = None
    # Per-cue manifest: [{cue_index, gcs_uri, text, duration_s}]
    ad_cue_manifest: list[dict] | None = None

    # --- QC editing state for accessible video ---
    # GCS prefix for persisted video segments
    video_segments_gcs_prefix: str | None = None
    accessible_video_edit_state: AccessibleVideoEditState | None = None

    origin: Literal[
        "translate",
        "transcreate",
        "gemini_translate",
        "video_native",
    ] | None = None
    qa_notes: str | None = None
    # WCAG-compliant combined speech+description transcript
    descriptive_transcript_gcs: str | None = None
|
|
|
|
|
|
class ReviewHistoryItem(BaseModel):
    """A single entry in a review's history list."""

    # Timestamp of the entry.
    at: datetime
    status: str
    by: str | None = None
    notes: str | None = None
|
|
|
|
|
|
class Review(BaseModel):
    """Review notes, reviewer reference and the list of history entries."""

    # Defaults to an empty string, although None is also accepted.
    notes: str | None = ""
    reviewer_id: str | None = None
    history: list[ReviewHistoryItem] = []
|
|
|
|
|
|
# ── Per-language QC ───────────────────────────────────────────────────────────
|
|
|
|
class LanguageQCStatus(str, Enum):
    """Per-language QC status values; members compare equal to their string value."""

    PENDING = "pending"
    # linguist is working
    IN_PROGRESS = "in_progress"
    # linguist submitted, awaiting reviewer
    PENDING_REVIEW = "pending_review"
    # reviewer has opened it
    IN_REVIEW = "in_review"
    APPROVED = "approved"
    REJECTED = "rejected"
|
|
|
|
|
|
class LanguageQCEvent(BaseModel):
    """One recorded action in a language's QC history, with its actor and timestamp."""

    at: datetime
    actor_user_id: str
    actor_email: str
    # Closed set of action names accepted for an event.
    action: Literal[
        "assign",
        "reassign",
        "reviewer_assigned",
        "reviewer_reassigned",
        "start_work",
        "submit_for_review",
        "open_review",
        "approve",
        "reject",
        "reopen",
        "comment_added",
    ]
    notes: str | None = None
    previous_assignee_id: str | None = None
|
|
|
|
|
|
class LanguageQCComment(BaseModel):
    """A comment attached to a language's QC state; all fields are required."""

    id: str
    author_id: str
    author_name: str
    author_email: str
    body: str
    created_at: datetime
|
|
|
|
|
|
class LanguageQCState(BaseModel):
    """QC state for one language: assignment slots, progress, outcome and history."""

    status: LanguageQCStatus = LanguageQCStatus.PENDING

    # --- Linguist slot ---
    assigned_linguist_id: str | None = None
    assigned_linguist_email: str | None = None
    assigned_linguist_name: str | None = None
    assigned_at: datetime | None = None
    assigned_by_user_id: str | None = None
    submitted_for_review_at: datetime | None = None
    # when linguist must submit
    linguist_deadline: datetime | None = None

    # --- Reviewer slot ---
    assigned_reviewer_id: str | None = None
    assigned_reviewer_email: str | None = None
    assigned_reviewer_name: str | None = None
    assigned_reviewer_at: datetime | None = None
    review_started_at: datetime | None = None
    # when reviewer must decide
    reviewer_deadline: datetime | None = None

    # --- Reviewer progress ---
    # set when reviewer opens the job
    total_cues: int | None = None
    # incremented as reviewer marks cues reviewed
    reviewed_cues: int = 0

    # --- Final outcome ---
    reviewed_at: datetime | None = None
    reviewed_by_user_id: str | None = None
    reviewed_by_email: str | None = None
    notes: str | None = None
    # e.g. timing/mistranslation/terminology/profanity/length
    reject_category: str | None = None

    history: list[LanguageQCEvent] = []
    comments: list[LanguageQCComment] = []
|
|
|
|
|
|
class QCAssignment(BaseModel):
    """Denormalized (language, linguist, status) record for efficient per-linguist queue queries."""

    lang: str
    linguist_id: str
    status: LanguageQCStatus
|
|
|
|
|
|
class AISection(BaseModel):
    """Optional AI-related data attached to a job."""

    # Free-form JSON object; its schema is not defined by this model.
    ingestion_json: dict[str, Any] | None = None
    confidence: float | None = None
|
|
|
|
|
|
class AccessibleVideoProgressItem(BaseModel):
    """Tracks accessible video rendering progress for a single language."""

    # New items start out as "pending".
    status: Literal["pending", "rendering", "completed", "failed"] = "pending"
    method: Literal["overlay", "pause_insert"] | None = None
    error_message: str | None = None
    started_at: datetime | None = None
    completed_at: datetime | None = None
|
|
|
|
|
|
class Job(BaseModel):
    """Top-level job model.

    ``id`` is populated from the ``_id`` key through a field alias. The nested
    ``Config`` enables population by field name and stores enum fields as
    their values.
    """

    id: str | None = Field(default=None, alias="_id")
    client_id: str
    title: str
    source: Source
    requested_outputs: RequestedOutputs
    status: JobStatus = JobStatus.CREATED
    review: Review = Review()
    # NOTE(review): presumably keyed by language code — confirm against callers.
    outputs: dict[str, LangOutput] | None = None
    accessible_video_progress: dict[str, AccessibleVideoProgressItem] | None = None
    ai: AISection | None = None
    error: dict[str, Any] | None = None
    # structured failure info; see failure.step for pipeline stage
    failure: JobFailure | None = None
    # total number of manual retries attempted
    retry_count: int = 0
    # Track auto-rewritten TTS cues
    tts_rewrites: list[dict[str, Any]] | None = None
    # Platform project this job belongs to (Client → Project → Job)
    project_id: str | None = None
    # org-tenant ID; backfilled by 2026-04-28-000003 migration
    organization_id: str | None = None
    # JobBrief that originated this job (W-12)
    brief_id: str | None = None
    # GCS path prefix; None = legacy flat {job_id}/ layout
    gcs_prefix: str | None = None
    initial_linguist_id: str | None = None
    initial_reviewer_id: str | None = None
    # Brand names present in the video for accurate product identification
    brand_context: str | None = None
    # External project ID for AI cost attribution
    cost_tracker_project_id: str | None = None
    # job-level PM deadline (overdue if past and not completed)
    deadline: datetime | None = None
    # per-language QC state, keyed by lang code
    language_qc: dict[str, LanguageQCState] = {}
    # denormalized for linguist-queue queries
    qc_assignments: list[QCAssignment] = []
    created_at: datetime | None = None
    updated_at: datetime | None = None

    class Config:
        # Accept both the field name ("id") and its alias ("_id") on input.
        populate_by_name = True
        # Store enum fields (e.g. status) as their underlying values.
        use_enum_values = True
|
|
|
|
|
|
class JobCreate(BaseModel):
    """Input fields for creating a job."""

    title: str
    # True = English source, False = other language (auto-detect)
    source_is_english: bool = True
    # Optional hint when source_is_english=False
    language_hint: str | None = None
    requested_outputs: RequestedOutputs
    # Comma-separated brand names present in the video (e.g. "Sellotape, Coca-Cola")
    brand_context: str | None = None
|
|
|
|
|
|
class JobUpdate(BaseModel):
    """Fields that may be supplied when updating a job.

    Every field is optional and defaults to ``None``.
    """

    title: str | None = None
    status: JobStatus | None = None
    review: Review | None = None
    outputs: dict[str, LangOutput] | None = None
    ai: AISection | None = None
    error: dict[str, Any] | None = None
    deadline: datetime | None = None
|