video-accessibility/backend/app/services/language_qc.py
Vadym Samoilenko 31199f8705 chore: push all session changes — backend hardening, tests, apache config, deploy scripts
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-30 15:52:14 +01:00

1148 lines
44 KiB
Python

"""Per-language QC service — two-stage (linguist → reviewer) assignment, approval, rejection, comments."""
import asyncio
from datetime import datetime
from typing import Any
from uuid import uuid4
from fastapi import HTTPException
from motor.motor_asyncio import AsyncIOMotorDatabase
from ..core.logging import get_logger
from ..models.audit_log import AuditAction, AuditLogSeverity
from ..models.job import (
JobStatus,
LanguageQCComment,
LanguageQCEvent,
LanguageQCState,
LanguageQCStatus,
)
from ..models.user import User, UserRole
from ..services.audit_logger import audit_logger
from ..services.websocket import connection_manager
logger = get_logger(__name__)
_JOBS = "jobs"
async def _assert_user_in_job_org(
    db: AsyncIOMotorDatabase,
    user_id: str,
    job_doc: dict,
) -> None:
    """Ensure ``user_id`` has a membership in the organization owning the job.

    The organization is read from ``job_doc["organization_id"]``. When that is
    empty and the job has a ``project_id``, the project's ``client_id`` is used
    instead.

    Raises:
        HTTPException: 422 if no organization can be resolved for the job;
            403 if no membership document links the user to that organization.
    """
    resolved_org = job_doc.get("organization_id")

    if not resolved_org:
        # Fall back to the organization recorded on the owning project.
        project_ref = job_doc.get("project_id")
        owning_project = (
            await db.projects.find_one({"_id": project_ref}, {"client_id": 1})
            if project_ref
            else None
        )
        if owning_project:
            resolved_org = owning_project.get("client_id")

    if not resolved_org:
        raise HTTPException(
            status_code=422,
            detail="Job is not bound to an organization; cannot validate cross-org assignment",
        )

    membership = await db.memberships.find_one(
        {"user_id": user_id, "organization_id": resolved_org}
    )
    if membership:
        return
    raise HTTPException(
        status_code=403,
        detail="Assignee is not a member of this job's organization",
    )
# ── Helpers ───────────────────────────────────────────────────────────────────
def _job_languages(job_doc: dict) -> list[str]:
"""Canonical set of language codes that must be QC-approved before final review."""
source_lang = job_doc.get("source", {}).get("language", "en")
requested = job_doc.get("requested_outputs", {}).get("languages", [])
langs = list({source_lang} | set(requested))
return langs
def _all_approved(job_doc: dict) -> bool:
    """Return True when every language from ``_job_languages`` is APPROVED.

    A language with no QC entry (or a ``None`` entry) counts as not approved.
    A missing or ``None`` ``language_qc`` map is treated as empty.
    """
    # ``or {}`` guards against the key being present with a None value, which
    # ``.get(key, {})`` would pass through unchanged.
    lang_qc = job_doc.get("language_qc") or {}
    approved = LanguageQCStatus.APPROVED.value
    return all(
        (lang_qc.get(lang) or {}).get("status") == approved
        for lang in _job_languages(job_doc)
    )
def _any_rejected(job_doc: dict) -> bool:
    """Return True when at least one language from ``_job_languages`` is REJECTED.

    A missing or ``None`` ``language_qc`` map, or a ``None`` per-language entry,
    is treated as "no status recorded" and therefore not rejected.
    """
    # ``or {}`` guards against the key being present with a None value, which
    # ``.get(key, {})`` would pass through unchanged.
    lang_qc = job_doc.get("language_qc") or {}
    rejected = LanguageQCStatus.REJECTED.value
    return any(
        (lang_qc.get(lang) or {}).get("status") == rejected
        for lang in _job_languages(job_doc)
    )
def _rebuild_qc_assignments(language_qc: dict) -> list[dict]:
"""Rebuild the denormalized qc_assignments array from language_qc dict."""
assignments = []
for lang, state in language_qc.items():
linguist_id = state.get("assigned_linguist_id") if isinstance(state, dict) else state.assigned_linguist_id
qc_status = state.get("status") if isinstance(state, dict) else state.status
if linguist_id:
assignments.append({
"lang": lang,
"linguist_id": linguist_id,
"status": qc_status,
})
return assignments
def _qc_recipients(
job_doc: dict,
lang_state: dict,
exclude_user_id: str | None,
) -> list[tuple[str, str]]:
"""Return [(email, full_name)] for linguist + reviewer assigned to a language, minus the actor."""
seen: set[str] = set()
result: list[tuple[str, str]] = []
def _add(email: str | None, name: str | None) -> None:
if email and email not in seen and email != exclude_user_id:
seen.add(email)
result.append((email, name or email.split("@")[0]))
_add(lang_state.get("assigned_linguist_email"), lang_state.get("assigned_linguist_name"))
_add(lang_state.get("assigned_reviewer_email"), lang_state.get("assigned_reviewer_name"))
return result
def _deep_link(job_id: str, lang: str) -> str:
    """Build the admin QC URL for a job, anchored at the given language.

    The base URL is ``settings.app_url`` when that attribute exists, otherwise
    a hard-coded default.
    """
    # Imported at call time rather than at module import.
    from ..core.config import settings

    fallback_base = "https://ai-sandbox.oliver.solutions/video-accessibility"
    base = getattr(settings, "app_url", fallback_base)
    return f"{base}/admin/qc/{job_id}#lang-{lang}"
# ── Auto-assignment ───────────────────────────────────────────────────────────
async def auto_assign_defaults(db: AsyncIOMotorDatabase, job_id: str) -> int:
    """Apply the job's default linguist/reviewer to languages that lack one.

    Reads ``initial_linguist_id`` / ``initial_reviewer_id`` from the job and,
    for every language in ``requested_outputs.languages``, fills in whichever
    of the two slots is still empty. Existing assignments are never
    overwritten, so repeated calls are safe and a manual (re)assignment is not
    clobbered by the defaults.

    All changes are written in a single update together with a rebuilt
    ``qc_assignments`` array, so jobs assigned here are visible to queries on
    that denormalized array (e.g. ``list_for_linguist``).

    Returns:
        The number of languages that received at least one new assignment;
        0 when the job is missing, has no defaults, has no requested
        languages, or nothing needed changing.
    """
    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        return 0

    linguist_id: str | None = job_doc.get("initial_linguist_id")
    reviewer_id: str | None = job_doc.get("initial_reviewer_id")
    if not linguist_id and not reviewer_id:
        return 0

    languages: list[str] = (job_doc.get("requested_outputs") or {}).get("languages") or []
    if not languages:
        return 0

    linguist_doc = await db.users.find_one({"_id": linguist_id}) if linguist_id else None
    reviewer_doc = await db.users.find_one({"_id": reviewer_id}) if reviewer_id else None
    if not linguist_doc and not reviewer_doc:
        # Defaults point at users that no longer exist: nothing to apply.
        return 0

    now = datetime.utcnow()
    current_qc: dict = job_doc.get("language_qc") or {}
    # In-memory view of language_qc after the patch, used to rebuild qc_assignments.
    merged_qc: dict = {
        code: state for code, state in current_qc.items() if isinstance(state, dict)
    }
    patch: dict = {}
    updated = 0

    for lang in languages:
        lang_state: dict = current_qc.get(lang) or {}
        lang_patch: dict = {}

        # Each role is checked independently so a default never replaces an
        # assignment that already exists for that role.
        if linguist_doc and not lang_state.get("assigned_linguist_id"):
            lang_patch.update({
                "assigned_linguist_id": linguist_id,
                "assigned_linguist_email": linguist_doc["email"],
                "assigned_linguist_name": linguist_doc.get("full_name", ""),
                "assigned_at": now,
                "assigned_by_user_id": "system",
                "status": lang_state.get("status", LanguageQCStatus.PENDING.value),
            })
        if reviewer_doc and not lang_state.get("assigned_reviewer_id"):
            lang_patch.update({
                "assigned_reviewer_id": reviewer_id,
                "assigned_reviewer_email": reviewer_doc["email"],
                "assigned_reviewer_name": reviewer_doc.get("full_name", ""),
            })

        if not lang_patch:
            continue

        # Dotted paths keep any fields of the language state we do not touch.
        for field, value in lang_patch.items():
            patch[f"language_qc.{lang}.{field}"] = value
        merged_qc[lang] = {**lang_state, **lang_patch}
        updated += 1

    if not updated:
        return 0

    patch["qc_assignments"] = _rebuild_qc_assignments(merged_qc)
    await db[_JOBS].update_one({"_id": job_id}, {"$set": patch})
    logger.info("auto_assign_defaults: assigned %d languages on job %s", updated, job_id)
    return updated
# ── Core mutations ────────────────────────────────────────────────────────────
async def get_state(db: AsyncIOMotorDatabase, job_id: str, lang: str) -> LanguageQCState | None:
    """Load the QC state of a single language.

    Returns ``None`` when the job does not exist or has no entry for ``lang``.
    """
    # Project only the requested language's sub-document.
    only_this_lang = {f"language_qc.{lang}": 1}
    job_doc = await db[_JOBS].find_one({"_id": job_id}, only_this_lang)
    if job_doc:
        stored = job_doc.get("language_qc", {}).get(lang)
        if stored is not None:
            return LanguageQCState(**stored)
    return None
async def get_all_states(db: AsyncIOMotorDatabase, job_id: str) -> dict[str, LanguageQCState]:
    """Load the QC state of every language on a job, keyed by language code.

    Returns an empty dict when the job does not exist or has no ``language_qc``.
    """
    job_doc = await db[_JOBS].find_one({"_id": job_id}, {"language_qc": 1})
    if not job_doc:
        return {}
    stored_states = job_doc.get("language_qc") or {}
    return {code: LanguageQCState(**payload) for code, payload in stored_states.items()}
# ── Linguist assignment ────────────────────────────────────────────────────────
async def assign_linguist(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    linguist_user_id: str,
    actor: User,
    *,
    http_request=None,
    notes: str | None = None,
    deadline: datetime | None = None,
) -> LanguageQCState:
    """Assign (or reassign) a linguist to one language of a job.

    Creates the per-language state if it does not exist yet, appends an
    ``assign`` / ``reassign`` event to its history, rebuilds the denormalized
    ``qc_assignments`` array, writes an audit entry, then notifies the new
    linguist by e-mail and WebSocket. Notification failures are logged and do
    not fail the assignment.

    This function performs no role check on ``actor``; callers are expected to
    have authorized the request.

    Raises:
        HTTPException: 404 if the job or the linguist does not exist; 422/403
            from ``_assert_user_in_job_org`` for organization problems.
    """
    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")
    linguist_doc = await db.users.find_one({"_id": linguist_user_id})
    if not linguist_doc:
        raise HTTPException(status_code=404, detail="Linguist not found")
    await _assert_user_in_job_org(db, linguist_user_id, job_doc)

    now = datetime.utcnow()
    all_states = job_doc.get("language_qc") or {}
    raw_state = all_states.get(lang, {})
    # Anything that is not a dict (e.g. None) is treated as "no state yet".
    current_state: dict = raw_state if isinstance(raw_state, dict) else {}

    prev_assignee = current_state.get("assigned_linguist_id")
    is_reassignment = prev_assignee is not None and prev_assignee != linguist_user_id
    event = LanguageQCEvent(
        at=now,
        actor_user_id=str(actor.id),
        actor_email=actor.email,
        action="reassign" if is_reassignment else "assign",
        notes=notes,
        previous_assignee_id=prev_assignee if is_reassignment else None,
    )
    updated_state = {
        **current_state,
        "status": current_state.get("status", LanguageQCStatus.PENDING.value),
        "assigned_linguist_id": linguist_user_id,
        "assigned_linguist_email": linguist_doc["email"],
        "assigned_linguist_name": linguist_doc.get("full_name", ""),
        "assigned_at": now,
        "assigned_by_user_id": str(actor.id),
        "linguist_deadline": deadline,
        "history": current_state.get("history", []) + [event.model_dump()],
    }
    qc_assignments = _rebuild_qc_assignments({**all_states, lang: updated_state})
    await db[_JOBS].update_one(
        {"_id": job_id},
        {"$set": {
            f"language_qc.{lang}": updated_state,
            "qc_assignments": qc_assignments,
            "updated_at": now,
        }},
    )

    verb = "reassigned" if is_reassignment else "assigned"
    await audit_logger.log_action(
        AuditAction.LANGUAGE_QC_REASSIGN if is_reassignment else AuditAction.LANGUAGE_QC_ASSIGN,
        # Separator added: job id and e-mail were previously run together.
        f"Language QC linguist {verb}: {lang} on job {job_id} → {linguist_doc['email']}",
        user=actor,
        request=http_request,
        resource_type="job_language",
        resource_id=f"{job_id}:{lang}",
        details={"lang": lang, "linguist_id": linguist_user_id, "linguist_email": linguist_doc["email"]},
    )

    # Best-effort e-mail to the new linguist.
    try:
        from ..services.emailer import email_service
        await email_service.send_language_assignment_email(
            to_email=linguist_doc["email"],
            full_name=linguist_doc.get("full_name", ""),
            job_title=job_doc.get("title", job_id),
            lang=lang,
            role="linguist",
            deep_link=_deep_link(job_id, lang),
        )
    except Exception:
        logger.exception("Failed to send linguist assignment email")

    # Best-effort live notification; log instead of silently discarding errors.
    try:
        await connection_manager.broadcast_to_user(
            linguist_user_id,
            {"type": "language_qc_assigned", "job_id": job_id, "lang": lang, "job_title": job_doc.get("title")},
        )
    except Exception:
        logger.exception("Failed to broadcast linguist assignment notification")

    return LanguageQCState(**updated_state)
async def reassign_linguist(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    new_linguist_user_id: str,
    actor: User,
    *,
    http_request=None,
    notes: str | None = None,
    deadline: datetime | None = None,
) -> LanguageQCState:
    """Hand a language over to another linguist.

    Allowed for the linguist currently assigned to the language and for
    PRODUCTION / ADMIN / PROJECT_MANAGER users. The actual write is delegated
    to ``assign_linguist``.

    Raises:
        HTTPException: 404 if the job is missing, 400 if the language has no
            QC state, 403 if the actor may not reassign it.
    """
    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")

    lang_state = (job_doc.get("language_qc") or {}).get(lang)
    if not lang_state:
        raise HTTPException(status_code=400, detail=f"No QC state found for language '{lang}'")

    holder = lang_state.get("assigned_linguist_id") if isinstance(lang_state, dict) else None
    managing_roles = (UserRole.PRODUCTION, UserRole.ADMIN, UserRole.PROJECT_MANAGER)
    actor_is_holder = holder == str(actor.id)
    if not actor_is_holder and actor.role not in managing_roles:
        raise HTTPException(status_code=403, detail="Not authorized to reassign this language")

    return await assign_linguist(
        db,
        job_id,
        lang,
        new_linguist_user_id,
        actor,
        http_request=http_request,
        notes=notes,
        deadline=deadline,
    )
# ── Reviewer assignment ────────────────────────────────────────────────────────
async def assign_reviewer(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    reviewer_user_id: str,
    actor: User,
    *,
    http_request=None,
    notes: str | None = None,
    deadline: datetime | None = None,
) -> LanguageQCState:
    """Assign (or reassign) a reviewer to one language of a job.

    Appends a ``reviewer_assigned`` / ``reviewer_reassigned`` event to the
    language history, rebuilds ``qc_assignments``, writes an audit entry and
    e-mails the reviewer. An e-mail failure is logged and does not fail the
    assignment.

    This function performs no role check on ``actor``; callers are expected to
    have authorized the request.

    Raises:
        HTTPException: 404 if the job or the reviewer does not exist; 422/403
            from ``_assert_user_in_job_org`` for organization problems.
    """
    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")
    reviewer_doc = await db.users.find_one({"_id": reviewer_user_id})
    if not reviewer_doc:
        raise HTTPException(status_code=404, detail="Reviewer not found")
    await _assert_user_in_job_org(db, reviewer_user_id, job_doc)

    now = datetime.utcnow()
    all_states = job_doc.get("language_qc") or {}
    raw_state = all_states.get(lang, {})
    # Anything that is not a dict (e.g. None) is treated as "no state yet".
    current_state: dict = raw_state if isinstance(raw_state, dict) else {}

    prev_reviewer = current_state.get("assigned_reviewer_id")
    is_reassignment = prev_reviewer is not None and prev_reviewer != reviewer_user_id
    event = LanguageQCEvent(
        at=now,
        actor_user_id=str(actor.id),
        actor_email=actor.email,
        action="reviewer_reassigned" if is_reassignment else "reviewer_assigned",
        notes=notes,
        previous_assignee_id=prev_reviewer if is_reassignment else None,
    )
    updated_state = {
        **current_state,
        "assigned_reviewer_id": reviewer_user_id,
        "assigned_reviewer_email": reviewer_doc["email"],
        "assigned_reviewer_name": reviewer_doc.get("full_name", ""),
        "assigned_reviewer_at": now,
        "reviewer_deadline": deadline,
        "history": current_state.get("history", []) + [event.model_dump()],
    }
    qc_assignments = _rebuild_qc_assignments({**all_states, lang: updated_state})
    await db[_JOBS].update_one(
        {"_id": job_id},
        {"$set": {
            f"language_qc.{lang}": updated_state,
            "qc_assignments": qc_assignments,
            "updated_at": now,
        }},
    )

    verb = "reassigned" if is_reassignment else "assigned"
    await audit_logger.log_action(
        (
            AuditAction.LANGUAGE_QC_REVIEWER_REASSIGN
            if is_reassignment
            else AuditAction.LANGUAGE_QC_REVIEWER_ASSIGN
        ),
        # Separator added: job id and e-mail were previously run together.
        f"Language QC reviewer {verb}: {lang} on job {job_id} → {reviewer_doc['email']}",
        user=actor,
        request=http_request,
        resource_type="job_language",
        resource_id=f"{job_id}:{lang}",
        details={"lang": lang, "reviewer_id": reviewer_user_id, "reviewer_email": reviewer_doc["email"]},
    )

    # Best-effort e-mail to the new reviewer.
    try:
        from ..services.emailer import email_service
        await email_service.send_language_assignment_email(
            to_email=reviewer_doc["email"],
            full_name=reviewer_doc.get("full_name", ""),
            job_title=job_doc.get("title", job_id),
            lang=lang,
            role="reviewer",
            deep_link=_deep_link(job_id, lang),
        )
    except Exception:
        logger.exception("Failed to send reviewer assignment email")

    return LanguageQCState(**updated_state)
async def reassign_reviewer(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    new_reviewer_user_id: str,
    actor: User,
    *,
    http_request=None,
    notes: str | None = None,
    deadline: datetime | None = None,
) -> LanguageQCState:
    """Replace the reviewer of a language; PRODUCTION / ADMIN / PROJECT_MANAGER only.

    After the role check the work is delegated to ``assign_reviewer``.

    Raises:
        HTTPException: 403 when the actor has none of the allowed roles, plus
            whatever ``assign_reviewer`` raises.
    """
    managing_roles = (UserRole.PRODUCTION, UserRole.ADMIN, UserRole.PROJECT_MANAGER)
    if actor.role in managing_roles:
        return await assign_reviewer(
            db,
            job_id,
            lang,
            new_reviewer_user_id,
            actor,
            http_request=http_request,
            notes=notes,
            deadline=deadline,
        )
    raise HTTPException(status_code=403, detail="Only PM/PROD/ADMIN can reassign reviewer")
# ── Workflow transitions ──────────────────────────────────────────────────────
async def start_linguist_work(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    actor: User,
) -> LanguageQCState:
    """Mark a language as being worked on: pending / rejected → in_progress.

    When the language is in any other status the stored state is returned
    unchanged and nothing is written (this happens before the assignee check).
    Otherwise the actor must be the assigned linguist or have the PRODUCTION /
    ADMIN role.

    The new status is also mirrored into the denormalized ``qc_assignments``
    array so status-filtered queue queries stay accurate.

    Raises:
        HTTPException: 404 if the job is missing, 403 if the actor is neither
            the assigned linguist nor PRODUCTION / ADMIN.
    """
    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")

    all_states = job_doc.get("language_qc") or {}
    raw_state = all_states.get(lang, {})
    # Anything that is not a dict (e.g. None) is treated as "no state yet".
    current_state: dict = raw_state if isinstance(raw_state, dict) else {}

    current_status = current_state.get("status", LanguageQCStatus.PENDING.value)
    if current_status not in (LanguageQCStatus.PENDING.value, LanguageQCStatus.REJECTED.value):
        # Already started (or further along): no-op.
        return LanguageQCState(**current_state)

    assigned = current_state.get("assigned_linguist_id")
    if assigned != str(actor.id) and actor.role not in (UserRole.PRODUCTION, UserRole.ADMIN):
        raise HTTPException(status_code=403, detail="Not the assigned linguist")

    now = datetime.utcnow()
    event = LanguageQCEvent(at=now, actor_user_id=str(actor.id), actor_email=actor.email, action="start_work")
    history = current_state.get("history", []) + [event.model_dump()]
    updated_state = {
        **current_state,
        "status": LanguageQCStatus.IN_PROGRESS.value,
        "submitted_for_review_at": None,
        "history": history,
    }
    # Keep the denormalized per-linguist status in step with language_qc.
    qc_assignments = _rebuild_qc_assignments({**all_states, lang: updated_state})
    await db[_JOBS].update_one(
        {"_id": job_id},
        {"$set": {
            f"language_qc.{lang}.status": LanguageQCStatus.IN_PROGRESS.value,
            f"language_qc.{lang}.submitted_for_review_at": None,
            f"language_qc.{lang}.history": history,
            "qc_assignments": qc_assignments,
            "updated_at": now,
        }},
    )
    return LanguageQCState(**updated_state)


# Keep old name as alias so any existing callers don't break immediately
start_review = start_linguist_work
async def submit_for_review(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    actor: User,
    *,
    http_request=None,
) -> LanguageQCState:
    """Hand a language over to its reviewer: in_progress / pending → pending_review.

    The actor must be the assigned linguist or have the PRODUCTION / ADMIN
    role. On success the reviewed-cue counter is reset, the history gets a
    ``submit_for_review`` event, ``qc_assignments`` is rebuilt, an audit entry
    is written and the assigned reviewer (if any) is e-mailed. An e-mail
    failure is logged only.

    Raises:
        HTTPException: 404 if the job is missing, 403 if the actor is not
            allowed, 400 if the current status cannot be submitted.
    """
    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")

    every_lang_state = job_doc.get("language_qc") or {}
    stored = every_lang_state.get(lang, {})
    state_is_dict = isinstance(stored, dict)
    prior_state = stored if state_is_dict else {}

    if state_is_dict:
        current_status = stored.get("status", LanguageQCStatus.PENDING.value)
        assigned_linguist = stored.get("assigned_linguist_id")
    else:
        current_status = LanguageQCStatus.PENDING.value
        assigned_linguist = None

    # Authorization is checked before the status so outsiders always get 403.
    is_privileged = actor.role in (UserRole.PRODUCTION, UserRole.ADMIN)
    if not is_privileged and assigned_linguist != str(actor.id):
        raise HTTPException(status_code=403, detail="Not the assigned linguist")

    submittable = (LanguageQCStatus.IN_PROGRESS.value, LanguageQCStatus.PENDING.value)
    if current_status not in submittable:
        raise HTTPException(status_code=400, detail=f"Cannot submit from status '{current_status}'")

    now = datetime.utcnow()
    submit_event = LanguageQCEvent(
        at=now,
        actor_user_id=str(actor.id),
        actor_email=actor.email,
        action="submit_for_review",
    )
    history = prior_state.get("history", []) + [submit_event.model_dump()]

    updated_state = dict(prior_state)
    updated_state["status"] = LanguageQCStatus.PENDING_REVIEW.value
    updated_state["submitted_for_review_at"] = now
    # R-12: reviewer must re-acknowledge cues after each resubmit
    updated_state["reviewed_cues"] = 0
    updated_state["history"] = history

    merged_qc = {**every_lang_state, lang: updated_state}
    set_fields = {
        f"language_qc.{lang}": updated_state,
        "qc_assignments": _rebuild_qc_assignments(merged_qc),
        "updated_at": now,
    }
    await db[_JOBS].update_one({"_id": job_id}, {"$set": set_fields})

    await audit_logger.log_action(
        AuditAction.LANGUAGE_QC_SUBMIT,
        f"Language QC submitted for review: {lang} on job {job_id}",
        user=actor,
        request=http_request,
        resource_type="job_language",
        resource_id=f"{job_id}:{lang}",
        details={"lang": lang},
    )

    # Let the reviewer know there is work waiting, when one is assigned.
    reviewer_email = updated_state.get("assigned_reviewer_email")
    reviewer_name = updated_state.get("assigned_reviewer_name", "")
    if not reviewer_email:
        return LanguageQCState(**updated_state)

    try:
        from ..services.emailer import email_service
        await email_service.send_language_submitted_email(
            to_email=reviewer_email,
            full_name=reviewer_name,
            job_title=job_doc.get("title", job_id),
            lang=lang,
            linguist_name=updated_state.get("assigned_linguist_name", ""),
            deep_link=_deep_link(job_id, lang),
        )
    except Exception:
        logger.exception("Failed to send submission notification email")
    return LanguageQCState(**updated_state)
async def open_review(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    actor: User,
    *,
    http_request=None,
) -> LanguageQCState:
    """Mark a language as under review: pending_review → in_review.

    The actor must be the assigned reviewer or have the PRODUCTION / ADMIN
    role. When the language is not in ``pending_review`` the stored state is
    returned unchanged and nothing is written.

    The new status is also mirrored into the denormalized ``qc_assignments``
    array so status-filtered queue queries stay accurate.

    Raises:
        HTTPException: 404 if the job is missing, 403 if the actor is neither
            the assigned reviewer nor PRODUCTION / ADMIN.
    """
    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")

    all_states = job_doc.get("language_qc") or {}
    raw_state = all_states.get(lang, {})
    # Anything that is not a dict (e.g. None) is treated as "no state yet".
    current_state: dict = raw_state if isinstance(raw_state, dict) else {}

    current_status = current_state.get("status", LanguageQCStatus.PENDING.value)
    assigned_reviewer = current_state.get("assigned_reviewer_id")
    if actor.role not in (UserRole.PRODUCTION, UserRole.ADMIN) and assigned_reviewer != str(actor.id):
        raise HTTPException(status_code=403, detail="Not the assigned reviewer")
    if current_status != LanguageQCStatus.PENDING_REVIEW.value:
        # Nothing to open: no-op.
        return LanguageQCState(**current_state)

    now = datetime.utcnow()
    event = LanguageQCEvent(at=now, actor_user_id=str(actor.id), actor_email=actor.email, action="open_review")
    history = current_state.get("history", []) + [event.model_dump()]
    updated_state = {
        **current_state,
        "status": LanguageQCStatus.IN_REVIEW.value,
        "review_started_at": now,
        "history": history,
    }
    # Keep the denormalized per-linguist status in step with language_qc.
    qc_assignments = _rebuild_qc_assignments({**all_states, lang: updated_state})
    await db[_JOBS].update_one(
        {"_id": job_id},
        {"$set": {
            f"language_qc.{lang}.status": LanguageQCStatus.IN_REVIEW.value,
            f"language_qc.{lang}.review_started_at": now,
            f"language_qc.{lang}.history": history,
            "qc_assignments": qc_assignments,
            "updated_at": now,
        }},
    )
    await audit_logger.log_action(
        AuditAction.LANGUAGE_QC_OPEN_REVIEW,
        f"Language QC review opened: {lang} on job {job_id}",
        user=actor,
        request=http_request,
        resource_type="job_language",
        resource_id=f"{job_id}:{lang}",
        details={"lang": lang},
    )
    return LanguageQCState(**updated_state)
# ── Approve / Reject ──────────────────────────────────────────────────────────
async def approve_language(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    actor: User,
    *,
    http_request=None,
    notes: str | None = None,
) -> LanguageQCState:
    """Approve one language of a job that is in PENDING_QC or QC_FEEDBACK.

    Permission is checked by ``_assert_can_approve``. On success the language
    state is set to APPROVED with reviewer details, ``qc_assignments`` is
    rebuilt, an audit entry is written, the other assignees are e-mailed, and
    the refreshed job is passed to ``_maybe_advance_job``.

    E-mail delivery is best-effort: each failed send is logged individually
    and never fails the approval.

    Raises:
        HTTPException: 404 if the job is missing, 400 if the job is not in a
            QC status, plus whatever ``_assert_can_approve`` raises.
    """
    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")
    if job_doc["status"] not in (JobStatus.PENDING_QC.value, JobStatus.QC_FEEDBACK.value):
        raise HTTPException(status_code=400, detail="Job is not in QC status")
    _assert_can_approve(job_doc, lang, actor)

    now = datetime.utcnow()
    event = LanguageQCEvent(at=now, actor_user_id=str(actor.id), actor_email=actor.email, action="approve", notes=notes)
    all_states = job_doc.get("language_qc") or {}
    raw_state = all_states.get(lang, {})
    # Anything that is not a dict (e.g. None) is treated as "no state yet".
    current_state: dict = raw_state if isinstance(raw_state, dict) else {}

    updated_state = {
        **current_state,
        "status": LanguageQCStatus.APPROVED.value,
        "reviewed_at": now,
        "reviewed_by_user_id": str(actor.id),
        "reviewed_by_email": actor.email,
        "notes": notes,
        "history": current_state.get("history", []) + [event.model_dump()],
    }
    qc_assignments = _rebuild_qc_assignments({**all_states, lang: updated_state})
    await db[_JOBS].update_one(
        {"_id": job_id},
        {"$set": {
            f"language_qc.{lang}": updated_state,
            "qc_assignments": qc_assignments,
            "updated_at": now,
        }},
    )
    await audit_logger.log_action(
        AuditAction.LANGUAGE_QC_APPROVE,
        f"Language QC approved: {lang} on job {job_id}",
        user=actor,
        request=http_request,
        resource_type="job_language",
        resource_id=f"{job_id}:{lang}",
        details={"lang": lang, "notes": notes},
    )

    # Notify linguist + any other recipients (the actor is excluded by e-mail).
    recipients = _qc_recipients(job_doc, current_state, exclude_user_id=actor.email)
    if recipients:
        try:
            from ..services.emailer import email_service
            outcomes = await asyncio.gather(*[
                email_service.send_qc_approved_email(
                    to_email=email, full_name=name,
                    job_title=job_doc.get("title", job_id), lang=lang,
                    approver_name=actor.full_name or actor.email,
                    deep_link=_deep_link(job_id, lang),
                )
                for email, name in recipients
            ], return_exceptions=True)
            # return_exceptions=True hands failures back as values, so they
            # must be inspected here or they vanish without a trace.
            for (email, _name), outcome in zip(recipients, outcomes):
                if isinstance(outcome, BaseException):
                    logger.error("Failed to send approval email to %s", email, exc_info=outcome)
        except Exception:
            logger.exception("Failed to send approval emails")

    refreshed = await db[_JOBS].find_one({"_id": job_id})
    # The job could have been removed since the write above; skip advancing then.
    if refreshed:
        await _maybe_advance_job(db, refreshed)
    return LanguageQCState(**updated_state)
# Allowed values for the optional ``category`` argument of ``reject_language``.
REJECT_CATEGORIES = frozenset(["timing", "mistranslation", "terminology", "profanity", "length", "other"])


async def reject_language(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    actor: User,
    notes: str,
    *,
    category: str | None = None,
    http_request=None,
) -> LanguageQCState:
    """Reject one language and send it back to the linguist.

    Requires non-blank ``notes`` and, when given, a ``category`` from
    ``REJECT_CATEGORIES``. Permission is checked by ``_assert_can_approve``.
    The language status becomes IN_PROGRESS (not REJECTED) with the reviewed
    cue counter reset, and the job status becomes QC_FEEDBACK with an entry
    appended to ``review.history``. Both changes are written in one update so
    the job cannot be left half-updated.

    E-mail delivery is best-effort: each failed send is logged individually
    and never fails the rejection.

    Raises:
        HTTPException: 422 for blank notes or an unknown category, 404 if the
            job is missing, 400 if the job is not in a QC status, plus
            whatever ``_assert_can_approve`` raises.
    """
    if not notes or not notes.strip():
        raise HTTPException(status_code=422, detail="Rejection notes are required")
    if category and category not in REJECT_CATEGORIES:
        raise HTTPException(status_code=422, detail=f"Invalid reject category. Must be one of: {', '.join(sorted(REJECT_CATEGORIES))}")

    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")
    if job_doc["status"] not in (JobStatus.PENDING_QC.value, JobStatus.QC_FEEDBACK.value):
        raise HTTPException(status_code=400, detail="Job is not in QC status")
    _assert_can_approve(job_doc, lang, actor)

    now = datetime.utcnow()
    event = LanguageQCEvent(at=now, actor_user_id=str(actor.id), actor_email=actor.email, action="reject", notes=notes)
    all_states = job_doc.get("language_qc") or {}
    raw_state = all_states.get(lang, {})
    # Anything that is not a dict (e.g. None) is treated as "no state yet".
    current_state: dict = raw_state if isinstance(raw_state, dict) else {}

    updated_state = {
        **current_state,
        "status": LanguageQCStatus.IN_PROGRESS.value,  # send back to linguist
        "reviewed_at": now,
        "reviewed_by_user_id": str(actor.id),
        "reviewed_by_email": actor.email,
        "notes": notes,
        "reject_category": category,
        "reviewed_cues": 0,
        "submitted_for_review_at": None,
        "history": current_state.get("history", []) + [event.model_dump()],
    }
    qc_assignments = _rebuild_qc_assignments({**all_states, lang: updated_state})

    # One atomic write for the language state, the job status and its history.
    await db[_JOBS].update_one(
        {"_id": job_id},
        {
            "$set": {
                f"language_qc.{lang}": updated_state,
                "qc_assignments": qc_assignments,
                "status": JobStatus.QC_FEEDBACK.value,
                "updated_at": now,
            },
            "$push": {"review.history": {"at": now, "status": JobStatus.QC_FEEDBACK.value, "by": str(actor.id), "notes": notes}},
        },
    )
    await audit_logger.log_action(
        AuditAction.LANGUAGE_QC_REJECT,
        f"Language QC rejected: {lang} on job {job_id}",
        user=actor,
        request=http_request,
        severity=AuditLogSeverity.WARNING,
        resource_type="job_language",
        resource_id=f"{job_id}:{lang}",
        details={"lang": lang, "notes": notes},
    )

    # Notify the other assignees (the actor is excluded by e-mail).
    recipients = _qc_recipients(job_doc, current_state, exclude_user_id=actor.email)
    if recipients:
        try:
            from ..services.emailer import email_service
            outcomes = await asyncio.gather(*[
                email_service.send_qc_rejected_email(
                    to_email=email, full_name=name,
                    job_title=job_doc.get("title", job_id), lang=lang,
                    reviewer_name=actor.full_name or actor.email,
                    reason=notes,
                    deep_link=_deep_link(job_id, lang),
                )
                for email, name in recipients
            ], return_exceptions=True)
            # return_exceptions=True hands failures back as values, so they
            # must be inspected here or they vanish without a trace.
            for (email, _name), outcome in zip(recipients, outcomes):
                if isinstance(outcome, BaseException):
                    logger.error("Failed to send rejection email to %s", email, exc_info=outcome)
        except Exception:
            logger.exception("Failed to send rejection emails")

    return LanguageQCState(**updated_state)
async def reopen_language(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    actor: User,
    *,
    http_request=None,
    notes: str | None = None,
) -> LanguageQCState:
    """Reset a language back to PENDING for another QC round (PRODUCTION / ADMIN only).

    Clears the review timestamps and reviewer fields, appends a ``reopen``
    event, and rebuilds ``qc_assignments``. The language's current status is
    not checked. If the job is in PENDING_FINAL_REVIEW it is moved back to
    PENDING_QC and an entry is appended to ``review.history``. Everything is
    written in one update so the job cannot be left half-updated.

    Raises:
        HTTPException: 403 if the actor is not PRODUCTION / ADMIN, 404 if the
            job is missing.
    """
    if actor.role not in (UserRole.PRODUCTION, UserRole.ADMIN):
        raise HTTPException(status_code=403, detail="Only PRODUCTION or ADMIN can reopen a language")
    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")

    all_states = job_doc.get("language_qc") or {}
    raw_state = all_states.get(lang, {})
    # Anything that is not a dict (e.g. None) is treated as "no state yet".
    current_state: dict = raw_state if isinstance(raw_state, dict) else {}

    now = datetime.utcnow()
    event = LanguageQCEvent(at=now, actor_user_id=str(actor.id), actor_email=actor.email, action="reopen", notes=notes)
    updated_state = {
        **current_state,
        "status": LanguageQCStatus.PENDING.value,
        "reviewed_at": None,
        "reviewed_by_user_id": None,
        "reviewed_by_email": None,
        "submitted_for_review_at": None,
        "review_started_at": None,
        "notes": notes,
        "history": current_state.get("history", []) + [event.model_dump()],
    }

    set_fields: dict = {
        f"language_qc.{lang}": updated_state,
        "qc_assignments": _rebuild_qc_assignments({**all_states, lang: updated_state}),
        "updated_at": now,
    }
    update_doc: dict = {"$set": set_fields}
    if job_doc["status"] == JobStatus.PENDING_FINAL_REVIEW.value:
        # A reopened language means the job as a whole must go back to QC.
        set_fields["status"] = JobStatus.PENDING_QC.value
        update_doc["$push"] = {
            "review.history": {
                "at": now,
                "status": JobStatus.PENDING_QC.value,
                "by": str(actor.id),
                "notes": f"Language {lang} reopened: {notes or ''}",
            }
        }
    await db[_JOBS].update_one({"_id": job_id}, update_doc)

    await audit_logger.log_action(
        AuditAction.LANGUAGE_QC_REOPEN,
        f"Language QC reopened: {lang} on job {job_id}",
        user=actor,
        request=http_request,
        resource_type="job_language",
        resource_id=f"{job_id}:{lang}",
        details={"lang": lang, "notes": notes},
    )
    return LanguageQCState(**updated_state)
# ── Comments ──────────────────────────────────────────────────────────────────
async def add_comment(
    db: AsyncIOMotorDatabase,
    job_id: str,
    lang: str,
    actor: User,
    body: str,
    *,
    http_request=None,
) -> LanguageQCComment:
    """Append a comment to a language's QC thread and notify the other assignees.

    Allowed for the language's assigned linguist or reviewer and for
    PRODUCTION / ADMIN / PROJECT_MANAGER users. The stored body is stripped of
    surrounding whitespace; the 4000-character limit is applied to the body as
    received.

    The WebSocket broadcast and the notification e-mails are best-effort:
    failures are logged and never fail the request.

    Raises:
        HTTPException: 422 for an empty or over-long body, 404 if the job is
            missing, 403 if the actor may not comment on this language.
    """
    if not body or not body.strip():
        raise HTTPException(status_code=422, detail="Comment body cannot be empty")
    if len(body) > 4000:
        raise HTTPException(status_code=422, detail="Comment too long (max 4000 chars)")

    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")

    # Gate: only assigned linguist, assigned reviewer, or PM/PROD/ADMIN
    raw_state = (job_doc.get("language_qc") or {}).get(lang, {})
    current_state: dict = raw_state if isinstance(raw_state, dict) else {}
    assigned_linguist = current_state.get("assigned_linguist_id")
    assigned_reviewer = current_state.get("assigned_reviewer_id")
    if actor.role not in (UserRole.PRODUCTION, UserRole.ADMIN, UserRole.PROJECT_MANAGER):
        if str(actor.id) not in (assigned_linguist, assigned_reviewer):
            raise HTTPException(status_code=403, detail="Not authorized to comment on this language")

    now = datetime.utcnow()
    comment = LanguageQCComment(
        id=str(uuid4()),
        author_id=str(actor.id),
        author_name=actor.full_name or "",
        author_email=actor.email,
        body=body.strip(),
        created_at=now,
    )
    await db[_JOBS].update_one(
        {"_id": job_id},
        {
            "$push": {f"language_qc.{lang}.comments": comment.model_dump()},
            "$set": {"updated_at": now},
        },
    )
    await audit_logger.log_action(
        AuditAction.LANGUAGE_QC_COMMENT,
        f"Comment added to language {lang} on job {job_id}",
        user=actor,
        request=http_request,
        resource_type="job_language",
        resource_id=f"{job_id}:{lang}",
        details={"lang": lang},
    )

    # WS broadcast — live comment indicator for everyone on this job.
    # Best-effort, but failures are logged rather than silently discarded.
    try:
        await connection_manager.broadcast_to_job(job_id, {
            "type": "language_qc_comment",
            "job_id": job_id,
            "lang": lang,
            "data": {
                "author_name": actor.full_name or actor.email,
                "lang": lang,
                "comment_id": comment.id,
            },
        })
    except Exception:
        logger.exception("Failed to broadcast language QC comment")

    # Fan-out email to all other assignees (the actor is excluded by e-mail).
    recipients = _qc_recipients(job_doc, current_state, exclude_user_id=actor.email)
    if recipients:
        try:
            from ..services.emailer import email_service
            outcomes = await asyncio.gather(*[
                email_service.send_qc_comment_email(
                    to_email=email, full_name=name,
                    job_title=job_doc.get("title", job_id), lang=lang,
                    author_name=actor.full_name or actor.email,
                    comment_body=body.strip(),
                    deep_link=_deep_link(job_id, lang),
                )
                for email, name in recipients
            ], return_exceptions=True)
            # return_exceptions=True hands failures back as values, so they
            # must be inspected here or they vanish without a trace.
            for (email, _name), outcome in zip(recipients, outcomes):
                if isinstance(outcome, BaseException):
                    logger.error("Failed to send comment notification email to %s", email, exc_info=outcome)
        except Exception:
            logger.exception("Failed to send comment notification emails")

    return comment
# ── Queue / list ──────────────────────────────────────────────────────────────
async def list_for_linguist(
    db: AsyncIOMotorDatabase,
    linguist_id: str,
    *,
    accessible_org_ids: list[str] | None = None,
    status_filter: str | None = None,
    skip: int = 0,
    limit: int = 50,
) -> list[dict]:
    """List jobs on which ``linguist_id`` holds at least one language assignment.

    Matching is done against the denormalized ``qc_assignments`` array. When
    ``accessible_org_ids`` is given, only jobs of those organizations are
    considered; when ``status_filter`` is given, the linguist must have an
    assignment with exactly that status. Each returned job document carries an
    extra ``_my_assignments`` key with the linguist's own assignment rows.
    """
    criteria: dict = {"qc_assignments.linguist_id": linguist_id}
    if accessible_org_ids is not None:
        criteria["organization_id"] = {"$in": accessible_org_ids}
    if status_filter:
        # Linguist and status must match on the same array element.
        criteria["qc_assignments"] = {
            "$elemMatch": {"linguist_id": linguist_id, "status": status_filter}
        }

    wanted_fields = {
        "title": 1,
        "status": 1,
        "language_qc": 1,
        "qc_assignments": 1,
        "created_at": 1,
        "updated_at": 1,
    }
    cursor = db[_JOBS].find(criteria, wanted_fields)
    cursor = cursor.skip(skip).limit(limit).sort("updated_at", -1)
    page = await cursor.to_list(length=limit)

    return [
        {
            **job,
            "_my_assignments": [
                row
                for row in (job.get("qc_assignments") or [])
                if row.get("linguist_id") == linguist_id
            ],
        }
        for job in page
    ]
async def list_for_reviewer(
    db: AsyncIOMotorDatabase,
    reviewer_id: str,
    *,
    accessible_org_ids: list[str] | None = None,
    status_filter: str | None = None,
    skip: int = 0,
    limit: int = 50,
) -> list[dict]:
    """Return jobs where the reviewer is assigned to at least one language.

    ``language_qc`` is a dict keyed by language code, so the reviewer match
    is done in Python: jobs are pre-filtered by organization on the server,
    streamed in ``updated_at`` descending order, and kept only when at least
    one language has ``assigned_reviewer_id == reviewer_id`` (and, when
    ``status_filter`` is given, that language's status equals it).

    ``skip`` and ``limit`` count *matching* jobs, so consecutive pages never
    overlap and matches are not lost behind a fixed scan window.

    Each returned job document carries an extra ``_my_assignments`` key: a
    list of ``{"lang": ..., "status": ...}`` for the matching languages.
    """
    if limit <= 0:
        return []

    base_query: dict = {}
    if accessible_org_ids is not None:
        base_query["organization_id"] = {"$in": accessible_org_ids}

    cursor = db[_JOBS].find(
        base_query,
        {"title": 1, "status": 1, "language_qc": 1, "qc_assignments": 1, "created_at": 1, "updated_at": 1},
    ).sort("updated_at", -1)

    # NOTE: the scan is bounded only by the org pre-filter; it stops as soon
    # as the requested page is full.
    remaining_to_skip = max(skip, 0)
    result: list[dict] = []
    async for job in cursor:
        my_langs = [
            {"lang": lang, "status": state.get("status", "pending")}
            for lang, state in (job.get("language_qc") or {}).items()
            if isinstance(state, dict)
            and state.get("assigned_reviewer_id") == reviewer_id
            and (not status_filter or state.get("status") == status_filter)
        ]
        if not my_langs:
            continue
        if remaining_to_skip:
            # Pagination offset applies to matching jobs, not raw documents.
            remaining_to_skip -= 1
            continue
        result.append({**job, "_my_assignments": my_langs})
        if len(result) >= limit:
            break
    return result
async def seed_language_qc_for_job(db: AsyncIOMotorDatabase, job_doc: dict) -> None:
    """Idempotently seed language_qc entries for all languages in a job's outputs.

    The language set is the job's source language plus every key of
    ``outputs``. Languages that already have a ``language_qc`` entry are left
    untouched. New entries start as PENDING, unless the job status is already
    one of the approved / final-review / completed statuses, in which case
    they are seeded as APPROVED and copy reviewer, timestamp and notes from
    the job-level review.

    When at least one entry is added, ``qc_assignments`` is rebuilt from the
    merged state and ``updated_at`` is refreshed in the same ``$set``.
    """
    job_id = str(job_doc["_id"])
    # Sub-documents may be stored as explicit nulls; treat those like "absent"
    # instead of crashing on ``None.get``.
    outputs = job_doc.get("outputs") or {}
    source = job_doc.get("source") or {}
    review = job_doc.get("review") or {}
    existing_qc = job_doc.get("language_qc") or {}

    source_lang = source.get("language", "en")
    missing_langs = ({source_lang} | set(outputs)) - set(existing_qc)
    if not missing_langs:
        return

    is_approved = job_doc.get("status", "") in (
        JobStatus.APPROVED_ENGLISH.value,
        JobStatus.APPROVED_SOURCE.value,
        JobStatus.PENDING_FINAL_REVIEW.value,
        JobStatus.COMPLETED.value,
    )
    initial_status = (
        LanguageQCStatus.APPROVED.value if is_approved else LanguageQCStatus.PENDING.value
    )

    updates: dict[str, Any] = {}
    updated_lang_qc = dict(existing_qc)
    for lang in missing_langs:
        # Build a fresh dict per language so history/comments lists are not shared.
        state: dict[str, Any] = {
            "status": initial_status,
            "history": [],
            "comments": [],
        }
        if is_approved:
            state["reviewed_by_user_id"] = review.get("reviewer_id")
            state["reviewed_at"] = job_doc.get("updated_at")
            state["notes"] = review.get("notes")
        updates[f"language_qc.{lang}"] = state
        updated_lang_qc[lang] = state

    updates["qc_assignments"] = _rebuild_qc_assignments(updated_lang_qc)
    updates["updated_at"] = datetime.utcnow()
    await db[_JOBS].update_one({"_id": job_id}, {"$set": updates})
async def reset_all_for_return_to_qc(db: AsyncIOMotorDatabase, job_id: str) -> None:
    """Reset every language's QC status to pending for a return-to-QC.

    Called by return_to_qc. Only the status and the review/submission
    timestamps and reviewer identity fields are overwritten; all other
    per-language fields (assignments, history, ...) are left as stored.
    ``qc_assignments`` is rebuilt from the reset state. Does nothing when the
    job is missing or has no dict-shaped ``language_qc`` entries.
    """
    doc = await db[_JOBS].find_one({"_id": job_id}, {"language_qc": 1})
    if not doc:
        return

    pending = LanguageQCStatus.PENDING.value
    qc_by_lang = doc.get("language_qc") or {}
    cleared_fields = (
        "reviewed_at",
        "reviewed_by_user_id",
        "reviewed_by_email",
        "submitted_for_review_at",
        "review_started_at",
    )

    set_ops: dict[str, Any] = {}
    for code, entry in qc_by_lang.items():
        if not isinstance(entry, dict):
            continue
        prefix = f"language_qc.{code}"
        set_ops[f"{prefix}.status"] = pending
        for field_name in cleared_fields:
            set_ops[f"{prefix}.{field_name}"] = None

    if not set_ops:
        return

    # In-memory view of the post-reset state, used only to rebuild qc_assignments.
    reset_view = {
        code: {**(entry if isinstance(entry, dict) else {}), "status": pending}
        for code, entry in qc_by_lang.items()
    }
    set_ops["qc_assignments"] = _rebuild_qc_assignments(reset_view)
    await db[_JOBS].update_one({"_id": job_id}, {"$set": set_ops})
# ── Internal ──────────────────────────────────────────────────────────────────
def _assert_can_approve(job_doc: dict, lang: str, actor: User) -> None:
    """Raise 403 unless ``actor`` is allowed to approve ``lang`` on this job.

    Two-stage QC is enforced: the language must have an assigned reviewer,
    the actor must be that reviewer, and the linguist must already have
    submitted the language for review.
    PRODUCTION and ADMIN roles bypass all of these checks.
    """
    privileged_roles = (UserRole.PRODUCTION, UserRole.ADMIN)
    if actor.role in privileged_roles:
        return

    def _forbidden(detail: str) -> HTTPException:
        return HTTPException(status_code=403, detail=detail)

    qc_by_lang = job_doc.get("language_qc") or {}
    entry = qc_by_lang.get(lang, {})
    # A malformed (non-dict) entry is treated the same as an empty one.
    lang_state = entry if isinstance(entry, dict) else {}

    reviewer_id = lang_state.get("assigned_reviewer_id")
    if reviewer_id is None:
        raise _forbidden(
            f"Language '{lang}' has no assigned reviewer — a reviewer must be assigned before approving"
        )
    if reviewer_id != str(actor.id):
        raise _forbidden(f"You are not the assigned reviewer for language '{lang}'")
    if not lang_state.get("submitted_for_review_at"):
        raise _forbidden(
            f"Language '{lang}' has not been submitted for review by the linguist yet"
        )
# Backward-compatible alias: `_assert_can_act` is the previous name of
# `_assert_can_approve`, kept so any remaining callers keep working.
_assert_can_act = _assert_can_approve
async def _maybe_advance_job(db: AsyncIOMotorDatabase, job_doc: dict) -> None:
    """Advance job to pending_final_review if all languages are approved.

    No-op unless the job is currently in PENDING_QC or QC_FEEDBACK and
    ``_all_approved(job_doc)`` holds for the document passed in. The status
    transition is done with a conditional ``find_one_and_update`` that
    re-checks the status in its filter, so the update (and the history entry
    it pushes) is applied only if the job is still in a QC status at write
    time. On success a ``job_status_change`` WebSocket message is broadcast
    best-effort: a broadcast failure is logged and never propagated.
    """
    job_id = str(job_doc["_id"])
    qc_statuses = [JobStatus.PENDING_QC.value, JobStatus.QC_FEEDBACK.value]
    if job_doc["status"] not in qc_statuses:
        return
    if not _all_approved(job_doc):
        return

    now = datetime.utcnow()
    target_status = JobStatus.PENDING_FINAL_REVIEW.value
    result = await db[_JOBS].find_one_and_update(
        {"_id": job_id, "status": {"$in": qc_statuses}},
        {
            "$set": {"status": target_status, "updated_at": now},
            "$push": {
                "review.history": {
                    "at": now,
                    "status": target_status,
                    "by": "system",
                    "notes": "All languages approved",
                },
            },
        },
        return_document=True,
    )
    if not result:
        # Filter did not match: the job is no longer in a QC status.
        return

    logger.info(f"Job {job_id} auto-advanced to pending_final_review — all languages approved")
    try:
        await connection_manager.broadcast_to_job(
            job_id,
            {"type": "job_status_change", "job_id": job_id, "status": target_status},
        )
    except Exception:
        # The DB transition already succeeded; the live notification is
        # best-effort, but leave a trace instead of failing silently.
        logger.exception(f"Failed to broadcast job_status_change for job {job_id}")