video-accessibility/backend/app/models/audit_log.py
Vadym Samoilenko fa351e4d25 feat: per-client glossary — hybrid exact/vector retrieval + AI injection
Adds full glossary system so Gemini uses client-approved terminology
when generating subtitles and translations (critical for 3M brand names
and product codes across 16 target locales).

Backend:
- lib/locales.py: BCP-47 locale registry, normalises xlsx fr_fr → fr-FR
- models/glossary.py: Glossary / GlossaryVersion / GlossaryTerm + enums
- services/glossary_service.py: xlsx parse (openpyxl), ingest to Mongo,
  hybrid retrieval (Aho-Corasick exact + Atlas Vector Search), prompt block
- services/embedding_service.py: Gemini text-embedding-004, batch 100, retry
- tasks/embed_glossary.py: Celery background task for async embedding
- api/v1/routes_glossaries.py: CRUD endpoints under /clients/{id}/glossaries
- gemini.py: _build_glossary_block(), {GLOSSARY} injection in all 4 call sites
- tts.py / gemini_tts.py: pass full locale codes (no split("-")[0] truncation)
- tasks/translate_and_synthesize.py: glossary lookup + injection per language
- prompts: {GLOSSARY} placeholder in ingestion, targeted, transcreation prompts
- pyproject.toml: +openpyxl, +pyahocorasick

Frontend:
- routes/admin/glossaries/: GlossaryList, GlossaryUpload, GlossaryDetail
- App.tsx: 3 new routes under /admin/clients/:clientId/glossaries
- ClientDetail.tsx: Glossaries card with count + quick links
- types/api.ts: Glossary, GlossaryVersion, GlossaryDetail, GlossaryTerm types
- lib/api.ts: 7 new API methods (upload, list, detail, terms, versions, activate, archive)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-29 13:03:38 +01:00

188 lines
5.4 KiB
Python

"""Audit log model for tracking sensitive operations."""
from datetime import datetime
from enum import Enum
from typing import Any, Dict, Optional
from bson import ObjectId
from pydantic import BaseModel, Field
from .user import PyObjectId
class AuditAction(str, Enum):
"""Enumeration of auditable actions."""
# Authentication actions
LOGIN_SUCCESS = "auth.login.success"
LOGIN_FAILURE = "auth.login.failure"
LOGOUT = "auth.logout"
TOKEN_REFRESH = "auth.token.refresh"
PASSWORD_CHANGE = "auth.password.change"
PASSWORD_RESET = "auth.password.reset"
# User management actions
USER_CREATE = "user.create"
USER_UPDATE = "user.update"
USER_DELETE = "user.delete"
USER_ROLE_CHANGE = "user.role.change"
USER_ACTIVATE = "user.activate"
USER_DEACTIVATE = "user.deactivate"
# Job management actions
JOB_CREATE = "job.create"
JOB_UPDATE = "job.update"
JOB_DELETE = "job.delete"
JOB_APPROVE = "job.approve"
JOB_REJECT = "job.reject"
JOB_CANCEL = "job.cancel"
JOB_STATUS_CHANGE = "job.status.change"
# File operations
FILE_UPLOAD = "file.upload"
FILE_DOWNLOAD = "file.download"
FILE_DELETE = "file.delete"
FILE_ACCESS = "file.access"
# VTT editing actions
VTT_EDIT = "vtt.edit"
VTT_APPROVE = "vtt.approve"
VTT_REJECT = "vtt.reject"
# Per-language QC actions
LANGUAGE_QC_ASSIGN = "language_qc.assign"
LANGUAGE_QC_REASSIGN = "language_qc.reassign"
LANGUAGE_QC_APPROVE = "language_qc.approve"
LANGUAGE_QC_REJECT = "language_qc.reject"
LANGUAGE_QC_REOPEN = "language_qc.reopen"
# Admin actions
ADMIN_CONFIG_CHANGE = "admin.config.change"
ADMIN_SYSTEM_ACTION = "admin.system.action"
ADMIN_DATA_EXPORT = "admin.data.export"
ADMIN_AUDIT_ACCESS = "admin.audit.access"
# Glossary management
GLOSSARY_UPLOAD = "glossary.upload"
GLOSSARY_VERSION_UPLOAD = "glossary.version.upload"
GLOSSARY_ACTIVATE = "glossary.activate"
GLOSSARY_ARCHIVE = "glossary.archive"
# Security events
RATE_LIMIT_EXCEEDED = "security.rate_limit.exceeded"
VALIDATION_FAILURE = "security.validation.failure"
UNAUTHORIZED_ACCESS = "security.unauthorized.access"
SUSPICIOUS_ACTIVITY = "security.suspicious.activity"
class AuditLogSeverity(str, Enum):
"""Severity levels for audit events."""
INFO = "info" # Normal operations
WARNING = "warning" # Suspicious but not critical
ERROR = "error" # Failed operations
CRITICAL = "critical" # Security incidents
class AuditLog(BaseModel):
"""Audit log entry model."""
id: Optional[PyObjectId] = Field(default_factory=PyObjectId, alias="_id")
# Core audit fields
timestamp: datetime = Field(default_factory=datetime.utcnow)
action: AuditAction
severity: AuditLogSeverity = AuditLogSeverity.INFO
# Actor information
user_id: Optional[PyObjectId] = None
user_email: Optional[str] = None
user_role: Optional[str] = None
# Request context
ip_address: Optional[str] = None
user_agent: Optional[str] = None
request_id: Optional[str] = None
session_id: Optional[str] = None
# Resource information
resource_type: Optional[str] = None # e.g., "job", "user", "file"
resource_id: Optional[str] = None
resource_name: Optional[str] = None
# Action details
description: str
details: Dict[str, Any] = Field(default_factory=dict)
# Outcome
success: bool = True
error_message: Optional[str] = None
# Additional metadata
environment: str = "prod"
service_name: str = "accessible-video-api"
api_version: str = "v1"
class Config:
populate_by_name = True
arbitrary_types_allowed = True
json_encoders = {ObjectId: str}
class AuditLogCreate(BaseModel):
"""Schema for creating audit log entries."""
action: AuditAction
severity: AuditLogSeverity = AuditLogSeverity.INFO
description: str
# Optional fields that can be provided
user_id: Optional[PyObjectId] = None
user_email: Optional[str] = None
user_role: Optional[str] = None
ip_address: Optional[str] = None
user_agent: Optional[str] = None
request_id: Optional[str] = None
resource_type: Optional[str] = None
resource_id: Optional[str] = None
resource_name: Optional[str] = None
details: Dict[str, Any] = Field(default_factory=dict)
success: bool = True
error_message: Optional[str] = None
class AuditLogQuery(BaseModel):
"""Schema for querying audit logs."""
# Time range
start_date: Optional[datetime] = None
end_date: Optional[datetime] = None
# Filters
action: Optional[AuditAction] = None
severity: Optional[AuditLogSeverity] = None
user_id: Optional[PyObjectId] = None
user_email: Optional[str] = None
resource_type: Optional[str] = None
resource_id: Optional[str] = None
success: Optional[bool] = None
# Search
search: Optional[str] = None # Full-text search in description and details
# Pagination
skip: int = 0
limit: int = 100
# Sorting
sort_by: str = "timestamp"
sort_order: int = -1 # -1 for descending, 1 for ascending
class AuditLogResponse(BaseModel):
"""Response schema for audit log queries."""
logs: list[AuditLog]
total_count: int
page: int
page_size: int
has_more: bool