video-accessibility/backend/app/main.py
Vadym Samoilenko fa351e4d25 feat: per-client glossary — hybrid exact/vector retrieval + AI injection
Adds full glossary system so Gemini uses client-approved terminology
when generating subtitles and translations (critical for 3M brand names
and product codes across 16 target locales).

Backend:
- lib/locales.py: BCP-47 locale registry, normalises xlsx fr_fr → fr-FR
- models/glossary.py: Glossary / GlossaryVersion / GlossaryTerm + enums
- services/glossary_service.py: xlsx parse (openpyxl), ingest to Mongo,
  hybrid retrieval (Aho-Corasick exact + Atlas Vector Search), prompt block
- services/embedding_service.py: Gemini text-embedding-004, batch 100, retry
- tasks/embed_glossary.py: Celery background task for async embedding
- api/v1/routes_glossaries.py: CRUD endpoints under /clients/{id}/glossaries
- gemini.py: _build_glossary_block(), {GLOSSARY} injection in all 4 call sites
- tts.py / gemini_tts.py: pass full locale codes (no split("-")[0] truncation)
- tasks/translate_and_synthesize.py: glossary lookup + injection per language
- prompts: {GLOSSARY} placeholder in ingestion, targeted, transcreation prompts
- pyproject.toml: +openpyxl, +pyahocorasick

Frontend:
- routes/admin/glossaries/: GlossaryList, GlossaryUpload, GlossaryDetail
- App.tsx: 3 new routes under /admin/clients/:clientId/glossaries
- ClientDetail.tsx: Glossaries card with count + quick links
- types/api.ts: Glossary, GlossaryVersion, GlossaryDetail, GlossaryTerm types
- lib/api.ts: 7 new API methods (upload, list, detail, terms, versions, activate, archive)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-29 13:03:38 +01:00

329 lines
13 KiB
Python

from contextlib import asynccontextmanager
import sentry_sdk
from fastapi import FastAPI, Request, HTTPException
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from sentry_sdk.integrations.fastapi import FastApiIntegration
from sentry_sdk.integrations.redis import RedisIntegration
from sentry_sdk.integrations.pymongo import PyMongoIntegration
from sentry_sdk.integrations.celery import CeleryIntegration
from .api.v1.routes_admin import router as admin_router
from .api.v1.routes_auth import router as auth_router
from .api.v1.routes_clients import router as clients_router
from .api.v1.routes_files import router as files_router
from .api.v1.routes_jobs import router as jobs_router
from .api.v1.routes_invitations import org_router as invitations_org_router
from .api.v1.routes_invitations import router as invitations_router
from .api.v1.routes_organizations import router as organizations_router
from .api.v1.routes_review_notes import router as review_notes_router
from .api.v1.routes_tts import router as tts_router
from .api.v1.routes_websockets import router as websockets_router
from .api.v1.routes_vtt_versions import router as vtt_versions_router
from .api.v1.routes_language_qc import router as language_qc_router
from .api.v1.routes_glossaries import router as glossaries_router
from .services.websocket import connection_manager
from .core.config import settings
from .core.secrets_config import initialize_config
from .core.database import close_mongo_connection, connect_to_mongo, create_indexes, get_database
from .services.language_qc import seed_language_qc_for_job
from .core.logging import setup_logging
from .core.redis import close_redis_connection, connect_to_redis, get_redis_client
from .core.seed import seed_default_admin
from .middleware import create_rate_limit_middleware, create_validation_middleware
from .telemetry import (
app_metrics,
instrument_dependencies,
instrument_fastapi_app,
setup_tracing
)
from .services.websocket import connection_manager
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run application startup, yield while the app serves, then clean up.

    Startup, in order: configure logging, load secrets (prod only, falling
    back to environment variables on failure), initialise Sentry when a DSN
    is configured, start the Prometheus server (prod only), connect to
    MongoDB and Redis, seed the default admin, seed ``language_qc`` on jobs
    that lack it, start the WebSocket connection manager and build the
    rate-limit / validation middleware callables.

    Shutdown: stop the WebSocket connection manager, then close the MongoDB
    and Redis connections. Each shutdown step runs even if an earlier one
    raises or an exception is thrown into the generator at ``yield``.

    Args:
        app: The FastAPI application. The middleware callables are stored on
            ``app.state`` for the HTTP middleware wrappers to pick up.
    """
    # Startup
    setup_logging()

    # Initialize configuration with secrets
    if settings.app_env == "prod":
        try:
            await initialize_config()
            print("✅ Configuration initialized with Secret Manager")
        except Exception as e:
            # Best effort: the app keeps running on environment variables.
            print(f"⚠️ Failed to load secrets from Secret Manager: {e}")
            print("⚠️ Falling back to environment variables")

    # Initialize Sentry error tracking
    if settings.sentry_dsn and settings.sentry_dsn.startswith(('http', 'https')):
        sentry_sdk.init(
            dsn=settings.sentry_dsn,
            integrations=[
                FastApiIntegration(),
                RedisIntegration(),
                PyMongoIntegration(),
                CeleryIntegration(monitor_beat_tasks=True),
            ],
            traces_sample_rate=0.1 if settings.app_env == "prod" else 1.0,
            environment=settings.app_env,
            release="1.0.0",
            attach_stacktrace=True,
            send_default_pii=False,  # Don't send PII for privacy
        )

    # Initialize telemetry (disabled for local development)
    # setup_tracing("accessible-video-api", "1.0.0")
    # instrument_dependencies()

    # Start Prometheus metrics server in production
    if settings.app_env == "prod":
        app_metrics.start_prometheus_server(port=8001)

    await connect_to_mongo()
    await connect_to_redis()

    try:
        db = await get_database()
        await seed_default_admin(db)
    except Exception as e:
        # Seeding is best effort; startup continues without it.
        print(f"⚠️ Could not seed default admin: {e}")

    # await create_indexes()  # Temporarily disabled for debugging

    # Seed language_qc for existing jobs that don't have it yet
    try:
        db = await get_database()
        async for job_doc in db.jobs.find(
            {"language_qc": {"$exists": False}},
            {"_id": 1, "status": 1, "outputs": 1, "source": 1, "review": 1, "updated_at": 1, "requested_outputs": 1},
        ):
            await seed_language_qc_for_job(db, job_doc)
        print("✅ language_qc migration complete")
    except Exception as e:
        print(f"⚠️ language_qc migration failed: {e}")

    # Start WebSocket connection manager
    await connection_manager.start()

    # Initialize middleware with Redis client.
    # NOTE(review): both middleware objects are only created when a Redis
    # client is available; the HTTP wrappers fall through when the
    # attributes are missing from app.state.
    redis_client = get_redis_client()
    if redis_client:
        rate_limit_middleware = await create_rate_limit_middleware(redis_client)
        validation_middleware = await create_validation_middleware()

        # Store middleware in app state for access
        app.state.rate_limit_middleware = rate_limit_middleware
        app.state.validation_middleware = validation_middleware

    try:
        yield
    finally:
        # Shutdown: nested finally blocks guarantee that every resource is
        # released even when an earlier step fails; the first error still
        # propagates to the caller.
        try:
            await connection_manager.stop()
        finally:
            try:
                await close_mongo_connection()
            finally:
                await close_redis_connection()
# Application instance; startup and shutdown work is delegated to ``lifespan``.
app = FastAPI(
    lifespan=lifespan,
    version="1.0.0",
    title="Accessible Video API",
    description="API for accessible video processing platform",
)

# CORS middleware: origins come from settings, credentials are allowed, and
# the permitted methods are listed explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
    allow_credentials=True,
    allow_origins=settings.cors_origins_list,
)
# Custom CORS error handler middleware to ensure CORS headers are added to all
# error responses. It is registered after CORSMiddleware; middleware added
# later wraps middleware added earlier, so this handler sees responses and
# exceptions coming out of CORSMiddleware and everything beneath it.
@app.middleware("http")
async def cors_error_handler(request, call_next):
    """Ensure CORS headers are added to all responses, including errors.

    Any exception escaping ``call_next`` is printed with its traceback and
    converted into a JSON 500 response. For requests whose ``Origin`` header
    is in ``settings.cors_origins_list`` the allow-origin / allow-credentials
    headers are set, plus allow-methods / allow-headers on 4xx/5xx responses.
    """
    import traceback

    try:
        response = await call_next(request)
    except Exception as error:
        # LOG THE EXCEPTION BEFORE HANDLING IT
        print(f"🚨 EXCEPTION IN CORS MIDDLEWARE: {error}")
        print(f"Traceback:\n{traceback.format_exc()}")

        # Handle any unhandled exceptions and add CORS headers.
        # NOTE(review): the exception text is returned to the client in the
        # "error" field; consider whether that is acceptable in production.
        error_payload = {"detail": "Internal server error", "error": str(error)}
        response = JSONResponse(status_code=500, content=error_payload)

    request_origin = request.headers.get("origin")
    if not request_origin or request_origin not in settings.cors_origins_list:
        return response

    # Always add CORS headers for allowed origins
    response.headers["access-control-allow-origin"] = request_origin
    response.headers["access-control-allow-credentials"] = "true"

    # Add other necessary CORS headers for error responses
    is_error_response = response.status_code >= 400
    if is_error_response:
        response.headers["access-control-allow-methods"] = "GET, POST, PUT, PATCH, DELETE"
        response.headers["access-control-allow-headers"] = "*"

    return response
# Global exception handler to ensure CORS headers on all errors
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """Turn an ``HTTPException`` into a JSON response with CORS headers.

    Args:
        request: The incoming request; only its ``Origin`` header is read.
        exc: The raised exception. Its status code, detail and any headers
            attached to it are carried over to the response.

    Returns:
        A ``JSONResponse`` with body ``{"detail": exc.detail}``. When the
        request origin is in ``settings.cors_origins_list`` the CORS
        allow-* headers are added as well.
    """
    response = JSONResponse(
        status_code=exc.status_code,
        content={"detail": exc.detail},
        # Preserve headers supplied with the exception (for example
        # WWW-Authenticate on a 401); they were previously dropped.
        headers=getattr(exc, "headers", None),
    )

    # Add CORS headers
    origin = request.headers.get("origin")
    if origin and origin in settings.cors_origins_list:
        response.headers["access-control-allow-origin"] = origin
        response.headers["access-control-allow-credentials"] = "true"
        response.headers["access-control-allow-methods"] = "GET, POST, PUT, PATCH, DELETE"
        response.headers["access-control-allow-headers"] = "*"

    return response
# Global exception handler for validation errors
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """Return request validation errors as a 422 JSON response with CORS headers.

    Args:
        request: The incoming request; only its ``Origin`` header is read.
        exc: The validation error raised while parsing the request.

    Returns:
        A 422 ``JSONResponse`` with ``detail`` (the validation errors) and
        ``body`` (the received body). When the request origin is in
        ``settings.cors_origins_list`` the CORS allow-* headers are added.
    """
    from fastapi.encoders import jsonable_encoder

    # The error list and the echoed body can hold values that the JSON
    # encoder cannot serialise directly, so convert them first; otherwise
    # building the response would itself fail.
    response = JSONResponse(
        status_code=422,
        content=jsonable_encoder({"detail": exc.errors(), "body": exc.body}),
    )

    # Add CORS headers
    origin = request.headers.get("origin")
    if origin and origin in settings.cors_origins_list:
        response.headers["access-control-allow-origin"] = origin
        response.headers["access-control-allow-credentials"] = "true"
        response.headers["access-control-allow-methods"] = "GET, POST, PUT, PATCH, DELETE"
        response.headers["access-control-allow-headers"] = "*"

    return response
# Global exception handler for all other exceptions
@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
    """Log an uncaught exception and answer with a JSON 500 response.

    The failure is written to the module logger and echoed to stdout, then a
    500 response is built. Allow-origin / allow-credentials headers are added
    when the request origin is in ``settings.cors_origins_list``.
    """
    import traceback
    from .core.logging import get_logger

    log = get_logger(__name__)
    method, path = request.method, request.url.path
    trace_text = traceback.format_exc()

    log.error(f"Unhandled exception in {method} {path}: {exc}")
    log.error(f"Exception type: {type(exc).__name__}")
    log.error(f"Traceback: {trace_text}")

    # Also print to stdout for immediate visibility
    print(f"🚨 UNHANDLED EXCEPTION: {method} {path}")
    print(f"Exception: {exc}")
    print(f"Traceback:\n{trace_text}")

    # NOTE(review): the exception text is exposed to the client in "error";
    # consider whether that is acceptable in production.
    body = {"detail": "Internal server error", "error": str(exc)}
    response = JSONResponse(status_code=500, content=body)

    # Add CORS headers
    request_origin = request.headers.get("origin")
    if not request_origin or request_origin not in settings.cors_origins_list:
        return response

    response.headers["access-control-allow-origin"] = request_origin
    response.headers["access-control-allow-credentials"] = "true"
    return response
# Add custom middleware (order matters - applied in reverse order)
@app.middleware("http")
async def rate_limiting_middleware(request, call_next):
    """Route the request through the rate limiter stored on ``app.state``.

    The login and token-refresh endpoints bypass the limiter, and so does
    every request while no limiter has been stored on ``app.state``.
    """
    # Skip middleware for auth endpoints during debugging.
    # NOTE(review): this leaves the login endpoint without rate limiting.
    bypass = request.url.path in ["/api/v1/auth/login", "/api/v1/auth/refresh"]

    if not bypass and hasattr(app.state, 'rate_limit_middleware'):
        limiter = app.state.rate_limit_middleware
        return await limiter(request, call_next)

    return await call_next(request)
# Request validation is switched off while an issue is being debugged.
# Set to True to route requests through app.state.validation_middleware again.
_VALIDATION_MIDDLEWARE_ENABLED = False


@app.middleware("http")
async def validation_middleware(request, call_next):
    """Apply request validation middleware.

    While ``_VALIDATION_MIDDLEWARE_ENABLED`` is False every request is passed
    straight through. When enabled, the login and token-refresh endpoints are
    still skipped, and other requests go through the validator stored on
    ``app.state`` if one has been stored there.
    """
    # TEMPORARILY DISABLED FOR DEBUGGING (see the flag above). This replaces
    # an unconditional early return that left the rest of the body unreachable.
    if not _VALIDATION_MIDDLEWARE_ENABLED:
        return await call_next(request)

    # Skip middleware for auth endpoints during debugging
    if request.url.path in ["/api/v1/auth/login", "/api/v1/auth/refresh"]:
        return await call_next(request)

    if hasattr(app.state, 'validation_middleware'):
        return await app.state.validation_middleware(request, call_next)

    return await call_next(request)
# Instrument FastAPI app for tracing (disabled for local development)
# instrument_fastapi_app(app)

# Include routers: each API router is mounted under the same version prefix,
# in the order listed here.
for _api_router in (
    auth_router,
    clients_router,
    organizations_router,
    invitations_org_router,
    invitations_router,
    files_router,
    jobs_router,
    review_notes_router,
    vtt_versions_router,
    language_qc_router,
    glossaries_router,
    tts_router,
    admin_router,
    websockets_router,
):
    app.include_router(_api_router, prefix="/api/v1")
del _api_router  # keep the loop variable out of the module namespace
@app.on_event("startup")
async def startup_event():
    """Initialize services on startup.

    Starts the WebSocket connection manager and re-raises if that fails.

    NOTE(review): the application is created with ``lifespan=lifespan``, and
    ``lifespan`` already starts the connection manager. FastAPI's lifespan
    documentation states that ``on_event`` handlers are not called when a
    lifespan is supplied; confirm whether this hook is still needed.

    Raises:
        Exception: Whatever ``connection_manager.start()`` raises.
    """
    # No module-level ``logger`` exists in this file, so obtain one here
    # instead of failing with NameError.
    from .core.logging import get_logger

    logger = get_logger(__name__)
    logger.info("🚀 Starting up FastAPI application...")

    # Start WebSocket connection manager
    try:
        await connection_manager.start()
        logger.info("✅ WebSocket connection manager started successfully")
    except Exception as e:
        logger.error(f"❌ Failed to start WebSocket connection manager: {e}")
        raise
@app.on_event("shutdown")
async def shutdown_event():
    """Cleanup services on shutdown.

    Stops the WebSocket connection manager. A failure is logged and not
    re-raised, so shutdown continues.

    NOTE(review): the application is created with ``lifespan=lifespan``, and
    ``lifespan`` already stops the connection manager. FastAPI's lifespan
    documentation states that ``on_event`` handlers are not called when a
    lifespan is supplied; confirm whether this hook is still needed.
    """
    # No module-level ``logger`` exists in this file, so obtain one here
    # instead of failing with NameError.
    from .core.logging import get_logger

    logger = get_logger(__name__)
    logger.info("🛑 Shutting down FastAPI application...")

    # Stop WebSocket connection manager
    try:
        await connection_manager.stop()
        logger.info("✅ WebSocket connection manager stopped successfully")
    except Exception as e:
        logger.error(f"❌ Error stopping WebSocket connection manager: {e}")
@app.get("/health")
async def health_check():
    """Report a static health status together with the API version string."""
    payload = {}
    payload["status"] = "healthy"
    payload["version"] = "1.0.0"
    return payload
@app.get("/debug-test")
async def debug_test():
    """Print a marker line to stdout and confirm that routing works.

    NOTE(review): this looks like a debugging aid; confirm whether it should
    be exposed outside development.
    """
    marker = "🔥🔥🔥 DEBUG TEST ENDPOINT HIT 🔥🔥🔥"
    print(marker)
    reply = {"message": "If you see this, routing works"}
    return reply
@app.get("/metrics")
async def metrics():
    """Prometheus metrics endpoint.

    Returns the output of ``generate_latest()`` using the content type
    constant provided by ``prometheus_client``.
    """
    from fastapi import Response
    from prometheus_client import CONTENT_TYPE_LATEST, generate_latest

    exposition = generate_latest()
    return Response(content=exposition, media_type=CONTENT_TYPE_LATEST)