video-accessibility/backend/pyproject.toml
Vadym Samoilenko fa351e4d25 feat: per-client glossary — hybrid exact/vector retrieval + AI injection
Adds full glossary system so Gemini uses client-approved terminology
when generating subtitles and translations (critical for 3M brand names
and product codes across 16 target locales).

Backend:
- lib/locales.py: BCP-47 locale registry, normalises xlsx fr_fr → fr-FR
- models/glossary.py: Glossary / GlossaryVersion / GlossaryTerm + enums
- services/glossary_service.py: xlsx parse (openpyxl), ingest to Mongo,
  hybrid retrieval (Aho-Corasick exact + Atlas Vector Search), prompt block
- services/embedding_service.py: Gemini text-embedding-004, batch 100, retry
- tasks/embed_glossary.py: Celery background task for async embedding
- api/v1/routes_glossaries.py: CRUD endpoints under /clients/{id}/glossaries
- gemini.py: _build_glossary_block(), {GLOSSARY} injection in all 4 call sites
- tts.py / gemini_tts.py: pass full locale codes (no split("-")[0] truncation)
- tasks/translate_and_synthesize.py: glossary lookup + injection per language
- prompts: {GLOSSARY} placeholder in ingestion, targeted, transcreation prompts
- pyproject.toml: +openpyxl, +pyahocorasick

Frontend:
- routes/admin/glossaries/: GlossaryList, GlossaryUpload, GlossaryDetail
- App.tsx: 3 new routes under /admin/clients/:clientId/glossaries
- ClientDetail.tsx: Glossaries card with count + quick links
- types/api.ts: Glossary, GlossaryVersion, GlossaryDetail, GlossaryTerm types
- lib/api.ts: 7 new API methods (upload, list, detail, terms, versions, activate, archive)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-29 13:03:38 +01:00

111 lines
No EOL
2.9 KiB
TOML

# Poetry package metadata for the backend distribution.
[tool.poetry]
name = "accessible-video-backend"
version = "0.1.0"
description = "FastAPI backend for accessible video processing platform"
# NOTE(review): this looks like an unedited template placeholder — replace it
# with the real maintainer name(s) and contact address.
authors = ["Your Name <your.email@example.com>"]
readme = "README.md"
# Runtime dependencies.
# Poetry's caret (^) keeps the left-most non-zero version component fixed:
# "^0.24.0" means >=0.24.0,<0.25.0, and a "^0.0.x" constraint matches only
# that single release. Regenerate poetry.lock after editing this table.
[tool.poetry.dependencies]
python = "^3.11"
# Web framework, ASGI/WSGI servers, settings and validation.
fastapi = "^0.115.0"
uvicorn = { extras = ["standard"], version = "^0.24.0" }
# NOTE(review): the caret keeps gunicorn on the 21.x line; 22.0.0 reportedly
# carries a request-smuggling fix (CVE-2024-1135) — verify against the
# gunicorn changelog and consider raising this after testing.
gunicorn = "^21.2.0"
pydantic = { extras = ["email"], version = "^2.5.0" }
pydantic-settings = "^2.1.0"
# MongoDB drivers (sync + async), Redis and the Celery task queue.
pymongo = "^4.6.0"
motor = "^3.3.2"
redis = "^5.0.1"
celery = { extras = ["redis"], version = "^5.3.4" }
# Google Cloud clients and the Gemini SDK.
google-cloud-storage = "^2.10.0"
google-cloud-translate = "^3.12.1"
google-cloud-texttospeech = "^2.16.3"
google-cloud-secret-manager = "^2.18.1"
google-genai = "^1.56.0"
sendgrid = "^6.11.0"
# Token handling and password hashing.
python-jose = { extras = ["cryptography"], version = "^3.3.0" }
libpass = { extras = ["bcrypt"], version = "^1.9.1" }
# Was "^0.0.6", which Poetry resolves to exactly 0.0.6. That release predates
# the fixes for CVE-2024-24762 (fixed in 0.0.7) and CVE-2024-53981 (fixed in
# 0.0.18), so require at least 0.0.18. A caret cannot express a range across
# 0.0.x releases, hence the explicit bounds.
python-multipart = ">=0.0.18,<0.1.0"
# Tracing, metrics and error reporting.
opentelemetry-api = "^1.21.0"
opentelemetry-sdk = "^1.21.0"
opentelemetry-instrumentation-fastapi = "^0.42b0"
opentelemetry-instrumentation-pymongo = "^0.42b0"
opentelemetry-instrumentation-redis = "^0.42b0"
opentelemetry-exporter-gcp-trace = "^1.6.0"
opentelemetry-exporter-otlp = "^1.21.0"
# opentelemetry-exporter-prometheus = "^1.11.1" # Temporarily disabled - version conflicts
prometheus-client = "^0.19.0"
sentry-sdk = { extras = ["fastapi"], version = "^1.38.0" }
# Audio/video processing and speech recognition.
ffmpeg-python = "^0.2.0"
pydub = "^0.25.1"
faster-whisper = "^1.2.0"
python-magic = "^0.4.27"
aiohttp = "^3.12.15"
jinja2 = "^3.1.6"
# The `python` marker installs this backport only on Python 3.13+, where the
# standard-library audioop module no longer exists.
audioop-lts = { version = "^0.2.2", python = ">=3.13" }
# Glossary feature: .xlsx parsing and Aho-Corasick exact-term matching.
openpyxl = "^3.1.2"
pyahocorasick = "^2.1.1"
# Development-only tooling (tests, formatting, linting, type checking, git
# hooks), kept in Poetry's "dev" dependency group. Entries are listed
# alphabetically.
[tool.poetry.group.dev.dependencies]
black = "^23.11.0"
httpx = "^0.28.1"
mypy = "^1.7.1"
pre-commit = "^3.6.0"
pytest = "^7.4.3"
pytest-asyncio = "^0.21.1"
pytest-mock = "^3.12.0"
ruff = "^0.1.6"
# PEP 517 build configuration: build front-ends install `requires` and then
# call the backend named in `build-backend`.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
# Black formatter settings.
[tool.black]
target-version = ["py311"]
line-length = 88
# Literal (single-quoted) string so the regex backslash needs no escaping;
# the pattern matches paths ending in .py or .pyi.
include = '\.pyi?$'
# Ruff linter settings.
[tool.ruff]
target-version = "py311"
line-length = 88
# Rule families that are switched on.
select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # pyflakes
    "I",   # isort
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
]
# Individual rules that are switched off.
ignore = [
    "E501",  # line too long, handled by black
    "B008",  # do not perform function calls in argument defaults
    "C901",  # too complex
]
# Per-path exceptions: F401 (unused import) is not reported in __init__.py.
[tool.ruff.per-file-ignores]
"__init__.py" = ["F401"]
# mypy type-checker settings. Most strictness flags are enabled individually
# below; keys are kept in alphabetical order.
[tool.mypy]
python_version = "3.11"
check_untyped_defs = true
disallow_any_generics = true
disallow_incomplete_defs = true
disallow_untyped_decorators = true
disallow_untyped_defs = true
# Do not report imports whose type information mypy cannot find.
ignore_missing_imports = true
no_implicit_optional = true
show_error_codes = true
strict_equality = true
warn_redundant_casts = true
warn_return_any = true
warn_unreachable = true
warn_unused_configs = true
warn_unused_ignores = true
# pytest settings: where tests live, how they are discovered, and default
# command-line options.
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
# Verbose output with shortened tracebacks.
addopts = "-v --tb=short"
# Option read by the pytest-asyncio plugin (listed in the dev dependencies).
asyncio_mode = "auto"