video-accessibility/backend/pyproject.toml
michael 05bde8326d feat: add Whisper-based pause point refinement for audio descriptions
Implements word-level speech analysis using faster-whisper to refine
AD pause points. Gemini's timestamps are snapped to natural speech gaps
(sentence/phrase boundaries) to prevent pauses mid-word.

Key changes:
- Add WhisperService for transcription and gap detection
- Add dedicated Celery task routed to 'whisper' queue
- Integrate refinement into render_accessible_video task
- Cache Whisper transcripts in MongoDB for reuse across languages
- Add dedicated whisper-worker with concurrency=1 to prevent OOM

Configuration:
- Uses faster-whisper 'base' model (multilingual, ~145MB)
- 5-second search window after Gemini's recommended point
- Falls back to original timestamp if no gap found

Infrastructure:
- New Docker stage: whisper-worker
- New Cloud Run service: accessible-video-whisper-worker
- Updated docker-compose.yml with whisper-worker service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-27 08:27:48 -06:00

109 lines
No EOL
2.9 KiB
TOML

# Package metadata consumed by Poetry.
[tool.poetry]
name = "accessible-video-backend"
version = "0.1.0"
description = "FastAPI backend for accessible video processing platform"
readme = "README.md"
# NOTE(review): this looks like the template placeholder — replace with the
# real maintainer contact before publishing.
authors = ["Your Name <your.email@example.com>"]

# Runtime dependencies. Version strings use Poetry's caret syntax unless an
# explicit range is spelled out.
[tool.poetry.dependencies]
python = "^3.11"

# Web framework and ASGI/WSGI servers
fastapi = "^0.115.0"
uvicorn = { extras = ["standard"], version = "^0.24.0" }
gunicorn = "^21.2.0"

# Validation and settings
pydantic = { extras = ["email"], version = "^2.5.0" }
pydantic-settings = "^2.1.0"

# MongoDB, Redis and the Celery task queue
pymongo = "^4.6.0"
motor = "^3.3.2"
redis = "^5.0.1"
celery = { extras = ["redis"], version = "^5.3.4" }

# Google Cloud / Gemini clients
google-cloud-storage = "^2.10.0"
google-cloud-translate = "^3.12.1"
google-cloud-texttospeech = "^2.16.3"
google-cloud-secret-manager = "^2.18.1"
google-genai = "^1.56.0"

# Outbound email
sendgrid = "^6.11.0"

# Auth: JWT handling and password hashing
python-jose = { extras = ["cryptography"], version = "^3.3.0" }
libpass = { extras = ["bcrypt"], version = "^1.9.1" }

# Multipart form parsing for FastAPI uploads.
# An explicit range is used instead of a caret: for 0.0.x versions Poetry's
# caret only admits that exact patch ("^0.0.6" means ">=0.0.6,<0.0.7"), which
# froze this dependency on a release affected by CVE-2024-24762 (fixed in
# 0.0.7) and CVE-2024-53981 (fixed in 0.0.18). Re-run `poetry lock` after
# changing this line.
python-multipart = ">=0.0.18,<0.1.0"

# Tracing, metrics and error reporting
opentelemetry-api = "^1.21.0"
opentelemetry-sdk = "^1.21.0"
opentelemetry-instrumentation-fastapi = "^0.42b0"
opentelemetry-instrumentation-pymongo = "^0.42b0"
opentelemetry-instrumentation-redis = "^0.42b0"
opentelemetry-exporter-gcp-trace = "^1.6.0"
opentelemetry-exporter-otlp = "^1.21.0"
# opentelemetry-exporter-prometheus = "^1.11.1" # Temporarily disabled - version conflicts
prometheus-client = "^0.19.0"
sentry-sdk = { extras = ["fastapi"], version = "^1.38.0" }

# Audio / video processing
ffmpeg-python = "^0.2.0"
pydub = "^0.25.1"
faster-whisper = "^1.2.0"
# Only installed on Python >= 3.13, where the stdlib `audioop` module no
# longer exists. Presumably required by pydub there — confirm.
audioop-lts = { version = "^0.2.2", python = ">=3.13" }

# Miscellaneous utilities
python-magic = "^0.4.27"
aiohttp = "^3.12.15"
jinja2 = "^3.1.6"

# Development-only tooling (tests, formatting, linting, type checking).
# Entries are kept in alphabetical order.
[tool.poetry.group.dev.dependencies]
black = "^23.11.0"
httpx = "^0.28.1"
mypy = "^1.7.1"
pre-commit = "^3.6.0"
pytest = "^7.4.3"
pytest-asyncio = "^0.21.1"
pytest-mock = "^3.12.0"
ruff = "^0.1.6"

# PEP 517 build backend declaration.
# NOTE(review): `poetry-core` is requested without any version bound, so a
# build picks up whatever release is newest at that time — consider adding one.
[build-system]
requires = [
    "poetry-core",
]
build-backend = "poetry.core.masonry.api"

# Black formatter settings.
[tool.black]
target-version = ["py311"]
line-length = 88
# Literal (single-quoted) string so the regex backslash needs no escaping.
include = '\.pyi?$'

# Ruff linter settings.
# NOTE(review): ruff 0.2+ prefers `select` / `ignore` under [tool.ruff.lint];
# the top-level form below matches the 0.1.x series pinned in the dev group.
[tool.ruff]
line-length = 88
target-version = "py311"

# Rule families that are switched on.
select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # pyflakes
    "I",   # isort
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
]

# Individual rules that are switched off.
ignore = [
    "E501",  # line too long, handled by black
    "B008",  # do not perform function calls in argument defaults
    "C901",  # too complex
]

# Per-path rule exemptions for ruff.
[tool.ruff.per-file-ignores]
# F401 (unused import) is suppressed in package __init__ files.
"__init__.py" = ["F401"]

# mypy static type-checker settings.
[tool.mypy]
python_version = "3.11"

# Third-party packages without type information are not reported.
ignore_missing_imports = true

# Strictness: require complete annotations and forbid loose generics.
disallow_untyped_defs = true
disallow_incomplete_defs = true
disallow_untyped_decorators = true
disallow_any_generics = true
check_untyped_defs = true
no_implicit_optional = true
strict_equality = true

# Extra warnings.
warn_return_any = true
warn_unreachable = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_unused_configs = true

# Output formatting.
show_error_codes = true

# pytest settings.
[tool.pytest.ini_options]
# Default command-line flags: verbose output, short tracebacks.
addopts = "-v --tb=short"
# pytest-asyncio mode.
asyncio_mode = "auto"

# Test discovery: where to look and which names count as tests.
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]