Compare commits
150 commits
feat/workf
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fb99a5e8c7 | ||
|
|
07d2112e53 | ||
|
|
922cb9318e | ||
|
|
cff62c51ff | ||
|
|
b24f7a9a0f | ||
|
|
11bf08a29d | ||
|
|
42a0c8acb1 | ||
|
|
bd1dd69467 | ||
|
|
82d438df7c | ||
|
|
7bba8256ce | ||
|
|
000e99c2d0 | ||
|
|
700347857a | ||
|
|
3b31012901 | ||
|
|
f22d568fc5 | ||
|
|
4645e67611 | ||
|
|
e70a67718e | ||
|
|
6bf88474ee | ||
|
|
7a7b6c1c12 | ||
|
|
ca312d48fa | ||
|
|
16000a8bd9 | ||
|
|
69eff9ca9d | ||
|
|
76bee82119 | ||
|
|
f7708f0214 | ||
|
|
c380a96c72 | ||
|
|
95dbed03bd | ||
|
|
39a9d62b06 | ||
|
|
36b3b3e47c | ||
|
|
8598852da1 | ||
|
|
77a4eb10e0 | ||
|
|
5a93bdc1b6 | ||
|
|
c8a610b3f7 | ||
|
|
3371466e10 | ||
|
|
cff1b35aa0 | ||
|
|
796cd85a1d | ||
|
|
e2391e2603 | ||
|
|
56a3a62368 | ||
|
|
f38325b461 | ||
|
|
b873f0af6d | ||
|
|
865473937f | ||
|
|
290d5e32e6 | ||
|
|
00dd1643f5 | ||
|
|
c3835843db | ||
|
|
4ba489eaaa | ||
|
|
f99be62256 | ||
|
|
c74fde4f40 | ||
|
|
fddf803b74 | ||
|
|
e2af5c0f2f | ||
|
|
d70b5acaf9 | ||
|
|
f91cb16005 | ||
|
|
3a2bbc9ca0 | ||
|
|
5f084f359f | ||
|
|
6588feedc7 | ||
|
|
e52abca74b | ||
|
|
90867e9824 | ||
|
|
68ac65ac05 | ||
|
|
a14444d61c | ||
|
|
98ece9faac | ||
|
|
27286e23db | ||
|
|
e60e7c96e7 | ||
|
|
a3cfe2ff8c | ||
|
|
9733700874 | ||
|
|
df7fec701d | ||
|
|
2f4925353a | ||
|
|
31d631f70d | ||
|
|
8dee0b6ff5 | ||
|
|
997c1f622b | ||
|
|
d4cb31e5d9 | ||
|
|
2c816a5e69 | ||
|
|
ce048a2196 | ||
|
|
67219797b6 | ||
|
|
6559ccc1f9 | ||
|
|
d2adfbc3b4 | ||
|
|
c3a42cb5fe | ||
|
|
9e6ce657bf | ||
|
|
f2968a2989 | ||
|
|
32b12ff0a6 | ||
|
|
b427ee9f49 | ||
|
|
98764f5065 | ||
|
|
5d8d992e5a | ||
|
|
3bed598025 | ||
|
|
713ae46d4a | ||
|
|
3fb8dce3ee | ||
|
|
12fe4ebcbb | ||
|
|
43ef3a6cd8 | ||
|
|
8a1440201e | ||
|
|
99554173e6 | ||
|
|
2e8cf8269e | ||
|
|
f681bd4f53 | ||
|
|
08a8a0d636 | ||
|
|
77a9d3b255 | ||
|
|
7c15acc18a | ||
|
|
a53cf960ae | ||
|
|
b0a90777ed | ||
|
|
efa2395527 | ||
|
|
0badae9e5d | ||
|
|
5db01248b6 | ||
|
|
37873c433d | ||
|
|
105895dd14 | ||
|
|
ce4b3b0d95 | ||
|
|
46477b7b32 | ||
|
|
31199f8705 | ||
|
|
24d93277de | ||
|
|
ec1ce5c13a | ||
|
|
5fd370c093 | ||
|
|
90cbf23f0d | ||
|
|
5e55d9f27a | ||
|
|
d5e63129dd | ||
|
|
4edd4da0b2 | ||
|
|
c1948ea198 | ||
|
|
e4b350cd7d | ||
|
|
518796c852 | ||
|
|
3f557724d3 | ||
|
|
ff372c7322 | ||
|
|
812a2bffce | ||
|
|
9413200681 | ||
|
|
8e33b413a3 | ||
|
|
2ab5a6f681 | ||
|
|
5679a38f1e | ||
|
|
ea30425a63 | ||
|
|
89fa87ba8a | ||
|
|
f4a82dcf76 | ||
|
|
1e5a07b06e | ||
|
|
582f8ad2e8 | ||
|
|
b3ace22009 | ||
|
|
f723e3f0bc | ||
|
|
c7eaa7a952 | ||
|
|
49835f9b0c | ||
|
|
4623b89aeb | ||
|
|
54fcf47887 | ||
|
|
fe608401be | ||
|
|
595897e61a | ||
|
|
a945653e73 | ||
|
|
264561895e | ||
|
|
dca1ca9c8c | ||
|
|
3e3be935c6 | ||
|
|
38038862c9 | ||
|
|
5209f04318 | ||
|
|
b2d524e702 | ||
|
|
312af2d7fb | ||
|
|
08fcb4daa4 | ||
|
|
bdfa0f82ab | ||
|
|
1317ee7ca4 | ||
|
|
aba43a67d7 | ||
|
|
dc1cfd01dc | ||
|
|
bb751033c0 | ||
|
|
abf81515a4 | ||
|
|
f1a9e6ee46 | ||
|
|
1bf0fb9eed | ||
|
|
13db347d65 | ||
|
|
460c6ce091 |
310 changed files with 22337 additions and 7320 deletions
94
.claude/settings.local.json
Normal file
94
.claude/settings.local.json
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"WebSearch",
|
||||
"Bash(cd /Volumes/SSD/Projects/Oliver/video-accessibility/backend && ruff check app/services/elevenlabs_voices.py app/services/tts.py app/api/v1/routes_tts.py app/models/job.py app/tasks/tts_synthesis.py app/core/config.py 2>&1)",
|
||||
"Bash(cd /Volumes/SSD/Projects/Oliver/video-accessibility/backend && python -m ruff check app/services/elevenlabs_voices.py app/services/tts.py app/api/v1/routes_tts.py app/models/job.py app/tasks/tts_synthesis.py app/core/config.py 2>&1)",
|
||||
"Bash(cd /Volumes/SSD/Projects/Oliver/video-accessibility/backend && pip3 show ruff 2>&1 | head -5; which pip3 2>&1)",
|
||||
"Bash(cd /Volumes/SSD/Projects/Oliver/video-accessibility/frontend && npm run type-check 2>&1 | tail -20)",
|
||||
"Bash(node_modules/.bin/tsc --noEmit 2>&1 | tail -20)",
|
||||
"Bash(./node_modules/.bin/tsc --noEmit 2>&1 | tail -30)",
|
||||
"Bash(npm run type-check 2>&1)",
|
||||
"Bash(cd /Volumes/SSD/Projects/Oliver/video-accessibility/frontend && npm run type-check 2>&1)",
|
||||
"Bash(npm run lint 2>&1)",
|
||||
"WebFetch(domain:dcmp.org)",
|
||||
"WebFetch(domain:www.w3.org)",
|
||||
"WebFetch(domain:partnerhelp.netflixstudios.com)",
|
||||
"WebFetch(domain:m.media-amazon.com)",
|
||||
"WebFetch(domain:www.acb.org)",
|
||||
"Bash(./node_modules/.bin/tsc --noEmit)",
|
||||
"Bash(node_modules/.bin/tsc --noEmit)",
|
||||
"Bash(pandoc --version)",
|
||||
"WebFetch(domain:ai-sandbox.oliver.solutions)",
|
||||
"Bash(gcloud run:*)",
|
||||
"Bash(gcloud logging:*)",
|
||||
"Bash(ssh optical:*)",
|
||||
"Bash(/Volumes/SSD/Projects/Oliver/video-accessibility/backend/.venv/bin/python3.11 -c \"import sys; sys.path.insert\\(0, '.'\\); from app.models.user import UserRole; print\\([r.value for r in UserRole]\\)\")",
|
||||
"Bash(npm list *)",
|
||||
"Bash(brew list *)",
|
||||
"Bash(npx --yes puppeteer --version)",
|
||||
"Bash(node md_to_pdf.js)",
|
||||
"Bash(npm root *)",
|
||||
"Bash(node *)",
|
||||
"Bash(ssh optical-web-1 *)",
|
||||
"Bash(git *)",
|
||||
"WebFetch(domain:docs.anthropic.com)",
|
||||
"Bash(poetry lock *)",
|
||||
"Bash(pip show *)",
|
||||
"Read(//Users/ai_leed/.local/bin/**)",
|
||||
"Read(//opt/homebrew/bin/**)",
|
||||
"Bash(pip3 install *)",
|
||||
"Bash(poetry --version)",
|
||||
"Bash(docker run *)",
|
||||
"Read(//Users/ai_leed/.docker/run/**)",
|
||||
"Bash(docker context *)",
|
||||
"Bash(DOCKER_HOST=unix:///var/run/docker.sock docker run --rm -v \"$\\(pwd\\):/app\" -w /app python:3.11-slim bash -c \"pip install poetry==1.8.2 -q && poetry lock --no-update\")",
|
||||
"Bash(brew install *)",
|
||||
"Bash(npm run *)",
|
||||
"Bash(scp /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/models/audit_log.py optical:/tmp/audit_log.py)",
|
||||
"Bash(scp *)",
|
||||
"Bash(kill %1)",
|
||||
"Bash(ssh optical-dev *)",
|
||||
"Skill(fullstack-dev-skills:security-reviewer)",
|
||||
"Bash(chmod +x *)",
|
||||
"Bash(gcloud auth *)",
|
||||
"Bash(gcloud config *)",
|
||||
"Bash(gcloud artifacts *)",
|
||||
"Bash(sed -n '190,200p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/api/v1/routes_jobs.py)",
|
||||
"Bash(sed -n '1914,1922p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/api/v1/routes_jobs.py)",
|
||||
"Bash(sed -n '2048,2062p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/api/v1/routes_jobs.py)",
|
||||
"Bash(sed -n '2490,2502p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/api/v1/routes_jobs.py)",
|
||||
"Bash(sed -n '2628,2638p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/api/v1/routes_jobs.py)",
|
||||
"Bash(gcloud builds submit *)",
|
||||
"Bash(gcloud builds describe 79802b34-e17b-4446-b01d-68d99d569262 *)",
|
||||
"Bash(gcloud compute instances list *)",
|
||||
"Bash(gcloud compute networks vpc-access connectors list *)",
|
||||
"Bash(gcloud builds *)",
|
||||
"Bash(gcloud projects get-iam-policy optical-414516 *)",
|
||||
"Bash(gcloud projects *)",
|
||||
"Bash(npm audit *)",
|
||||
"Skill(codebase-audit-suite:ln-622-build-auditor)",
|
||||
"Skill(codebase-audit-suite:ln-624-code-quality-auditor)",
|
||||
"Skill(codebase-audit-suite:ln-625-dependencies-auditor)",
|
||||
"Skill(codebase-audit-suite:ln-626-dead-code-auditor)",
|
||||
"Bash(/opt/homebrew/bin/ruff check *)",
|
||||
"Bash(npm test *)",
|
||||
"Bash(sed -n '35,42p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/frontend/src/test/utils.tsx)",
|
||||
"Bash(sed -n '55,90p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/frontend/tests/helpers/auth.ts)",
|
||||
"Bash(sed -n '48,60p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/frontend/src/components/Layout/Sidebar.tsx)",
|
||||
"Bash(sed -n '152,170p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/frontend/src/components/Layout/Sidebar.tsx)",
|
||||
"Bash(poetry env *)",
|
||||
"Bash(poetry install *)",
|
||||
"Bash(poetry run *)",
|
||||
"Bash(docker info *)",
|
||||
"Bash(sed -n '1,30p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/services/gcs.py)",
|
||||
"Bash(sed -n '155,165p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/services/gcs.py)",
|
||||
"Bash(gcloud secrets *)",
|
||||
"Bash(openssl rand *)",
|
||||
"Bash(ssh *)",
|
||||
"Skill(commit-commands:commit-push-pr)",
|
||||
"Bash(obsidian read *)",
|
||||
"Bash(obsidian search *)"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
@ -9,18 +9,18 @@
|
|||
# App Configuration
|
||||
# -----------------------------------------------------------------------------
|
||||
APP_ENV=prod
|
||||
API_BASE_URL=https://ai-sandbox.oliver.solutions/video-accessibility-back
|
||||
API_BASE_URL=https://optical-dev.oliver.solutions/video-accessibility
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Authentication & Security
|
||||
# -----------------------------------------------------------------------------
|
||||
# IMPORTANT: Generate a secure random secret for JWT_SECRET
|
||||
# Example: openssl rand -hex 32
|
||||
JWT_SECRET=CHANGE_ME_TO_SECURE_RANDOM_64_CHAR_STRING
|
||||
JWT_SECRET=d81fd31798510f53b374951908b6bedd75f7ddaabe9b4e4c4ca5bf81393f48b7
|
||||
JWT_ALG=HS256
|
||||
JWT_ACCESS_TTL_MIN=240
|
||||
JWT_REFRESH_TTL_DAYS=7
|
||||
COOKIE_DOMAIN=ai-sandbox.oliver.solutions
|
||||
COOKIE_DOMAIN=optical-dev.oliver.solutions
|
||||
COOKIE_SECURE=true
|
||||
COOKIE_SAMESITE=Lax
|
||||
|
||||
|
|
@ -63,29 +63,31 @@ TRANSLATE_API_KEY=
|
|||
ELEVENLABS_API_KEY=sk_c17be2768ca784f1807018420b84c7f1ee969946e698f986
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Email Configuration (SendGrid)
|
||||
# Email Configuration (Mailgun)
|
||||
# -----------------------------------------------------------------------------
|
||||
# IMPORTANT: Get SendGrid API key from https://app.sendgrid.com/settings/api_keys
|
||||
SENDGRID_API_KEY=
|
||||
MAILGUN_API_KEY=1d8c6f38c53f237305353cc2e55f39f2-c6620443-4b9961f5
|
||||
MAILGUN_DOMAIN=mg.oliver.solutions
|
||||
MAILGUN_FROM=noreply@mg.oliver.solutions
|
||||
|
||||
# Email sender address (must be verified in SendGrid)
|
||||
EMAIL_FROM=noreply@ai-sandbox.oliver.solutions
|
||||
# Email sender address
|
||||
EMAIL_FROM=noreply@mg.oliver.solutions
|
||||
|
||||
# Client-facing URL (used in emails)
|
||||
CLIENT_BASE_URL=https://ai-sandbox.oliver.solutions/video-accessibility
|
||||
CLIENT_BASE_URL=https://optical-dev.oliver.solutions/video-accessibility
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Microsoft Authentication (Azure AD)
|
||||
# -----------------------------------------------------------------------------
|
||||
AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
|
||||
AZURE_AUTHORITY=https://login.microsoftonline.com/e519c2e6-bc6d-4fdf-8d9c-923c2f002385
|
||||
AZURE_REDIRECT_URI=https://ai-sandbox.oliver.solutions/video-accessibility/
|
||||
AZURE_REDIRECT_URI=https://optical-dev.oliver.solutions/video-accessibility/
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# CORS Configuration
|
||||
# -----------------------------------------------------------------------------
|
||||
# Comma-separated list of allowed origins
|
||||
CORS_ORIGINS=https://ai-sandbox.oliver.solutions
|
||||
CORS_ORIGINS=https://optical-dev.oliver.solutions
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Observability & Monitoring (Optional)
|
||||
|
|
@ -116,6 +118,9 @@ OTEL_EXPORTER_OTLP_ENDPOINT=
|
|||
WHISPER_SERVICE_URL=https://whisper-http-service-bcb6ipdqka-uc.a.run.app
|
||||
FFMPEG_SERVICE_URL=https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app
|
||||
|
||||
# optical-dev uses Celery workers (not Cloud Run Jobs) for pipeline dispatch
|
||||
USE_CELERY_FALLBACK=true
|
||||
|
||||
# Worker Concurrency (higher values for Cloud Run mode since workers just make HTTP calls)
|
||||
WHISPER_WORKER_CONCURRENCY=10
|
||||
FFMPEG_WORKER_CONCURRENCY=20
|
||||
|
|
|
|||
23
.env.screenshots.example
Normal file
23
.env.screenshots.example
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# Screenshot capture credentials — copy to .env.screenshots and fill in values
|
||||
# NEVER commit .env.screenshots (it is gitignored)
|
||||
|
||||
BASE_URL=https://optical-dev.oliver.solutions/video-accessibility
|
||||
|
||||
# Local-password admin seeded by backend/scripts/seed_test_users.py
|
||||
TEST_ADMIN_EMAIL=test-admin@oliver.agency
|
||||
TEST_ADMIN_PASSWORD=TestAdmin2026!
|
||||
|
||||
TEST_CLIENT_EMAIL=test-client@oliver.agency
|
||||
TEST_CLIENT_PASSWORD=TestClient2026!
|
||||
|
||||
TEST_LINGUIST_EMAIL=test-linguist@oliver.agency
|
||||
TEST_LINGUIST_PASSWORD=TestLinguist2026!
|
||||
|
||||
TEST_REVIEWER_EMAIL=test-reviewer@oliver.agency
|
||||
TEST_REVIEWER_PASSWORD=TestReviewer2026!
|
||||
|
||||
TEST_PRODUCTION_EMAIL=test-production@oliver.agency
|
||||
TEST_PRODUCTION_PASSWORD=TestProduction2026!
|
||||
|
||||
TEST_PM_EMAIL=test-pm@oliver.agency
|
||||
TEST_PM_PASSWORD=TestPM2026!
|
||||
13
.gitignore
vendored
13
.gitignore
vendored
|
|
@ -12,6 +12,7 @@ examples/
|
|||
.env.local
|
||||
.env.production
|
||||
.env.*.local
|
||||
.env.screenshots
|
||||
secrets/
|
||||
*.pem
|
||||
*.key
|
||||
|
|
@ -98,3 +99,15 @@ docs/*.pdf
|
|||
/var/www/html/video-accessibility.backup.*
|
||||
|
||||
backend/.env
|
||||
|
||||
# Node / npm artifacts at repo root (Playwright MCP installs these)
|
||||
node_modules/
|
||||
package.json
|
||||
package-lock.json
|
||||
|
||||
# Playwright MCP session snapshots
|
||||
.playwright-mcp/
|
||||
|
||||
# Test videos
|
||||
test-video.mp4
|
||||
.worktrees/
|
||||
|
|
|
|||
118
.hex-skills/audit-reports/ln-622--build-health.md
Normal file
118
.hex-skills/audit-reports/ln-622--build-health.md
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
# Build Health Audit — ln-622
|
||||
**Score: 5.5/10** | Issues: 28 (C:0 H:5 M:18 L:5)
|
||||
**Date:** 2026-04-30 | **Stack:** Python 3.11 / FastAPI / Celery + React 19 / Vite / TypeScript 5.8
|
||||
|
||||
---
|
||||
|
||||
## 1. Compiler / Linter Errors
|
||||
|
||||
### Backend — ruff: 1314 errors (HIGH)
|
||||
`ruff check app/` exits non-zero with 1314 violations. The ruff config in `pyproject.toml` uses **deprecated top-level `select`/`ignore`/`per-file-ignores`** instead of `[tool.ruff.lint]` — ruff emits a warning on every run.
|
||||
|
||||
Top violation codes:
|
||||
| Code | Meaning | Volume |
|
||||
|------|---------|--------|
|
||||
| I001 | Import block unsorted | ~400 |
|
||||
| UP | pyupgrade (f-strings, typing aliases) | ~500 |
|
||||
| B | flake8-bugbear | ~200 |
|
||||
| F401 | Unused import | 58 |
|
||||
|
||||
Most violations are **auto-fixable** (`ruff check --fix`). The unsorted imports and UP rules are cosmetic but make CI noisy and block future enforcement.
|
||||
|
||||
**Severity: HIGH** — CI cannot gate on ruff without fixing this first.
|
||||
|
||||
### Frontend — ESLint: 36 problems (30 errors, 6 warnings) (MEDIUM)
|
||||
Key errors:
|
||||
| File | Rule | Count |
|
||||
|------|------|-------|
|
||||
| `contexts/GlobalWebSocketContext.tsx:56` | `react-refresh/only-export-components` | 1 |
|
||||
| `contexts/NotificationContext.tsx:91` | `react-refresh/only-export-components` | 1 |
|
||||
| `contexts/ToastContext.tsx:83` | `react-refresh/only-export-components` | 1 |
|
||||
| `lib/api.ts:539` | `@typescript-eslint/no-explicit-any` | 1 |
|
||||
| `routes/admin/QCDetail.tsx` | `@typescript-eslint/no-explicit-any` | 6 |
|
||||
| `routes/AcceptInvite.tsx` | `@typescript-eslint/no-explicit-any` | 1 |
|
||||
| `routes/jobs/JobDetail.tsx` | `no-unused-vars` (err catch) | 2 |
|
||||
| `hooks/__tests__/useJob.test.tsx` | `no-unused-vars` | 1 |
|
||||
| `tests/helpers/auth.ts` | `no-explicit-any` | 3 |
|
||||
|
||||
**Severity: MEDIUM** — build succeeds, but `any` types and react-refresh errors degrade DX and HMR.
|
||||
|
||||
---
|
||||
|
||||
## 2. Type Errors
|
||||
|
||||
### Frontend — tsc: CLEAN ✓
|
||||
`tsc --noEmit` exits 0. No TypeScript compilation errors. The `any` issues above are ESLint-level, not tsc errors.
|
||||
|
||||
### Backend — mypy: NOT RUN
|
||||
Cannot run mypy outside the poetry venv. Needs `poetry run mypy .` inside Docker or an activated venv.
|
||||
|
||||
**Severity: LOW** (mypy not blocking, but should be run in CI)
|
||||
|
||||
---
|
||||
|
||||
## 3. Tests
|
||||
|
||||
### Frontend — vitest: 13 failed / 75 total (HIGH)
|
||||
6 test files affected:
|
||||
|
||||
| Test | Failures | Root cause |
|
||||
|------|----------|-----------|
|
||||
| `auth.test.ts` | 1 | Mock shape mismatch — response has extra field `organizationId` |
|
||||
| `StatusBadge.test.tsx` | 1 | Unknown status no longer renders text (component changed) |
|
||||
| `VttEditor.test.tsx` | 1 | Multiple elements found for `Insert cue before` title — DOM duplication |
|
||||
| `useJob.test.tsx` | 3 | `useApproveEnglish` — pending state never resolves in test (timeout 1s); `useCreateJob` arg mismatch |
|
||||
| `UploadDropzone.test.tsx` | 6 | Text broken across elements — test uses exact string match, component renders in `<span>` nodes |
|
||||
| `useJobStatusWebSocket.test.tsx` | 1 | Root cause not recorded in this report (see the vitest output) |
|
||||
|
||||
**Severity: HIGH** — 17% test failure rate. Several are stale tests from component refactors (UploadDropzone, StatusBadge).
|
||||
|
||||
### Backend — pytest: CANNOT RUN (CRITICAL)
|
||||
Running `pytest` outside poetry venv fails with `ModuleNotFoundError` for `fastapi`, `aiohttp`, etc. Tests must be run with `poetry run pytest` inside Docker or an activated poetry environment.
|
||||
|
||||
The `backend/.venv` exists but appears to be a plain venv, not the poetry-managed one. **Tests are effectively unrunnable in local dev without explicit poetry activation.**
|
||||
|
||||
**Severity: CRITICAL** — Developers with system Python cannot run tests without explicit setup steps.
|
||||
|
||||
---
|
||||
|
||||
## 4. Build Configuration Issues
|
||||
|
||||
### ruff config deprecated (MEDIUM)
|
||||
`pyproject.toml` uses `[tool.ruff]` top-level `select`, `ignore`, `per-file-ignores`. Current ruff ≥ 0.2 expects `[tool.ruff.lint]`. Fix:
|
||||
|
||||
```toml
|
||||
# Before
|
||||
[tool.ruff]
|
||||
select = ["E", "W", ...]
|
||||
ignore = ["E501", ...]
|
||||
|
||||
# After
|
||||
[tool.ruff]
|
||||
target-version = "py311"
|
||||
line-length = 88
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E", "W", ...]
|
||||
ignore = ["E501", ...]
|
||||
```
|
||||
|
||||
### Backend venv mismatch (MEDIUM)
|
||||
`backend/.venv` cannot run `ruff`, `pytest`, or `mypy` — they are installed in the poetry-managed venv, not this one. Confusing to new devs.
|
||||
|
||||
### AGENTS.md commands incorrect (LOW)
|
||||
`AGENTS.md` documents `cd backend && poetry run pytest` but the backend has `.venv` and `pyproject.toml` with no Makefile wrapper. The actual working path is `cd backend && .venv/bin/python -m pytest` or requires `poetry shell`.
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
| Check | Result | Severity |
|
||||
|-------|--------|---------|
|
||||
| ruff backend | 1314 violations (auto-fixable) | HIGH |
|
||||
| ESLint frontend | 36 problems | MEDIUM |
|
||||
| tsc frontend | ✓ Clean | OK |
|
||||
| mypy backend | Not runnable locally | LOW |
|
||||
| vitest frontend | 13/75 failing | HIGH |
|
||||
| pytest backend | Not runnable locally | CRITICAL |
|
||||
| ruff config | Deprecated syntax | MEDIUM |
|
||||
| venv setup | Confusing / broken | MEDIUM |
|
||||
116
.hex-skills/audit-reports/ln-624--code-quality.md
Normal file
116
.hex-skills/audit-reports/ln-624--code-quality.md
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
# Code Quality Audit — ln-624
|
||||
**Score: 5.0/10** | Issues: 22 (C:2 H:8 M:9 L:3)
|
||||
**Date:** 2026-04-30
|
||||
|
||||
---
|
||||
|
||||
## 1. God Classes / Files (> 500 lines)
|
||||
|
||||
| File | Lines | Severity |
|
||||
|------|-------|---------|
|
||||
| `backend/app/api/v1/routes_jobs.py` | 2882 | **CRITICAL** |
|
||||
| `frontend/src/routes/admin/QCDetail.tsx` | 2079 | **CRITICAL** |
|
||||
| `backend/app/services/video_renderer.py` | 1695 | **HIGH** |
|
||||
| `frontend/src/routes/jobs/JobsList.tsx` | 1246 | **HIGH** |
|
||||
| `frontend/src/lib/api.ts` | 1056 | **HIGH** |
|
||||
| `backend/app/tasks/translate_and_synthesize.py` | 1019 | **HIGH** |
|
||||
| `frontend/src/routes/jobs/NewJob.tsx` | 1038 | **HIGH** |
|
||||
| `frontend/src/types/api.ts` | 891 | **MEDIUM** |
|
||||
| `frontend/src/routes/jobs/JobDetail.tsx` | 732 | **MEDIUM** |
|
||||
| `frontend/src/routes/admin/UserDetail.tsx` | 523 | **MEDIUM** |
|
||||
| `frontend/src/hooks/useJobStatusWebSocket.ts` | 443 | **MEDIUM** |
|
||||
|
||||
**routes_jobs.py at 2882 lines** is the worst offender — it mixes upload, approval, translation, TTS, VTT editing, download, admin, and websocket concerns in a single router. Splitting by domain (e.g., `routes_upload.py`, `routes_vtt.py`, `routes_review.py`, `routes_tts.py`) would bring each under 500 lines.
|
||||
|
||||
**QCDetail.tsx at 2079 lines** handles the entire QC workflow, VTT display, audio preview, language selection, and approval modals in one component. Needs extraction of at minimum: `LanguageQCPanel`, `VttReviewView`, `ApprovalModal`.
|
||||
|
||||
---
|
||||
|
||||
## 2. Long Methods (> 100 lines)
|
||||
|
||||
| File:line | Function | Length | Severity |
|
||||
|-----------|---------|--------|---------|
|
||||
| `tasks/translate_and_synthesize.py:109` | `_async_translate_and_synthesize()` | 485 lines | **CRITICAL** |
|
||||
| `services/video_renderer.py:487` | `_render_pause_insert_method()` | 419 lines | **CRITICAL** |
|
||||
| `tasks/ingest_and_ai.py:53` | `ingest_and_ai_task_impl()` | 276 lines | **HIGH** |
|
||||
| `tasks/rerender_accessible_video.py:110` | `_async_rerender_accessible_video()` | 280 lines | **HIGH** |
|
||||
| `tasks/render_accessible_video.py:56` | `_async_render_accessible_video()` | 287 lines | **HIGH** |
|
||||
| `api/v1/routes_jobs.py:1552` | `update_job_vtt_content()` | 215 lines | **HIGH** |
|
||||
| `tasks/notify.py:29` | `run_async()` | 169 lines | **HIGH** |
|
||||
| `api/v1/routes_jobs.py:2738` | `update_tts_preferences()` | 144 lines | **MEDIUM** |
|
||||
| `services/whisper_service.py:241` | `_find_sentence_boundaries()` | 120 lines | **MEDIUM** |
|
||||
| `services/gemini.py:591` | `analyze_accessible_video_placement()` | 132 lines | **MEDIUM** |
|
||||
|
||||
The two most critical ones (`_async_translate_and_synthesize` at 485 lines and `_render_pause_insert_method` at 419 lines) are orchestrator-style functions with sequential pipeline steps. They could be split into named pipeline stages, each ~50 lines.
|
||||
|
||||
---
|
||||
|
||||
## 3. Deep Nesting
|
||||
|
||||
Not systematically scanned with a tool (radon/lizard not installed). The long functions above likely contain 4–5+ nesting levels given their complexity.
|
||||
|
||||
---
|
||||
|
||||
## 4. Too Many Parameters
|
||||
|
||||
| Location | Function | Params | Severity |
|
||||
|----------|---------|--------|---------|
|
||||
| `services/gemini.py` | `extract_accessibility_targeted()` | 7+ | **MEDIUM** |
|
||||
| `tasks/translate_and_synthesize.py` | `_generate_language_tts()` | 8+ | **MEDIUM** |
|
||||
|
||||
Pattern: many functions pass `db`, `job`, `language`, `settings`, `gcs_client`, etc. individually instead of grouping into a context dataclass.
|
||||
|
||||
---
|
||||
|
||||
## 5. Magic Numbers
|
||||
|
||||
### Backend (MEDIUM)
|
||||
Scattered timing constants without named definitions:
|
||||
- TTS retry delays (hardcoded seconds)
|
||||
- chunk sizes in upload
|
||||
- Audio padding values in video_renderer.py
|
||||
|
||||
### Frontend (LOW)
|
||||
Mostly clean. Some inline pixel values in Tailwind (acceptable). No concerning business-logic magic numbers found.
|
||||
|
||||
---
|
||||
|
||||
## 6. N+1 Query Patterns (MEDIUM)
|
||||
|
||||
Potential N+1 patterns found:
|
||||
- `app/main.py:102` — `async for job_doc in db.jobs.find(...)` — check if this iterates and makes additional queries per document
|
||||
- `app/core/dependencies.py:185` — `async for m in db.memberships.find(...)` — membership lookup per request in auth middleware (acceptable if cached, but no caching observed)
|
||||
- `app/core/authz.py:54` — `async for doc in db.memberships.find(...)` — similar pattern in auth check
|
||||
|
||||
These are all async iterators over `find()` — not necessarily N+1 if no nested DB calls, but should be reviewed for `.find()` calls inside the loop body.
|
||||
|
||||
---
|
||||
|
||||
## 7. Method Signature Quality
|
||||
|
||||
### Boolean flag parameters (MEDIUM)
|
||||
Several async functions in tasks accept `bool` flags controlling behavior variants (e.g., `skip_tts`, `force_regenerate`). These should be enums or separate functions.
|
||||
|
||||
### Unclear return types (MEDIUM)
|
||||
Some routes return `dict` or untyped responses instead of Pydantic response models. `routes_admin_production.py` has a few endpoints returning bare dicts.
|
||||
|
||||
---
|
||||
|
||||
## 8. Side-Effect Cascade Depth
|
||||
|
||||
`_async_translate_and_synthesize()` at 485 lines is the worst case: it writes to GCS, updates MongoDB, dispatches TTS tasks, sends notifications, and updates job status — 5+ distinct side-effect categories from a single function call. This warrants extraction into an orchestrator that delegates to named sink functions.
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
| Check | Status | Severity |
|
||||
|-------|--------|---------|
|
||||
| God files (>500L) | 11 files | CRITICAL×2, HIGH×5 |
|
||||
| Long methods (>100L) | 10 functions | CRITICAL×2, HIGH×5 |
|
||||
| N+1 patterns | 3 potential | MEDIUM |
|
||||
| Magic numbers | Some in tasks | MEDIUM |
|
||||
| Method signatures | Boolean flags, unclear returns | MEDIUM |
|
||||
| Side-effect cascade | translate_and_synthesize | HIGH |
|
||||
|
||||
**Primary recommendation:** Split `routes_jobs.py` and `QCDetail.tsx` — these two files account for the majority of the quality debt.
|
||||
94
.hex-skills/audit-reports/ln-625--dependencies.md
Normal file
94
.hex-skills/audit-reports/ln-625--dependencies.md
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
# Dependencies & Reuse Audit — ln-625
|
||||
**Score: 7.5/10** | Issues: 9 (C:0 H:2 M:5 L:2)
|
||||
**Date:** 2026-04-30
|
||||
|
||||
---
|
||||
|
||||
## 1. Vulnerability Scan (CVE/CVSS)
|
||||
|
||||
### Frontend — npm audit: ✓ CLEAN
|
||||
```
|
||||
Total packages: 479
|
||||
Vulnerabilities: info:0 low:0 moderate:0 high:0 critical:0 total:0
|
||||
```
|
||||
Zero CVEs. Excellent.
|
||||
|
||||
### Backend — pip-audit: NOT RUN
|
||||
`pip-audit` not installed in local env. Recommended to add to CI:
|
||||
```bash
|
||||
pip install pip-audit && pip-audit -r requirements.txt
|
||||
```
|
||||
Given many heavy deps (Celery 5.3, google-cloud-*, faster-whisper, aiohttp), a CI scan is strongly advised.
|
||||
|
||||
---
|
||||
|
||||
## 2. Outdated Packages
|
||||
|
||||
### Frontend — npm outdated (many minor/major updates pending)
|
||||
|
||||
**MAJOR version gaps (HIGH):**
|
||||
| Package | Installed | Latest | Notes |
|
||||
|---------|-----------|--------|-------|
|
||||
| `@azure/msal-browser` | 4.25.0 | **5.9.0** | MSAL v5 has breaking API changes |
|
||||
| `@azure/msal-react` | 3.0.20 | **5.3.2** | Paired with msal-browser, coordinated upgrade needed |
|
||||
| `@sentry/react` | 8.55.0 | **10.51.0** | Sentry v10 has breaking changes |
|
||||
| `typescript` | 5.8.3 | **6.0.3** | TS 6 has strictness changes |
|
||||
| `vite` | 7.3.2 | **8.0.10** | Vite 8 breaking changes |
|
||||
| `eslint` | 9.33.0 | **10.2.1** | ESLint 10 config format may change |
|
||||
| `jsdom` | 26.1.0 | **29.1.1** | Test environment |
|
||||
|
||||
**Minor updates (LOW-MEDIUM):** Most other packages have minor/patch updates pending (react 19.1→19.2, tailwindcss 4.1→4.2, etc.)
|
||||
|
||||
**Recommendation:** Keep MSAL and Sentry on current major until dedicated upgrade sprint. React, TailwindCSS, react-query minor updates are safe to apply immediately.
|
||||
|
||||
### Backend — pip outdated: pip-audit not available
|
||||
Based on a manual comparison of the version constraints in `pyproject.toml` against current ecosystem releases:
|
||||
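- `ruff ^0.1.6` → the locally installed ruff is `0.15.12`, which is newer than the declared constraint allows (`^0.1.6` resolves to `>=0.1.6,<0.2.0`); the tool itself is up to date, but the constraint in `pyproject.toml` should be bumped to match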
|
||||
- `google-genai ^1.56.0` → recently updated per git log
|
||||
- `faster-whisper ^1.2.0` → check for 1.x updates
|
||||
|
||||
---
|
||||
|
||||
## 3. Unused Dependencies
|
||||
|
||||
### Backend — `sendgrid` (MEDIUM)
|
||||
`pyproject.toml` lists `sendgrid = "^6.11.0"`. However:
|
||||
- The actual emailer (`app/services/emailer.py`) uses **Mailgun** REST API via `httpx`
|
||||
- `sendgrid` is referenced **only** in `app/core/config.py` as a dead config field `sendgrid_api_key: str = ""` with comment `# Email (Mailgun — primary; sendgrid_api_key kept for backward compat)`
|
||||
- No `import sendgrid` anywhere in app code
|
||||
|
||||
**Action:** Remove `sendgrid` from `pyproject.toml` dependencies and remove the `sendgrid_api_key` config field.
|
||||
|
||||
### Frontend — no unused dependencies found
|
||||
- `axios` → used in `lib/api.ts`
|
||||
- `@azure/msal-*` → used in `main.tsx`, `routes/Login.tsx`
|
||||
- `date-fns` → used in 5+ components
|
||||
- `zustand`, `@tanstack/react-query`, `react-hook-form`, `zod` → all actively used
|
||||
- `react-dropzone` → used in upload components
|
||||
|
||||
---
|
||||
|
||||
## 4. Available Native Alternatives
|
||||
|
||||
### Frontend — axios vs fetch (LOW)
|
||||
`axios` is used for all API calls in `lib/api.ts`. The project targets modern browsers and uses Vite. Native `fetch` + `AbortController` could replace axios, reducing bundle by ~14kb gzipped. However, axios provides request/response interceptors that are actively used for auth token refresh — migration effort is medium. **Not urgent.**
|
||||
|
||||
---
|
||||
|
||||
## 5. Custom Implementations
|
||||
|
||||
No custom crypto or hand-rolled validation libraries found. All auth uses `python-jose` + `libpass` (bcrypt). VTT parsing is domain-specific and not replaceable by a library. No concerns.
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
| Check | Result | Severity |
|
||||
|-------|--------|---------|
|
||||
| Frontend CVEs | ✓ 0 vulnerabilities | OK |
|
||||
| Backend CVEs | ⚠ Not scanned | MEDIUM |
|
||||
| Frontend major updates | MSAL×2, Sentry, TS, Vite, ESLint | HIGH |
|
||||
| Frontend minor updates | Many | LOW |
|
||||
| Backend unused dep | `sendgrid` in pyproject.toml | MEDIUM |
|
||||
| Native alternatives | axios → fetch possible | LOW |
|
||||
| Custom implementations | None found | OK |
|
||||
143
.hex-skills/audit-reports/ln-626--dead-code.md
Normal file
143
.hex-skills/audit-reports/ln-626--dead-code.md
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
# Dead Code Audit — ln-626
|
||||
**Score: 7.0/10** | Issues: 14 (C:0 H:0 M:6 L:8)
|
||||
**Date:** 2026-04-30
|
||||
|
||||
---
|
||||
|
||||
## 1. Unused Imports (Python — F401)
|
||||
|
||||
ruff detected **58 unused import violations** across backend. Sample:
|
||||
|
||||
| File | Unused import |
|
||||
|------|--------------|
|
||||
| `routes_admin.py:9` | `get_current_user` |
|
||||
| `routes_admin.py:11` | `verify_password` |
|
||||
| `routes_admin.py:16` | `ChangePasswordRequest` |
|
||||
| `routes_admin.py:23` | `log_security_event` |
|
||||
| (+ 54 more across all files) | |
|
||||
|
||||
All are auto-fixable with `ruff check --fix --select F401`. The `__init__.py` files are correctly excluded via `per-file-ignores`.
|
||||
|
||||
**Severity: MEDIUM** — clutters imports, increases cognitive load when reading files.
|
||||
|
||||
---
|
||||
|
||||
## 2. Deprecated / Legacy Types (Frontend)
|
||||
|
||||
`frontend/src/types/api.ts` contains 3 deprecated exported types with JSDoc markers:
|
||||
|
||||
| Line | Type | Marker |
|
||||
|------|------|--------|
|
||||
| 96 | `TtsVoicesResponse` | `@deprecated Use ProviderVoicesResponse instead` |
|
||||
| 137 | `TtsOptionsResponse` | `@deprecated Use ProviderOptionsResponse instead` |
|
||||
| 555-566 | `Client` / `OrganizationLegacy` | `@deprecated Use Organization instead` + `export { Client as OrganizationLegacy }` |
|
||||
|
||||
These types are still exported, meaning consumers could use them by mistake. If no external consumers exist (library not published), they should be deleted.
|
||||
|
||||
**Severity: MEDIUM** — active deprecation markers indicate intent to remove. Leaving them causes confusion.
|
||||
|
||||
---
|
||||
|
||||
## 3. Legacy Status Values (Frontend)
|
||||
|
||||
`frontend/src/types/api.ts:12,14`:
|
||||
```ts
|
||||
| "tts_failed" // legacy: keep for back-compat
|
||||
| "render_failed" // legacy: keep for back-compat
|
||||
```
|
||||
|
||||
These job statuses are marked as legacy. If the backend no longer emits them, they are dead type branches. If it still does (for old jobs in MongoDB), they're valid — but should be clearly documented with a removal condition.
|
||||
|
||||
**Severity: LOW** — no runtime impact, but requires clarification.
|
||||
|
||||
---
|
||||
|
||||
## 4. Backward Compatibility Code (Frontend)
|
||||
|
||||
### lib/api.ts:239 — Legacy approval method (MEDIUM)
|
||||
```ts
|
||||
// Legacy method - calls approve_source for backwards compatibility
|
||||
```
|
||||
A backward-compat shim in the API client. If all callers have been updated to the new method, this should be removed.
|
||||
|
||||
### VideoWithCaptions.tsx:16–43 — Legacy single-language props (MEDIUM)
|
||||
```ts
|
||||
// Legacy single-language props (still supported)
|
||||
sourceLanguage?: string; // Language code for legacy props
|
||||
// Legacy props
|
||||
// Combine legacy props with tracks (use useMemo to prevent recreation)
|
||||
```
|
||||
The component maintains backward-compat with old single-language prop API. If no callers use these legacy props, they can be removed.
|
||||
|
||||
### JobDetail.tsx:41 — Legacy status mapping (LOW)
|
||||
```ts
|
||||
// Handle legacy approved_english/approved_source statuses (map to pending_final_review)
|
||||
```
|
||||
Status mapping shim for old job records. Should be removed after all existing jobs are migrated.
|
||||
|
||||
---
|
||||
|
||||
## 5. Commented-Out Code (Backend)
|
||||
|
||||
| Location | Content | Note |
|
||||
|------|------|---------|
|
||||
| `telemetry/tracing.py:5` | `# from opentelemetry.exporter.gcp.trace import CloudTraceSpanExporter # Disabled for local dev` | GCP trace exporter disabled |
|
||||
| `telemetry/metrics.py:5` | `# from opentelemetry.exporter.prometheus import PrometheusMetricReader # Disabled for local dev` | Prometheus reader disabled |
|
||||
| `pyproject.toml` | `# opentelemetry-exporter-prometheus = ... # Temporarily disabled - version conflicts` | Dep commented out |
|
||||
|
||||
These are intentional (local dev vs prod config), not dead code. However, the conditional should be expressed via environment config, not source comments. **Low priority.**
|
||||
|
||||
**Severity: LOW**
|
||||
|
||||
---
|
||||
|
||||
## 6. Leftover .old Files (MEDIUM)
|
||||
|
||||
| File | Age | Action |
|
||||
|------|-----|--------|
|
||||
| `docker-compose.yml.old` | Created 2026-03-03 (~2 months) | Delete |
|
||||
| `backend/Dockerfile.old` | Created 2026-03-03 (~2 months) | Delete |
|
||||
| `backend/.dockerignore.old` | — | Delete |
|
||||
|
||||
These files have no build references. Git history preserves them.
|
||||
|
||||
---
|
||||
|
||||
## 7. Unused Dockerfiles
|
||||
|
||||
| File | Referenced by compose / deploy? |
|
||||
|------|----------------------|
|
||||
| `backend/Dockerfile.ffmpeg-service` | No — ffmpeg is embedded in main worker |
|
||||
| `backend/Dockerfile.cloudrun` | Yes — referenced for Cloud Run deploys |
|
||||
| `backend/Dockerfile.whisper-service` | Yes — whisper-worker service in compose |
|
||||
|
||||
`Dockerfile.ffmpeg-service` appears to be dead — the main Dockerfile handles ffmpeg. Should be confirmed and deleted if unused.
|
||||
|
||||
**Severity: LOW**
|
||||
|
||||
---
|
||||
|
||||
## 8. Dead Config Field
|
||||
|
||||
`backend/app/core/config.py:272`:
|
||||
```python
|
||||
sendgrid_api_key: str = "" # Email (Mailgun — primary; sendgrid_api_key kept for backward compat)
|
||||
```
|
||||
The `sendgrid` package is not used. Both the config field and the `secrets_config.py` secret reference are dead.
|
||||
|
||||
**Severity: MEDIUM** — misleads ops into configuring a sendgrid secret that has no effect.
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
| Check | Issues | Severity |
|
||||
|-------|--------|---------|
|
||||
| Unused Python imports | 58 (auto-fixable) | MEDIUM |
|
||||
| Deprecated TS types | 3 types | MEDIUM |
|
||||
| Backward-compat shims | 3 in frontend | MEDIUM |
|
||||
| Commented-out code | 3 lines (2 telemetry imports, 1 `pyproject.toml` dependency) | LOW |
|
||||
| .old files | 3 files | MEDIUM |
|
||||
| Unused Dockerfile | Dockerfile.ffmpeg-service | LOW |
|
||||
| Dead config field | sendgrid_api_key | MEDIUM |
|
||||
| Legacy status values | 2 status strings | LOW |
|
||||
|
|
@ -1,172 +1,96 @@
|
|||
# =============================================================================
|
||||
# Apache Configuration for Accessible Video Platform
|
||||
# =============================================================================
|
||||
# Add this configuration to your existing VirtualHost for ai-sandbox.oliver.solutions
|
||||
# Location: /etc/apache2/sites-available/ai-sandbox.oliver.solutions-ssl.conf
|
||||
# Apache config fragment — Accessible Video Platform
|
||||
# Inject into: /etc/apache2/sites-available/optical-dev.oliver.solutions-ssl.conf
|
||||
#
|
||||
# Required modules:
|
||||
# sudo a2enmod proxy proxy_http proxy_wstunnel rewrite headers
|
||||
#
|
||||
# Container port map:
|
||||
# accessible-video-api → 0.0.0.0:8012->8000/tcp
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Frontend - Static React SPA served from subdirectory
|
||||
# -----------------------------------------------------------------------------
|
||||
# ── Timeouts for large video uploads (up to 2 GB, ~10 min) ──────────────────
|
||||
<IfModule mod_proxy.c>
|
||||
ProxyTimeout 600
|
||||
</IfModule>
|
||||
|
||||
# Serve frontend from /video-accessibility subdirectory
|
||||
# ── WebSocket proxy (MUST be before /api/ HTTP proxy) ───────────────────────
|
||||
# disablereuse=on prevents long-lived WS connections from exhausting the pool
|
||||
ProxyPassMatch ^/video-accessibility/api/v1/ws/(.*)$ ws://127.0.0.1:8012/api/v1/ws/$1 disablereuse=on
|
||||
ProxyPassReverse /video-accessibility/api/v1/ws/ ws://127.0.0.1:8012/api/v1/ws/
|
||||
|
||||
# ── API proxy ────────────────────────────────────────────────────────────────
|
||||
# Strips /video-accessibility prefix — FastAPI sees /api/v1/...
|
||||
ProxyPassMatch ^/video-accessibility/api/(.*)$ http://127.0.0.1:8012/api/$1
|
||||
ProxyPassReverse /video-accessibility/api/ http://127.0.0.1:8012/api/
|
||||
|
||||
# Swagger / OpenAPI
|
||||
ProxyPassMatch ^/video-accessibility/docs(/.*)?$ http://127.0.0.1:8012/docs$1
|
||||
ProxyPassReverse /video-accessibility/docs http://127.0.0.1:8012/docs
|
||||
ProxyPassMatch ^/video-accessibility/openapi\.json$ http://127.0.0.1:8012/openapi.json
|
||||
ProxyPassReverse /video-accessibility/openapi.json http://127.0.0.1:8012/openapi.json
|
||||
|
||||
# ── SPA static files ─────────────────────────────────────────────────────────
|
||||
Alias /video-accessibility /var/www/html/video-accessibility
|
||||
|
||||
<Directory /var/www/html/video-accessibility>
|
||||
# Basic options
|
||||
Options -Indexes +FollowSymLinks
|
||||
AllowOverride All
|
||||
AllowOverride None
|
||||
Require all granted
|
||||
|
||||
# React SPA routing - rewrite all requests to index.html
|
||||
# Allow video uploads up to 2 GB
|
||||
LimitRequestBody 2147483648
|
||||
|
||||
RewriteEngine On
|
||||
RewriteBase /video-accessibility
|
||||
RewriteBase /video-accessibility/
|
||||
|
||||
# Don't rewrite files or directories that exist
|
||||
RewriteCond %{REQUEST_FILENAME} !-f
|
||||
RewriteCond %{REQUEST_FILENAME} !-d
|
||||
# Serve real files/directories directly (JS, CSS, assets, fonts)
|
||||
RewriteCond %{REQUEST_FILENAME} -f [OR]
|
||||
RewriteCond %{REQUEST_FILENAME} -d
|
||||
RewriteRule ^ - [L]
|
||||
|
||||
# Rewrite everything else to index.html
|
||||
RewriteRule ^ /video-accessibility/index.html [L]
|
||||
# Everything else → index.html (React Router handles client-side nav)
|
||||
RewriteRule ^ index.html [L]
|
||||
|
||||
# Security headers
|
||||
Header always set X-Frame-Options "SAMEORIGIN"
|
||||
Header always set X-Content-Type-Options "nosniff"
|
||||
Header always set X-XSS-Protection "1; mode=block"
|
||||
Header always set Referrer-Policy "strict-origin-when-cross-origin"
|
||||
|
||||
# Cache control for static assets
|
||||
<FilesMatch "\.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$">
|
||||
# Cache-bust hashed assets indefinitely; never cache HTML
|
||||
<FilesMatch "\.(js|css|woff2?|ttf|eot|png|jpg|jpeg|gif|ico|svg)$">
|
||||
Header set Cache-Control "public, max-age=31536000, immutable"
|
||||
</FilesMatch>
|
||||
|
||||
# No cache for HTML files
|
||||
<FilesMatch "\.(html)$">
|
||||
<FilesMatch "\.html$">
|
||||
Header set Cache-Control "no-cache, no-store, must-revalidate"
|
||||
Header set Pragma "no-cache"
|
||||
Header set Expires "0"
|
||||
</FilesMatch>
|
||||
</Directory>
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Backend API - Reverse proxy to Docker container
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Proxy backend API to Docker container on port 8000
|
||||
<Location /video-accessibility-back>
|
||||
# Preserve original host header
|
||||
ProxyPreserveHost On
|
||||
|
||||
# Proxy HTTP requests
|
||||
ProxyPass http://localhost:8000
|
||||
ProxyPassReverse http://localhost:8000
|
||||
|
||||
# Proxy timeout settings (important for long-running video processing)
|
||||
ProxyTimeout 300
|
||||
|
||||
# WebSocket support (CRITICAL for real-time job updates)
|
||||
RewriteEngine On
|
||||
RewriteCond %{HTTP:Upgrade} =websocket [NC]
|
||||
RewriteRule /video-accessibility-back/(.*) ws://localhost:8000/$1 [P,L]
|
||||
RewriteCond %{HTTP:Upgrade} !=websocket [NC]
|
||||
RewriteRule /video-accessibility-back/(.*) http://localhost:8000/$1 [P,L]
|
||||
|
||||
# Security headers
|
||||
Header always set X-Frame-Options "SAMEORIGIN"
|
||||
Header always set X-Content-Type-Options "nosniff"
|
||||
|
||||
# CORS is handled by the backend, don't add headers here
|
||||
</Location>
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Required Apache Modules
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Enable these modules with:
|
||||
# sudo a2enmod rewrite
|
||||
# sudo a2enmod proxy
|
||||
# sudo a2enmod proxy_http
|
||||
# sudo a2enmod proxy_wstunnel
|
||||
# sudo a2enmod headers
|
||||
# sudo systemctl restart apache2
|
||||
|
||||
# Verify modules are enabled:
|
||||
# apache2ctl -M | grep -E '(rewrite|proxy|headers)'
|
||||
Header always set Referrer-Policy "strict-origin-when-cross-origin"
|
||||
</Directory>
|
||||
|
||||
# =============================================================================
|
||||
# Full VirtualHost Example
|
||||
# Full VirtualHost skeleton (reference — values match optical-web-1)
|
||||
# =============================================================================
|
||||
|
||||
# Example of complete VirtualHost configuration:
|
||||
#
|
||||
# <VirtualHost *:443>
|
||||
# ServerName ai-sandbox.oliver.solutions
|
||||
# ServerAdmin admin@oliver.solutions
|
||||
#
|
||||
# ServerName optical-dev.oliver.solutions
|
||||
# DocumentRoot /var/www/html
|
||||
#
|
||||
# # SSL Configuration (with wildcard cert)
|
||||
# SSLEngine on
|
||||
# SSLCertificateFile /path/to/wildcard-ai-sandbox.oliver.solutions.crt
|
||||
# SSLCertificateKeyFile /path/to/wildcard-ai-sandbox.oliver.solutions.key
|
||||
# SSLCertificateChainFile /path/to/chain.crt # If needed
|
||||
# SSLCertificateFile /path/to/wildcard.crt
|
||||
# SSLCertificateKeyFile /path/to/wildcard.key
|
||||
#
|
||||
# # SSL Protocol and Cipher settings
|
||||
# SSLProtocol all -SSLv2 -SSLv3 -TLSv1 -TLSv1.1
|
||||
# SSLProtocol all -SSLv2 -SSLv3 -TLSv1 -TLSv1.1
|
||||
# SSLCipherSuite HIGH:!aNULL:!MD5
|
||||
#
|
||||
# # Frontend configuration (from above)
|
||||
# Alias /video-accessibility /var/www/html/video-accessibility
|
||||
# <Directory /var/www/html/video-accessibility>
|
||||
# ...
|
||||
# </Directory>
|
||||
# # — paste the block above here —
|
||||
#
|
||||
# # Backend API configuration (from above)
|
||||
# <Location /video-accessibility-back>
|
||||
# ...
|
||||
# </Location>
|
||||
#
|
||||
# # Logging
|
||||
# ErrorLog ${APACHE_LOG_DIR}/ai-sandbox-error.log
|
||||
# CustomLog ${APACHE_LOG_DIR}/ai-sandbox-access.log combined
|
||||
# ErrorLog ${APACHE_LOG_DIR}/optical-dev-error.log
|
||||
# CustomLog ${APACHE_LOG_DIR}/optical-dev-access.log combined
|
||||
# </VirtualHost>
|
||||
|
||||
# =============================================================================
|
||||
# Testing & Verification
|
||||
# Verify
|
||||
# =============================================================================
|
||||
|
||||
# Test Apache configuration:
|
||||
# sudo apache2ctl configtest
|
||||
#
|
||||
# Restart Apache:
|
||||
# sudo systemctl restart apache2
|
||||
#
|
||||
# Test frontend:
|
||||
# curl -I https://ai-sandbox.oliver.solutions/video-accessibility
|
||||
#
|
||||
# Test backend:
|
||||
# curl https://ai-sandbox.oliver.solutions/video-accessibility-back/health
|
||||
#
|
||||
# Test WebSocket (requires wscat):
|
||||
# wscat -c wss://ai-sandbox.oliver.solutions/video-accessibility-back/api/v1/ws/job-list
|
||||
|
||||
# =============================================================================
|
||||
# Troubleshooting
|
||||
# =============================================================================
|
||||
|
||||
# Check Apache logs:
|
||||
# sudo tail -f /var/log/apache2/ai-sandbox-error.log
|
||||
# sudo tail -f /var/log/apache2/ai-sandbox-access.log
|
||||
#
|
||||
# Check if backend is running:
|
||||
# curl http://localhost:8000/health
|
||||
#
|
||||
# Check Docker containers:
|
||||
# cd /opt/accessible-video
|
||||
# docker-compose ps
|
||||
#
|
||||
# Common issues:
|
||||
# - 502 Bad Gateway: Backend container not running
|
||||
# - 404 Not Found: Frontend not deployed or Apache alias incorrect
|
||||
# - WebSocket fails: mod_proxy_wstunnel not enabled
|
||||
# - CORS errors: Check backend CORS configuration, not Apache
|
||||
# sudo apache2ctl configtest
|
||||
# sudo systemctl reload apache2
|
||||
# curl -I https://optical-dev.oliver.solutions/video-accessibility/
|
||||
# curl https://optical-dev.oliver.solutions/video-accessibility/api/v1/health
|
||||
# wscat -c wss://optical-dev.oliver.solutions/video-accessibility/api/v1/ws/job-list
|
||||
|
|
|
|||
|
|
@ -1,92 +0,0 @@
|
|||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# Poetry (keep poetry.lock for reproducible builds)
|
||||
# poetry.lock
|
||||
|
||||
# Virtual environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
|
||||
# Testing
|
||||
.coverage
|
||||
.pytest_cache/
|
||||
.mypy_cache/
|
||||
.tox/
|
||||
htmlcov/
|
||||
coverage.xml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
|
||||
# Documentation
|
||||
docs/
|
||||
*.md
|
||||
README*
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
logs/
|
||||
|
||||
# Git
|
||||
.git/
|
||||
.gitignore
|
||||
|
||||
# Docker
|
||||
Dockerfile*
|
||||
.dockerignore
|
||||
docker-compose*
|
||||
|
||||
# CI/CD
|
||||
.github/
|
||||
|
||||
# Local development
|
||||
.env.local
|
||||
.env.development
|
||||
.env.test
|
||||
|
||||
# Temporary files
|
||||
tmp/
|
||||
temp/
|
||||
*.tmp
|
||||
*.bak
|
||||
|
|
@ -3,8 +3,8 @@
|
|||
# =============================================================================
|
||||
# Stage 1: Builder - Install dependencies
|
||||
# Stage 2: Base - Common runtime for API and Worker
|
||||
# Stage 3: API - FastAPI + Gunicorn (with ffmpeg for TTS audio conversion)
|
||||
# Stage 4: Worker - Celery worker (with ffmpeg for video processing)
|
||||
# Stage 3: API - FastAPI + Gunicorn (no separate ffmpeg install — ffmpeg comes from base; heavy tasks run on Cloud Run Jobs)
|
||||
# Stage 4: Worker - Celery worker, lightweight queues only (notify, embed)
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
|
@ -46,6 +46,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
libmagic1 \
|
||||
curl \
|
||||
tini \
|
||||
ffmpeg \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean
|
||||
|
||||
|
|
@ -72,21 +73,10 @@ USER app
|
|||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Stage 3: API - FastAPI + Gunicorn (Production API Server)
|
||||
# Heavy pipeline tasks (ingest/translate/render) run on Cloud Run Jobs
|
||||
# -----------------------------------------------------------------------------
|
||||
FROM base AS api
|
||||
|
||||
# Switch to root to install ffmpeg
|
||||
USER root
|
||||
|
||||
# Install ffmpeg for TTS audio conversion
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ffmpeg \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean
|
||||
|
||||
# Switch back to non-root user
|
||||
USER app
|
||||
|
||||
# Set production environment variables
|
||||
ENV APP_ENV=prod
|
||||
|
||||
|
|
@ -104,22 +94,10 @@ ENTRYPOINT ["tini", "--"]
|
|||
CMD ["gunicorn", "-c", "gunicorn_conf.py", "app.main:app"]
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Stage 4: Worker - Celery Worker (with ffmpeg for video processing)
|
||||
# Stage 4: Worker - Celery Worker (lightweight queues: notify, embed)
|
||||
# -----------------------------------------------------------------------------
|
||||
FROM base AS worker
|
||||
|
||||
# Switch back to root to install ffmpeg
|
||||
USER root
|
||||
|
||||
# Install ffmpeg for video processing
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ffmpeg \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean
|
||||
|
||||
# Switch back to non-root user
|
||||
USER app
|
||||
|
||||
# Set production environment variables
|
||||
# WORKER_CONCURRENCY can be overridden at runtime (default: 8)
|
||||
ENV APP_ENV=prod \
|
||||
|
|
@ -148,18 +126,6 @@ CMD celery -A celery_worker worker \
|
|||
# -----------------------------------------------------------------------------
|
||||
FROM base AS whisper-worker
|
||||
|
||||
# Switch back to root to install ffmpeg
|
||||
USER root
|
||||
|
||||
# Install ffmpeg for audio extraction
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ffmpeg \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean
|
||||
|
||||
# Switch back to non-root user
|
||||
USER app
|
||||
|
||||
# Pre-download Whisper medium model during build to avoid cold start delays
|
||||
# Model is cached in ~/.cache/huggingface/hub (~1.5GB)
|
||||
RUN python -c "from faster_whisper import WhisperModel; WhisperModel('medium', device='cpu', compute_type='int8')"
|
||||
|
|
|
|||
55
backend/Dockerfile.cloudrun
Normal file
55
backend/Dockerfile.cloudrun
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# =============================================================================
|
||||
# Cloud Run Job image — va-worker
|
||||
#
|
||||
# Reuses the multi-stage base from Dockerfile.
|
||||
# Entrypoint: python -m app.tasks.runner --task <name> --job-id <id>
|
||||
#
|
||||
# Build:
|
||||
# docker build -f backend/Dockerfile.cloudrun -t va-worker backend/
|
||||
# =============================================================================
|
||||
|
||||
# ── Stage 1: Builder ─────────────────────────────────────────────────────────
|
||||
FROM python:3.11-slim AS builder
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install --no-cache-dir poetry==1.8.3
|
||||
|
||||
WORKDIR /app
|
||||
COPY pyproject.toml poetry.lock ./
|
||||
RUN poetry config virtualenvs.create false \
|
||||
&& poetry install --no-interaction --no-ansi --only main
|
||||
|
||||
# ── Stage 2: Runtime ─────────────────────────────────────────────────────────
|
||||
FROM python:3.11-slim AS runtime
|
||||
|
||||
# ffmpeg required for video rendering tasks
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ffmpeg \
|
||||
tini \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy installed packages from builder
|
||||
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
|
||||
COPY --from=builder /usr/local/bin /usr/local/bin
|
||||
|
||||
WORKDIR /app
|
||||
COPY . .
|
||||
|
||||
# Non-root user for security
|
||||
RUN groupadd -r worker && useradd -r -g worker worker \
|
||||
&& chown -R worker:worker /app
|
||||
USER worker
|
||||
|
||||
# Cloud Run Jobs: no persistent HTTP port needed.
|
||||
# Cloud Run passes CLOUD_RUN_TASK_INDEX and CLOUD_RUN_TASK_COUNT env vars.
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONPATH=/app
|
||||
|
||||
ENTRYPOINT ["tini", "--", "python", "-m", "app.tasks.runner"]
|
||||
# Args are injected per-execution via Cloud Run Job overrides:
|
||||
# --task ingest|translate|render|rerender --job-id <id> [--language <lang>] ...
|
||||
|
|
@ -1,127 +0,0 @@
|
|||
# Build stage - Install dependencies and build wheels
|
||||
FROM python:3.11-slim AS builder
|
||||
|
||||
# Install build dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Poetry
|
||||
RUN pip install poetry==1.8.2
|
||||
|
||||
# Set Poetry configuration
|
||||
ENV POETRY_NO_INTERACTION=1 \
|
||||
POETRY_VENV_IN_PROJECT=1 \
|
||||
POETRY_CACHE_DIR=/tmp/poetry_cache
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy dependency files
|
||||
COPY pyproject.toml poetry.lock ./
|
||||
|
||||
# Install dependencies into venv
|
||||
RUN poetry config virtualenvs.in-project true && \
|
||||
poetry lock --no-update || true && \
|
||||
poetry install --only=main --no-root && \
|
||||
rm -rf $POETRY_CACHE_DIR
|
||||
|
||||
# Base runtime stage
|
||||
FROM python:3.11-slim AS base
|
||||
|
||||
# Install runtime system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
ffmpeg \
|
||||
curl \
|
||||
tini \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean
|
||||
|
||||
# Create non-root user
|
||||
RUN groupadd --gid 1000 app \
|
||||
&& useradd --uid 1000 --gid app --shell /bin/bash --create-home app
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy virtual environment from builder stage
|
||||
COPY --from=builder --chown=app:app /app/.venv /app/.venv
|
||||
|
||||
# Ensure venv is in PATH
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
|
||||
# Copy application code
|
||||
COPY --chown=app:app . .
|
||||
|
||||
# Switch to non-root user
|
||||
USER app
|
||||
|
||||
# Production API stage
|
||||
FROM base AS production
|
||||
|
||||
# Set environment variables for production
|
||||
ENV APP_ENV=prod \
|
||||
PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Use tini as init system for proper signal handling
|
||||
ENTRYPOINT ["tini", "--"]
|
||||
|
||||
# Default command for API server
|
||||
CMD ["gunicorn", "-c", "gunicorn_conf.py"]
|
||||
|
||||
# Worker stage for Celery workers
|
||||
FROM base AS worker
|
||||
|
||||
# Set environment variables for worker
|
||||
ENV APP_ENV=prod \
|
||||
PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
C_FORCE_ROOT=1
|
||||
|
||||
# Health check for worker (check if Celery is responding)
|
||||
HEALTHCHECK --interval=60s --timeout=15s --start-period=10s --retries=3 \
|
||||
CMD python -c "from celery import Celery; app=Celery('app'); print('Worker healthy')" || exit 1
|
||||
|
||||
# Use tini as init system for proper signal handling
|
||||
ENTRYPOINT ["tini", "--"]
|
||||
|
||||
# Default command for Celery worker
|
||||
CMD ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=1"]
|
||||
|
||||
# Development stage with dev dependencies
|
||||
FROM builder AS development
|
||||
|
||||
# Install all dependencies including dev
|
||||
RUN poetry install --no-root && rm -rf $POETRY_CACHE_DIR
|
||||
|
||||
# Install additional dev tools
|
||||
RUN apt-get update && apt-get install -y \
|
||||
git \
|
||||
vim \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy application code
|
||||
COPY --chown=app:app . .
|
||||
|
||||
# Switch to non-root user
|
||||
USER app
|
||||
|
||||
# Set environment for development
|
||||
ENV APP_ENV=dev \
|
||||
PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
# Development command with hot reload
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||
|
|
@ -1,26 +1,28 @@
|
|||
from datetime import datetime, timedelta
|
||||
from typing import Optional
|
||||
|
||||
from bson import ObjectId
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
||||
from ...core.authz import MembershipContext, get_membership_context
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import get_current_user, require_roles
|
||||
from ...core.logging import get_logger
|
||||
from ...core.security import get_password_hash, verify_password
|
||||
from ...models.user import User, UserRole
|
||||
from ...core.security import get_password_hash
|
||||
from ...models.audit_log import AuditAction, AuditLogQuery, AuditLogResponse
|
||||
from ...models.user import User, UserRole
|
||||
from ...schemas.auth import (
|
||||
AdminStatsResponse,
|
||||
ChangePasswordRequest,
|
||||
CreateUserRequest,
|
||||
ResetPasswordRequest,
|
||||
UpdateUserRequest,
|
||||
UserListResponse,
|
||||
UserResponse,
|
||||
)
|
||||
from ...services.audit_logger import audit_logger, log_user_management, log_security_event
|
||||
from ...services.audit_logger import (
|
||||
audit_logger,
|
||||
log_user_management,
|
||||
)
|
||||
from ...telemetry import app_metrics
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
|
@ -31,28 +33,48 @@ router = APIRouter(prefix="/admin", tags=["admin"])
|
|||
async def list_users(
|
||||
page: int = Query(1, ge=1),
|
||||
size: int = Query(20, ge=1, le=500),
|
||||
role: Optional[str] = Query(None),
|
||||
role: str | None = Query(None, description="Single role or comma-separated list, e.g. 'linguist,admin'"),
|
||||
active_only: bool = Query(True),
|
||||
org_id: str | None = Query(None, description="Filter by org (platform admin only)"),
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""List users with filtering and pagination (admin only)"""
|
||||
query = {}
|
||||
|
||||
query: dict = {}
|
||||
|
||||
if role:
|
||||
query["role"] = role
|
||||
|
||||
roles = [r.strip() for r in role.split(",") if r.strip()]
|
||||
query["role"] = {"$in": roles} if len(roles) > 1 else roles[0]
|
||||
|
||||
if active_only:
|
||||
query["is_active"] = True
|
||||
|
||||
|
||||
if not ctx.is_platform_admin:
|
||||
# Org-scoped admin: show only users in their org(s) via membership collection
|
||||
accessible_org_ids = ctx.accessible_org_ids()
|
||||
if not accessible_org_ids:
|
||||
return UserListResponse(users=[], total=0, page=page, size=size)
|
||||
member_ids_cursor = db.memberships.find(
|
||||
{"organization_id": {"$in": accessible_org_ids}},
|
||||
{"user_id": 1},
|
||||
)
|
||||
member_ids = [doc["user_id"] async for doc in member_ids_cursor]
|
||||
query["_id"] = {"$in": member_ids}
|
||||
elif org_id:
|
||||
# Platform admin filtered to a specific org
|
||||
member_ids_cursor = db.memberships.find({"organization_id": org_id}, {"user_id": 1})
|
||||
member_ids = [doc["user_id"] async for doc in member_ids_cursor]
|
||||
query["_id"] = {"$in": member_ids}
|
||||
|
||||
# Get total count
|
||||
total = await db.users.count_documents(query)
|
||||
|
||||
|
||||
# Get paginated results
|
||||
skip = (page - 1) * size
|
||||
cursor = db.users.find(query, {"hashed_password": 0}).sort("created_at", -1).skip(skip).limit(size)
|
||||
users = await cursor.to_list(length=size)
|
||||
|
||||
|
||||
user_responses = []
|
||||
for user_doc in users:
|
||||
user_responses.append(UserResponse(
|
||||
|
|
@ -64,8 +86,9 @@ async def list_users(
|
|||
is_active=user_doc["is_active"],
|
||||
created_at=user_doc.get("created_at", datetime.utcnow()).isoformat(),
|
||||
pm_client_ids=user_doc.get("pm_client_ids", []),
|
||||
languages=user_doc.get("languages", []),
|
||||
))
|
||||
|
||||
|
||||
return UserListResponse(
|
||||
users=user_responses,
|
||||
total=total,
|
||||
|
|
@ -74,6 +97,32 @@ async def list_users(
|
|||
)
|
||||
|
||||
|
||||
@router.get("/brief-assignees", response_model=list[UserResponse])
async def list_brief_assignees(
    current_user: User = Depends(get_current_user),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Return users who can be assigned a brief (PM, production, admin). Accessible to all brief-creating roles."""
    # NOTE(review): unlike the org-scoped listing elsewhere in this module, this
    # query is not restricted by organization membership — confirm that is intended.
    assignable_roles = [
        UserRole.ADMIN.value,
        UserRole.PROJECT_MANAGER.value,
        UserRole.PRODUCTION.value,
    ]
    selector = {"role": {"$in": assignable_roles}, "is_active": True}

    # Exclude the password hash from the projection; order alphabetically by name.
    cursor = db.users.find(selector, {"hashed_password": 0}).sort("full_name", 1)
    user_docs = await cursor.to_list(None)

    assignees = []
    for doc in user_docs:
        created = doc.get("created_at")
        assignees.append(
            UserResponse(
                id=str(doc["_id"]),
                email=doc["email"],
                full_name=doc["full_name"],
                role=doc["role"],
                auth_provider=doc.get("auth_provider", "local"),
                is_active=doc["is_active"],
                # Missing/empty timestamps are reported as None rather than "now".
                created_at=created.isoformat() if created else None,
                pm_client_ids=doc.get("pm_client_ids", []),
                languages=doc.get("languages", []),
            )
        )
    return assignees
|
||||
|
||||
|
||||
@router.get("/users/{user_id}", response_model=UserResponse)
|
||||
async def get_user(
|
||||
user_id: str,
|
||||
|
|
@ -87,7 +136,7 @@ async def get_user(
|
|||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="User not found"
|
||||
)
|
||||
|
||||
|
||||
return UserResponse(
|
||||
id=str(user_doc["_id"]),
|
||||
email=user_doc["email"],
|
||||
|
|
@ -97,6 +146,7 @@ async def get_user(
|
|||
is_active=user_doc["is_active"],
|
||||
created_at=user_doc.get("created_at", datetime.utcnow()).isoformat(),
|
||||
pm_client_ids=user_doc.get("pm_client_ids", []),
|
||||
languages=user_doc.get("languages", []),
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -115,7 +165,7 @@ async def create_user(
|
|||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="User with this email already exists"
|
||||
)
|
||||
|
||||
|
||||
# Create user document
|
||||
user_id = str(ObjectId())
|
||||
user_doc = {
|
||||
|
|
@ -129,12 +179,12 @@ async def create_user(
|
|||
"created_at": datetime.utcnow(),
|
||||
"updated_at": datetime.utcnow()
|
||||
}
|
||||
|
||||
|
||||
await db.users.insert_one(user_doc)
|
||||
|
||||
|
||||
# Record metrics
|
||||
app_metrics.record_auth_attempt("user_created", user_data.role.value)
|
||||
|
||||
|
||||
logger.info(f"Admin {current_user.id} created user {user_id} with role {user_data.role.value}")
|
||||
await log_user_management(
|
||||
AuditAction.USER_CREATE, user_id, current_user, request,
|
||||
|
|
@ -150,6 +200,7 @@ async def create_user(
|
|||
is_active=True,
|
||||
created_at=user_doc["created_at"].isoformat(),
|
||||
pm_client_ids=[],
|
||||
languages=[],
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -169,7 +220,7 @@ async def update_user(
|
|||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="User not found"
|
||||
)
|
||||
|
||||
|
||||
# Check if email is being changed and doesn't conflict
|
||||
if user_update.email and user_update.email != user_doc["email"]:
|
||||
existing_user = await db.users.find_one({"email": user_update.email, "_id": {"$ne": user_id}})
|
||||
|
|
@ -178,10 +229,10 @@ async def update_user(
|
|||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Email already in use by another user"
|
||||
)
|
||||
|
||||
|
||||
# Build update document
|
||||
update_data = {"updated_at": datetime.utcnow()}
|
||||
|
||||
|
||||
if user_update.email:
|
||||
update_data["email"] = user_update.email
|
||||
if user_update.full_name:
|
||||
|
|
@ -190,19 +241,19 @@ async def update_user(
|
|||
update_data["role"] = user_update.role.value
|
||||
if user_update.is_active is not None:
|
||||
update_data["is_active"] = user_update.is_active
|
||||
|
||||
|
||||
# Update user
|
||||
result = await db.users.find_one_and_update(
|
||||
{"_id": user_id},
|
||||
{"$set": update_data},
|
||||
return_document=True
|
||||
)
|
||||
|
||||
|
||||
logger.info(f"Admin {current_user.id} updated user {user_id}")
|
||||
action = AuditAction.USER_ROLE_CHANGE if user_update.role else AuditAction.USER_UPDATE
|
||||
await log_user_management(
|
||||
action, user_id, current_user, request,
|
||||
details={k: v for k, v in user_update.dict(exclude_none=True).items()},
|
||||
details=dict(user_update.dict(exclude_none=True).items()),
|
||||
)
|
||||
|
||||
return UserResponse(
|
||||
|
|
@ -214,6 +265,7 @@ async def update_user(
|
|||
is_active=result["is_active"],
|
||||
created_at=result.get("created_at", datetime.utcnow()).isoformat(),
|
||||
pm_client_ids=result.get("pm_client_ids", []),
|
||||
languages=result.get("languages", []),
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -230,7 +282,7 @@ async def deactivate_user(
|
|||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Cannot deactivate your own account"
|
||||
)
|
||||
|
||||
|
||||
result = await db.users.update_one(
|
||||
{"_id": user_id},
|
||||
{
|
||||
|
|
@ -240,13 +292,13 @@ async def deactivate_user(
|
|||
}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
if result.matched_count == 0:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="User not found"
|
||||
)
|
||||
|
||||
|
||||
logger.info(f"Admin {current_user.id} deactivated user {user_id}")
|
||||
await log_user_management(AuditAction.USER_DEACTIVATE, user_id, current_user, request)
|
||||
|
||||
|
|
@ -264,10 +316,10 @@ async def admin_reset_password(
|
|||
# Generate temporary password
|
||||
import secrets
|
||||
import string
|
||||
|
||||
|
||||
temp_password = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(12))
|
||||
hashed_password = get_password_hash(temp_password)
|
||||
|
||||
|
||||
result = await db.users.update_one(
|
||||
{"_id": user_id},
|
||||
{
|
||||
|
|
@ -277,15 +329,15 @@ async def admin_reset_password(
|
|||
}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
if result.matched_count == 0:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="User not found"
|
||||
)
|
||||
|
||||
|
||||
logger.info(f"Admin {current_user.id} reset password for user {user_id}")
|
||||
|
||||
|
||||
# In production, send email with temp password instead of returning it
|
||||
return {
|
||||
"message": "Password reset successfully",
|
||||
|
|
@ -301,23 +353,23 @@ async def get_admin_stats(
|
|||
"""Get system statistics (production/admin only)"""
|
||||
# Get user count
|
||||
total_users = await db.users.count_documents({"is_active": True})
|
||||
|
||||
|
||||
# Get job counts
|
||||
total_jobs = await db.jobs.count_documents({})
|
||||
|
||||
|
||||
# Get jobs by status
|
||||
pipeline = [
|
||||
{"$group": {"_id": "$status", "count": {"$sum": 1}}}
|
||||
]
|
||||
status_counts = await db.jobs.aggregate(pipeline).to_list(None)
|
||||
jobs_by_status = {item["_id"]: item["count"] for item in status_counts}
|
||||
|
||||
|
||||
# Get jobs created today
|
||||
today_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
active_jobs_today = await db.jobs.count_documents({
|
||||
"created_at": {"$gte": today_start}
|
||||
})
|
||||
|
||||
|
||||
# Calculate average processing time for completed jobs
|
||||
avg_processing_pipeline = [
|
||||
{"$match": {"status": "completed", "created_at": {"$exists": True}, "updated_at": {"$exists": True}}},
|
||||
|
|
@ -338,10 +390,10 @@ async def get_admin_stats(
|
|||
}
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
avg_result = await db.jobs.aggregate(avg_processing_pipeline).to_list(None)
|
||||
avg_processing_time = avg_result[0]["avg_processing_time"] if avg_result else 0.0
|
||||
|
||||
|
||||
return AdminStatsResponse(
|
||||
total_users=total_users,
|
||||
total_jobs=total_jobs,
|
||||
|
|
@ -362,7 +414,7 @@ async def detailed_health_check(
|
|||
"timestamp": datetime.utcnow().isoformat(),
|
||||
"components": {}
|
||||
}
|
||||
|
||||
|
||||
# Check MongoDB
|
||||
try:
|
||||
await db.command("ping")
|
||||
|
|
@ -370,7 +422,7 @@ async def detailed_health_check(
|
|||
except Exception as e:
|
||||
health_status["components"]["mongodb"] = {"status": "unhealthy", "error": str(e)}
|
||||
health_status["status"] = "degraded"
|
||||
|
||||
|
||||
# Check Redis (via import to avoid circular dependency)
|
||||
try:
|
||||
from ...core.redis import redis_client
|
||||
|
|
@ -382,23 +434,23 @@ async def detailed_health_check(
|
|||
except Exception as e:
|
||||
health_status["components"]["redis"] = {"status": "unhealthy", "error": str(e)}
|
||||
health_status["status"] = "degraded"
|
||||
|
||||
|
||||
# Check GCS (basic check)
|
||||
try:
|
||||
from ...services.gcs import gcs_service
|
||||
# Simple check to see if bucket is accessible
|
||||
bucket_exists = await gcs_service.file_exists("health_check_dummy") # This will return False but won't error if bucket accessible
|
||||
await gcs_service.file_exists("health_check_dummy") # This will return False but won't error if bucket accessible
|
||||
health_status["components"]["gcs"] = {"status": "healthy"}
|
||||
except Exception as e:
|
||||
health_status["components"]["gcs"] = {"status": "unhealthy", "error": str(e)}
|
||||
health_status["status"] = "degraded"
|
||||
|
||||
|
||||
# Check job queue health
|
||||
try:
|
||||
from ...tasks import celery_app
|
||||
inspect = celery_app.control.inspect()
|
||||
active_tasks = inspect.active()
|
||||
|
||||
|
||||
if active_tasks:
|
||||
total_active = sum(len(tasks) for tasks in active_tasks.values())
|
||||
health_status["components"]["celery"] = {
|
||||
|
|
@ -415,7 +467,7 @@ async def detailed_health_check(
|
|||
except Exception as e:
|
||||
health_status["components"]["celery"] = {"status": "unhealthy", "error": str(e)}
|
||||
health_status["status"] = "degraded"
|
||||
|
||||
|
||||
return health_status
|
||||
|
||||
|
||||
|
|
@ -427,18 +479,18 @@ async def get_job_statistics(
|
|||
):
|
||||
"""Get job processing statistics (reviewer/production/admin only)"""
|
||||
since_date = datetime.utcnow() - timedelta(days=days)
|
||||
|
||||
|
||||
# Jobs created in period
|
||||
jobs_in_period = await db.jobs.count_documents({
|
||||
"created_at": {"$gte": since_date}
|
||||
})
|
||||
|
||||
|
||||
# Jobs completed in period
|
||||
jobs_completed = await db.jobs.count_documents({
|
||||
"status": "completed",
|
||||
"updated_at": {"$gte": since_date}
|
||||
})
|
||||
|
||||
|
||||
# Average processing time for completed jobs
|
||||
avg_pipeline = [
|
||||
{
|
||||
|
|
@ -467,12 +519,12 @@ async def get_job_statistics(
|
|||
}
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
avg_result = await db.jobs.aggregate(avg_pipeline).to_list(None)
|
||||
processing_stats = avg_result[0] if avg_result else {
|
||||
"avg_time": 0, "min_time": 0, "max_time": 0
|
||||
}
|
||||
|
||||
|
||||
# Current queue status
|
||||
current_queue_stats = {}
|
||||
pipeline = [
|
||||
|
|
@ -481,7 +533,7 @@ async def get_job_statistics(
|
|||
status_counts = await db.jobs.aggregate(pipeline).to_list(None)
|
||||
for item in status_counts:
|
||||
current_queue_stats[item["_id"]] = item["count"]
|
||||
|
||||
|
||||
return {
|
||||
"period_days": days,
|
||||
"jobs_created": jobs_in_period,
|
||||
|
|
@ -506,7 +558,7 @@ async def admin_force_password_reset(
|
|||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Cannot reset your own password this way"
|
||||
)
|
||||
|
||||
|
||||
# Check if user exists
|
||||
user_doc = await db.users.find_one({"_id": user_id})
|
||||
if not user_doc:
|
||||
|
|
@ -514,15 +566,15 @@ async def admin_force_password_reset(
|
|||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="User not found"
|
||||
)
|
||||
|
||||
|
||||
# Generate secure temporary password
|
||||
import secrets
|
||||
import string
|
||||
|
||||
|
||||
temp_password = ''.join(secrets.choice(
|
||||
string.ascii_letters + string.digits + "!@#$%"
|
||||
) for _ in range(16))
|
||||
|
||||
|
||||
# Update password
|
||||
await db.users.update_one(
|
||||
{"_id": user_id},
|
||||
|
|
@ -533,10 +585,10 @@ async def admin_force_password_reset(
|
|||
}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
# TODO: In production, send via secure email instead of returning password
|
||||
logger.info(f"Admin {current_user.id} reset password for user {user_id}")
|
||||
|
||||
|
||||
return {
|
||||
"message": "Password reset successfully",
|
||||
"temporary_password": temp_password,
|
||||
|
|
@ -559,7 +611,7 @@ async def reprocess_job(
|
|||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Job not found"
|
||||
)
|
||||
|
||||
|
||||
# Reset job to created status for reprocessing
|
||||
await db.jobs.update_one(
|
||||
{"_id": job_id},
|
||||
|
|
@ -579,7 +631,7 @@ async def reprocess_job(
|
|||
}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
# Broadcast status update
|
||||
try:
|
||||
from ...services.websocket import connection_manager
|
||||
|
|
@ -591,32 +643,32 @@ async def reprocess_job(
|
|||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to broadcast status update for job reset {job_id}: {e}")
|
||||
|
||||
|
||||
# Trigger ingestion task
|
||||
from ...tasks.ingest_and_ai import ingest_and_ai_task
|
||||
ingest_and_ai_task.delay(job_id)
|
||||
|
||||
|
||||
logger.warning(f"Admin {current_user.id} triggered reprocessing for job {job_id}")
|
||||
|
||||
|
||||
return {"message": f"Job {job_id} queued for reprocessing"}
|
||||
|
||||
|
||||
@router.get("/audit-logs", response_model=AuditLogResponse)
|
||||
async def get_audit_logs_detailed(
|
||||
# Time range
|
||||
start_date: Optional[datetime] = Query(None, description="Start date for audit logs"),
|
||||
end_date: Optional[datetime] = Query(None, description="End date for audit logs"),
|
||||
start_date: datetime | None = Query(None, description="Start date for audit logs"),
|
||||
end_date: datetime | None = Query(None, description="End date for audit logs"),
|
||||
|
||||
# Filters
|
||||
action: Optional[str] = Query(None, description="Filter by action type"),
|
||||
severity: Optional[str] = Query(None, description="Filter by severity level"),
|
||||
user_email: Optional[str] = Query(None, description="Filter by user email"),
|
||||
resource_type: Optional[str] = Query(None, description="Filter by resource type"),
|
||||
resource_id: Optional[str] = Query(None, description="Filter by resource ID"),
|
||||
success: Optional[bool] = Query(None, description="Filter by success status"),
|
||||
action: str | None = Query(None, description="Filter by action type"),
|
||||
severity: str | None = Query(None, description="Filter by severity level"),
|
||||
user_email: str | None = Query(None, description="Filter by user email"),
|
||||
resource_type: str | None = Query(None, description="Filter by resource type"),
|
||||
resource_id: str | None = Query(None, description="Filter by resource ID"),
|
||||
success: bool | None = Query(None, description="Filter by success status"),
|
||||
|
||||
# Search
|
||||
search: Optional[str] = Query(None, description="Search in description and details"),
|
||||
search: str | None = Query(None, description="Search in description and details"),
|
||||
|
||||
# Pagination (skip/limit to match frontend AuditLogQuery)
|
||||
skip: int = Query(0, ge=0, description="Number of records to skip"),
|
||||
|
|
@ -647,7 +699,7 @@ async def get_audit_logs_detailed(
|
|||
sort_by=sort_by,
|
||||
sort_order=sort_order
|
||||
)
|
||||
|
||||
|
||||
return await audit_logger.query_logs(query)
|
||||
|
||||
|
||||
|
|
@ -693,7 +745,7 @@ async def get_security_events(
|
|||
request: Request = None,
|
||||
):
|
||||
"""Get recent security events (production/admin only)"""
|
||||
|
||||
|
||||
# Log access to security events
|
||||
await audit_logger.log_action(
|
||||
action="admin.audit.access",
|
||||
|
|
@ -702,7 +754,7 @@ async def get_security_events(
|
|||
request=request,
|
||||
details={"hours_requested": hours}
|
||||
)
|
||||
|
||||
|
||||
logs = await audit_logger.get_security_events(hours)
|
||||
return logs
|
||||
|
||||
|
|
@ -714,7 +766,7 @@ async def cleanup_audit_logs(
|
|||
request: Request = None,
|
||||
):
|
||||
"""Clean up old audit logs (admin only)"""
|
||||
|
||||
|
||||
# Log audit cleanup action
|
||||
await audit_logger.log_action(
|
||||
action="admin.system.action",
|
||||
|
|
@ -724,9 +776,9 @@ async def cleanup_audit_logs(
|
|||
details={"retention_days": retention_days},
|
||||
severity="warning"
|
||||
)
|
||||
|
||||
|
||||
deleted_count = await audit_logger.cleanup_old_logs(retention_days)
|
||||
|
||||
|
||||
# Log cleanup completion
|
||||
await audit_logger.log_action(
|
||||
action="admin.system.action",
|
||||
|
|
@ -738,9 +790,9 @@ async def cleanup_audit_logs(
|
|||
"deleted_count": deleted_count
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
return {
|
||||
"message": f"Deleted {deleted_count} audit logs older than {retention_days} days",
|
||||
"deleted_count": deleted_count,
|
||||
"retention_days": retention_days
|
||||
}
|
||||
}
|
||||
|
|
|
|||
295
backend/app/api/v1/routes_admin_production.py
Normal file
295
backend/app/api/v1/routes_admin_production.py
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
"""Admin production endpoints: failure dashboard, bulk retry, queue stats, VTT override."""
|
||||
from datetime import datetime
|
||||
|
||||
import redis.asyncio as aioredis
|
||||
from fastapi import (
|
||||
APIRouter,
|
||||
Depends,
|
||||
File,
|
||||
Form,
|
||||
HTTPException,
|
||||
Query,
|
||||
UploadFile,
|
||||
status,
|
||||
)
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import require_roles
|
||||
from ...core.logging import get_logger
|
||||
from ...core.redis import get_redis
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.job import JobStatus, RequestedOutputs
|
||||
from ...models.user import User, UserRole
|
||||
from ...schemas.job import JobResponse
|
||||
from ...services.audit_logger import audit_logger
|
||||
from ...services.cloud_run_dispatch import dispatch as _cr_dispatch
|
||||
from ...services.gcs import upload_vtt_to_gcs
|
||||
|
||||
# Module-level logger shared by every endpoint in this file.
logger = get_logger(__name__)
|
||||
# All routes declared below are mounted under the /admin/production prefix.
router = APIRouter(prefix="/admin/production", tags=["admin-production"])
|
||||
|
||||
# Stored status strings that mark a job as failed; used both to list failures
# and to decide whether a job is eligible for bulk retry.
_FAILURE_STATUSES = [
    failed.value
    for failed in (
        JobStatus.PROCESSING_FAILED,
        JobStatus.TTS_FAILED,
        JobStatus.RENDER_FAILED,
    )
]
|
||||
|
||||
# Maximum number of job IDs accepted by one bulk-retry request; larger
# requests are rejected with HTTP 400.
_RETRY_CAP = 50
|
||||
|
||||
|
||||
class BulkRetryRequest(BaseModel):
    # Request body for the bulk-retry endpoint.
    # IDs of the jobs to retry; the endpoint enforces the _RETRY_CAP limit.
    job_ids: list[str]
    # "from_scratch" restarts every job at ingestion; any other value resumes
    # from the failed step.
    strategy: str = "auto"  # "auto" | "from_scratch"
|
||||
|
||||
|
||||
class BulkRetryResponse(BaseModel):
    # Per-job outcome of a bulk-retry request.
    # IDs that were reset and re-dispatched.
    retried: list[str]
    # IDs that were not found, not in a failed status, or had an unknown step.
    skipped: list[str]
    # One {"job_id": ..., "error": ...} entry per job that raised during retry.
    errors: list[dict]
|
||||
|
||||
|
||||
@router.get("/failures", response_model=list[JobResponse])
async def list_failures(
    step: str | None = Query(None, description="Filter by failure.step"),
    org_id: str | None = Query(None, description="Filter by organization_id"),
    limit: int = Query(50, ge=1, le=200),
    skip: int = Query(0, ge=0),
    current_user: User = Depends(require_roles(UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """List all jobs in a failed status, optionally filtered by step and org."""
    filters: dict = {"status": {"$in": _FAILURE_STATUSES}}
    # Optional filters are only added when the caller supplied a non-empty value.
    for field_name, wanted in (("failure.step", step), ("organization_id", org_id)):
        if wanted:
            filters[field_name] = wanted

    # Most recently updated failures first, paginated with skip/limit.
    page = db.jobs.find(filters).sort("updated_at", -1).skip(skip).limit(limit)
    job_docs = await page.to_list(length=limit)

    failures = []
    for doc in job_docs:
        failures.append(
            JobResponse(
                id=str(doc["_id"]),
                title=doc["title"],
                status=doc["status"],
                source=doc["source"],
                requested_outputs=RequestedOutputs(**doc["requested_outputs"]),
                # Jobs without a review sub-document get an empty placeholder.
                review=doc.get("review", {"notes": "", "history": []}),
                outputs=doc.get("outputs"),
                created_at=doc["created_at"].isoformat(),
                updated_at=doc["updated_at"].isoformat(),
            )
        )
    return failures
|
||||
|
||||
|
||||
@router.post("/bulk-retry", response_model=BulkRetryResponse)
async def bulk_retry(
    payload: BulkRetryRequest,
    current_user: User = Depends(require_roles(UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Retry up to 50 failed jobs in one call.

    Each job is handled independently: it is looked up, its resume step is
    resolved, its status is reset, and the matching worker is dispatched.

    Returns:
        BulkRetryResponse with ``retried`` (reset and dispatched), ``skipped``
        (not found, not in a failed status, or unknown step) and ``errors``
        (an exception was raised while processing that job).

    Raises:
        HTTPException: 400 when more than ``_RETRY_CAP`` job IDs are supplied.
    """
    if len(payload.job_ids) > _RETRY_CAP:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Cannot retry more than {_RETRY_CAP} jobs at once",
        )

    # Resume step to use when a failed job carries no ``failure.step``.
    # A processing failure must restart at ingestion: previously it fell
    # through to "render", which moved the job to PENDING_QC and dispatched
    # a rerender for a job that never finished processing.
    default_step_by_status = {
        JobStatus.PROCESSING_FAILED.value: "ingestion",
        JobStatus.TTS_FAILED.value: "tts",
        JobStatus.RENDER_FAILED.value: "render",
    }

    retried: list[str] = []
    skipped: list[str] = []
    errors: list[dict] = []
    now = datetime.utcnow()

    for job_id in payload.job_ids:
        try:
            job_doc = await db.jobs.find_one({"_id": job_id})
            if not job_doc or job_doc["status"] not in _FAILURE_STATUSES:
                skipped.append(job_id)
                continue

            if payload.strategy == "from_scratch":
                step = "ingestion"
            else:
                failure = job_doc.get("failure") or {}
                step = failure.get("step") or default_step_by_status.get(job_doc["status"])

            # Map the resume step to the status the job is rewound to.
            if step in ("ingestion", "ai_processing"):
                reset_status = JobStatus.CREATED.value
            elif step == "translation":
                reset_status = JobStatus.AI_PROCESSING.value
            elif step == "tts":
                src = job_doc["source"].get("language", "en")
                reset_status = (
                    JobStatus.APPROVED_ENGLISH.value if src == "en" else JobStatus.APPROVED_SOURCE.value
                )
            elif step == "render":
                reset_status = JobStatus.PENDING_QC.value
            else:
                # Unknown or missing step: leave the job untouched.
                skipped.append(job_id)
                continue

            await db.jobs.update_one(
                {"_id": job_id},
                {
                    "$set": {"status": reset_status, "error": None, "updated_at": now},
                    "$inc": {"retry_count": 1},
                    "$push": {
                        "review.history": {
                            "at": now,
                            "status": f"bulk_retry_{step}",
                            "by": str(current_user.id),
                        }
                    },
                },
            )

            # NOTE(review): if dispatch raises, the job stays in the reset
            # status and is only reported under ``errors``.
            if step in ("ingestion", "ai_processing"):
                await _cr_dispatch("ingest", job_id)
            elif step in ("translation", "tts"):
                await _cr_dispatch("translate", job_id)
            elif step == "render":
                lang = job_doc.get("last_render_language", "en")
                await _cr_dispatch("rerender", job_id, language=lang)

            retried.append(job_id)
        except Exception as e:
            # One bad job must not abort the rest of the batch.
            logger.error(f"bulk-retry failed for job {job_id}: {e}")
            errors.append({"job_id": job_id, "error": str(e)})

    # Audit logging is best-effort; a failure here must not fail the request.
    try:
        await audit_logger.log(
            action=AuditAction.JOB_BULK_RETRY,
            user_id=str(current_user.id),
            user_email=current_user.email,
            user_role=current_user.role.value if current_user.role else None,
            resource_type="job",
            description=f"Bulk retry {len(retried)} jobs (strategy={payload.strategy})",
            details={"retried": retried, "skipped": skipped, "error_count": len(errors)},
        )
    except Exception as e:
        logger.warning(f"Failed to write bulk-retry audit log: {e}")

    return BulkRetryResponse(retried=retried, skipped=skipped, errors=errors)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PR-7: Queue depth stats
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Queue names whose Redis list length is reported by the queue-stats endpoint.
_CELERY_QUEUES = ["default", "ingest", "tts", "render", "ffmpeg", "whisper", "notify", "embed"]
|
||||
|
||||
|
||||
class QueueStats(BaseModel):
    # Response body for the queue-stats endpoint.
    queues: dict[str, int]  # queue_name → pending task count
    # Sum of all values in ``queues``.
    total_pending: int
|
||||
|
||||
|
||||
@router.get("/queue-stats", response_model=QueueStats)
async def get_queue_stats(
    current_user: User = Depends(require_roles(UserRole.PRODUCTION, UserRole.ADMIN)),
    redis: aioredis.Redis = Depends(get_redis),
):
    """Return pending task counts per Celery queue (via Redis LLEN).

    A queue whose length cannot be read is reported as 0 so the endpoint
    still answers, but the failure is logged so a Redis problem is not
    mistaken for an empty queue.
    """
    counts: dict[str, int] = {}
    for queue_name in _CELERY_QUEUES:
        try:
            counts[queue_name] = await redis.llen(queue_name)
        except Exception as e:
            # Best-effort: keep serving the other queues, but leave a trace.
            logger.warning(f"Failed to read queue depth for '{queue_name}': {e}")
            counts[queue_name] = 0
    return QueueStats(queues=counts, total_pending=sum(counts.values()))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PR-8: Upload final VTT override — bypass AI, jump to PENDING_QC
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Stored status strings from which a manual VTT upload may move a job
# straight to PENDING_QC.
_BYPASSABLE_STATUSES = {
    allowed.value
    for allowed in (
        JobStatus.CREATED,
        JobStatus.INGESTING,
        JobStatus.AI_PROCESSING,
        JobStatus.PROCESSING_FAILED,
        JobStatus.TTS_FAILED,
        JobStatus.RENDER_FAILED,
    )
}
|
||||
|
||||
|
||||
@router.post("/jobs/{job_id}/upload-final-vtt")
async def upload_final_vtt(
    job_id: str,
    language: str = Form(..., description="BCP-47 language code, e.g. 'en' or 'fr'"),
    vtt_file: UploadFile = File(..., description="WebVTT (.vtt) file"),
    vtt_type: str = Form("captions", description="'captions' or 'ad'"),
    current_user: User = Depends(require_roles(UserRole.PRODUCTION, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Upload a hand-crafted VTT to override AI output and advance job to PENDING_QC.

    The file is stored at ``{job_id}/{lang_key}/{vtt_type}.vtt`` and its URI is
    written to ``outputs.{lang_key}.captions_vtt_gcs`` or ``...ad_vtt_gcs``.

    Returns:
        dict with ``status``, ``gcs_uri`` and the new ``job_status``.

    Raises:
        HTTPException: 404 if the job does not exist; 409 if its status is not
            in ``_BYPASSABLE_STATUSES``; 400 for an invalid ``vtt_type``,
            ``language``, file extension, encoding, or missing WEBVTT header.
    """
    import re  # local import: this module has no top-level ``re`` import

    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")

    if job_doc["status"] not in _BYPASSABLE_STATUSES:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Cannot override VTT when job is in status '{job_doc['status']}'. "
                   f"Only allowed in: {sorted(_BYPASSABLE_STATUSES)}",
        )

    if vtt_type not in ("captions", "ad"):
        raise HTTPException(status_code=400, detail="vtt_type must be 'captions' or 'ad'")

    # ``language`` is client-supplied and ends up inside a Mongo field path and
    # a storage object path, so only alphanumeric subtags joined by '-' or '_'
    # are accepted (no '.', '$' or '/').
    if not re.fullmatch(r"[A-Za-z0-9]+(?:[-_][A-Za-z0-9]+)*", language):
        raise HTTPException(status_code=400, detail="Invalid language code")

    # Accept the extension in any letter case (e.g. "FILE.VTT").
    if not vtt_file.filename or not vtt_file.filename.lower().endswith(".vtt"):
        raise HTTPException(status_code=400, detail="File must be a .vtt file")

    raw_bytes = await vtt_file.read()
    try:
        # "utf-8-sig" drops an optional leading BOM, which str.strip() would
        # not remove and which would otherwise fail the WEBVTT header check.
        vtt_content = raw_bytes.decode("utf-8-sig")
    except UnicodeDecodeError:
        # A badly encoded upload is a client error, not a server error.
        raise HTTPException(status_code=400, detail="File must be UTF-8 encoded") from None

    if not vtt_content.strip().startswith("WEBVTT"):
        raise HTTPException(status_code=400, detail="File does not start with WEBVTT header")

    lang_key = language.replace("-", "_")
    field = "captions_vtt_gcs" if vtt_type == "captions" else "ad_vtt_gcs"
    gcs_path = f"{job_id}/{lang_key}/{vtt_type}.vtt"

    gcs_uri = await upload_vtt_to_gcs(vtt_content, gcs_path)

    now = datetime.utcnow()
    await db.jobs.update_one(
        {"_id": job_id},
        {
            "$set": {
                f"outputs.{lang_key}.{field}": gcs_uri,
                "status": JobStatus.PENDING_QC.value,
                "updated_at": now,
            },
            "$push": {
                "review.history": {
                    "at": now,
                    "status": "manual_vtt_upload",
                    "by": str(current_user.id),
                    "note": f"Manual {vtt_type} VTT upload for {language} by {current_user.email}",
                }
            },
        },
    )

    # Audit logging is best-effort; a failure here must not fail the upload.
    try:
        await audit_logger.log(
            action=AuditAction.VTT_EDIT,
            user_id=str(current_user.id),
            user_email=current_user.email,
            user_role=current_user.role.value if current_user.role else None,
            resource_type="job",
            resource_id=job_id,
            description=f"Manual {vtt_type} VTT upload for {language} — job advanced to PENDING_QC",
        )
    except Exception as e:
        logger.warning(f"Failed to write upload-final-vtt audit log: {e}")

    return {"status": "ok", "gcs_uri": gcs_uri, "job_status": JobStatus.PENDING_QC.value}
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
import re
|
||||
import secrets
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
|
||||
from fastapi.security import HTTPBearer
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
|
@ -14,7 +16,7 @@ from ...core.security import (
|
|||
verify_password,
|
||||
)
|
||||
from ...models.audit_log import AuditAction, AuditLogSeverity
|
||||
from ...models.user import User, AuthProvider, UserRole
|
||||
from ...models.user import AuthProvider, User, UserRole
|
||||
from ...schemas.auth import (
|
||||
LoginRequest,
|
||||
LoginResponse,
|
||||
|
|
@ -23,11 +25,11 @@ from ...schemas.auth import (
|
|||
MicrosoftLoginResponse,
|
||||
RefreshResponse,
|
||||
)
|
||||
from ...services.audit_logger import log_auth_success, log_auth_failure, audit_logger
|
||||
from ...services.audit_logger import audit_logger, log_auth_failure, log_auth_success
|
||||
from ...services.microsoft_auth import (
|
||||
get_microsoft_auth_service,
|
||||
MicrosoftAuthError,
|
||||
MicrosoftTokenValidationError,
|
||||
get_microsoft_auth_service,
|
||||
)
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
|
@ -35,6 +37,40 @@ router = APIRouter(prefix="/auth", tags=["auth"])
|
|||
security = HTTPBearer()
|
||||
|
||||
|
||||
async def _get_user_org_ids(user_id: str, db: AsyncIOMotorDatabase) -> list[str]:
    """Return list of org IDs the user belongs to — used as a JWT hint only."""
    # Only the organization_id field is projected; at most 200 memberships are read.
    membership_docs = await db.memberships.find(
        {"user_id": user_id},
        {"organization_id": 1},
    ).to_list(length=200)

    org_ids: list[str] = []
    for membership in membership_docs:
        org_id = membership.get("organization_id")
        if org_id:  # skip memberships with a missing or empty organization_id
            org_ids.append(str(org_id))
    return org_ids
|
||||
|
||||
|
||||
def _set_auth_cookies(response: Response, refresh_token: str) -> str:
    """Set httponly refresh_token cookie and readable csrf_token cookie. Returns the csrf token."""
    csrf_token = secrets.token_hex(32)

    # Attributes shared by both cookies: same lifetime, same security flags,
    # and an explicit domain only when app_env is "prod".
    shared_attrs = {
        "secure": settings.cookie_secure,
        "samesite": settings.cookie_samesite,
        "domain": settings.cookie_domain if settings.app_env == "prod" else None,
        "max_age": settings.jwt_refresh_ttl_days * 24 * 60 * 60,
    }

    response.set_cookie(
        key="refresh_token",
        value=refresh_token,
        httponly=True,
        **shared_attrs,
    )
    response.set_cookie(
        key="csrf_token",
        value=csrf_token,
        httponly=False,  # JS-readable for Double Submit Cookie pattern
        **shared_attrs,
    )
    return csrf_token
|
||||
|
||||
|
||||
@router.post("/login", response_model=LoginResponse)
|
||||
async def login(
|
||||
login_data: LoginRequest,
|
||||
|
|
@ -73,18 +109,11 @@ async def login(
|
|||
detail="User account is disabled",
|
||||
)
|
||||
|
||||
access_token = create_access_token(subject=str(user.id))
|
||||
org_ids = await _get_user_org_ids(str(user.id), db)
|
||||
access_token = create_access_token(subject=str(user.id), org_ids=org_ids)
|
||||
refresh_token = create_refresh_token(subject=str(user.id))
|
||||
|
||||
response.set_cookie(
|
||||
key="refresh_token",
|
||||
value=refresh_token,
|
||||
httponly=True,
|
||||
secure=settings.cookie_secure,
|
||||
samesite=settings.cookie_samesite,
|
||||
domain=settings.cookie_domain if settings.app_env == "prod" else None,
|
||||
max_age=settings.jwt_refresh_ttl_days * 24 * 60 * 60,
|
||||
)
|
||||
_set_auth_cookies(response, refresh_token)
|
||||
|
||||
await log_auth_success(user, request)
|
||||
return LoginResponse(
|
||||
|
|
@ -114,13 +143,13 @@ async def microsoft_login(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail=f"Microsoft authentication failed: {str(e)}",
|
||||
)
|
||||
) from None
|
||||
except MicrosoftAuthError as e:
|
||||
await log_auth_failure("microsoft-sso", request, f"MS auth service error: {e}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Microsoft authentication service error",
|
||||
)
|
||||
) from None
|
||||
|
||||
# Look up by Microsoft-derived ID first — handles email casing changes across logins
|
||||
ms_user_id = f"ms-{user_info.sub[:20]}"
|
||||
|
|
@ -162,18 +191,11 @@ async def microsoft_login(
|
|||
detail="User account is disabled",
|
||||
)
|
||||
|
||||
access_token = create_access_token(subject=str(user.id))
|
||||
org_ids = await _get_user_org_ids(str(user.id), db)
|
||||
access_token = create_access_token(subject=str(user.id), org_ids=org_ids)
|
||||
refresh_token = create_refresh_token(subject=str(user.id))
|
||||
|
||||
response.set_cookie(
|
||||
key="refresh_token",
|
||||
value=refresh_token,
|
||||
httponly=True,
|
||||
secure=settings.cookie_secure,
|
||||
samesite=settings.cookie_samesite,
|
||||
domain=settings.cookie_domain if settings.app_env == "prod" else None,
|
||||
max_age=settings.jwt_refresh_ttl_days * 24 * 60 * 60,
|
||||
)
|
||||
_set_auth_cookies(response, refresh_token)
|
||||
|
||||
await log_auth_success(user, request)
|
||||
return MicrosoftLoginResponse(
|
||||
|
|
@ -200,6 +222,15 @@ async def refresh_token(
|
|||
detail="Refresh token not found",
|
||||
)
|
||||
|
||||
# CSRF protection: Double Submit Cookie pattern
|
||||
csrf_cookie = request.cookies.get("csrf_token")
|
||||
csrf_header = request.headers.get("X-CSRF-Token")
|
||||
if csrf_cookie and (not csrf_header or csrf_header != csrf_cookie):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="CSRF token mismatch",
|
||||
)
|
||||
|
||||
try:
|
||||
payload = decode_token(refresh_token)
|
||||
|
||||
|
|
@ -231,20 +262,13 @@ async def refresh_token(
|
|||
detail="User account is disabled",
|
||||
)
|
||||
|
||||
# Create new tokens
|
||||
new_access_token = create_access_token(subject=user_id)
|
||||
# Create new tokens (include org_ids claim for prefilter hint)
|
||||
_org_ids = await _get_user_org_ids(user_id, db)
|
||||
new_access_token = create_access_token(subject=user_id, org_ids=_org_ids)
|
||||
new_refresh_token = create_refresh_token(subject=user_id)
|
||||
|
||||
# Update refresh token cookie
|
||||
response.set_cookie(
|
||||
key="refresh_token",
|
||||
value=new_refresh_token,
|
||||
httponly=True,
|
||||
secure=settings.cookie_secure,
|
||||
samesite=settings.cookie_samesite,
|
||||
domain=settings.cookie_domain if settings.app_env == "prod" else None,
|
||||
max_age=settings.jwt_refresh_ttl_days * 24 * 60 * 60,
|
||||
)
|
||||
# Rotate both refresh and CSRF cookies
|
||||
_set_auth_cookies(response, new_refresh_token)
|
||||
|
||||
logger.info("Token refresh successful for user %s", user_id)
|
||||
return RefreshResponse(
|
||||
|
|
@ -263,7 +287,7 @@ async def refresh_token(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Invalid refresh token",
|
||||
)
|
||||
) from None
|
||||
|
||||
|
||||
@router.post("/logout", response_model=LogoutResponse)
|
||||
|
|
|
|||
245
backend/app/api/v1/routes_briefs.py
Normal file
245
backend/app/api/v1/routes_briefs.py
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
"""Job Brief CRUD endpoints."""
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, status
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
||||
from ...core.authz import MembershipContext, assert_user_in_org, get_membership_context
|
||||
from ...core.database import get_database
|
||||
from ...core.logging import get_logger
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.job_brief import (
|
||||
BriefStatus,
|
||||
JobBriefCreate,
|
||||
JobBriefResponse,
|
||||
JobBriefUpdate,
|
||||
)
|
||||
from ...models.organization import OrgRole
|
||||
from ...services.audit_logger import audit_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
router = APIRouter(prefix="/briefs", tags=["briefs"])
|
||||
|
||||
|
||||
def _doc_to_response(doc: dict) -> JobBriefResponse:
    """Build the API representation of a stored ``job_briefs`` document.

    Keys read with ``doc[...]`` are mandatory (a missing one raises
    ``KeyError``); the remaining keys are read with ``doc.get`` and default to
    ``None`` (``languages`` defaults to an empty list). The three timestamp
    values are rendered with ``.isoformat()``; ``submitted_at`` stays ``None``
    while it is unset.
    """
    # Plain pass-through fields that may legitimately be absent from the document.
    passthrough = {
        key: doc.get(key)
        for key in ("project_id", "description", "deadline", "assignee_id", "job_id", "approved_by")
    }
    submitted = doc.get("submitted_at")

    return JobBriefResponse(
        id=str(doc["_id"]),
        organization_id=doc["organization_id"],
        title=doc["title"],
        requested_outputs=doc["requested_outputs"],
        languages=doc.get("languages", []),
        status=doc["status"],
        created_by=doc["created_by"],
        created_at=doc["created_at"].isoformat(),
        updated_at=doc["updated_at"].isoformat(),
        submitted_at=submitted.isoformat() if submitted else None,
        **passthrough,
    )
|
||||
|
||||
|
||||
@router.get("", response_model=list[JobBriefResponse])
async def list_briefs(
    ctx: MembershipContext = Depends(get_membership_context),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Return up to 100 briefs visible to the caller, newest first.

    Platform admins get an unfiltered query; everyone else is restricted to
    the organizations found on ``ctx.memberships``. A non-admin caller without
    any membership receives HTTP 403.
    """
    member_org_ids = [m.organization_id for m in getattr(ctx, "memberships", [])]

    if ctx.is_platform_admin:
        query: dict = {}
    else:
        if not member_org_ids:
            raise HTTPException(status_code=403, detail="No org memberships")
        query = {"organization_id": {"$in": member_org_ids}}

    newest_first = db.job_briefs.find(query).sort("created_at", -1).limit(100)
    briefs = await newest_first.to_list(length=100)
    return [_doc_to_response(brief) for brief in briefs]
|
||||
|
||||
|
||||
@router.post("", response_model=JobBriefResponse, status_code=status.HTTP_201_CREATED)
async def create_brief(
    payload: JobBriefCreate,
    http_request: Request,
    ctx: MembershipContext = Depends(get_membership_context),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Create a new brief in DRAFT status and record an audit entry.

    The owning organization is resolved in this order:

    1. ``client_id`` of the project referenced by ``payload.project_id``;
    2. otherwise, the caller's single organization in which they hold at
       least the MANAGER role.

    Raises:
        HTTPException 404: ``payload.project_id`` was supplied but no such
            project exists (previously this was silently ignored and the
            brief was stored with a dangling project reference).
        HTTPException 400: the organization cannot be inferred (platform
            admin without a usable project, or zero / several candidate
            organizations).

    ``assert_user_in_org`` is then called with the resolved organization and
    ``OrgRole.MANAGER`` (presumably rejecting callers lacking that role).
    """
    org_id: str | None = None
    if payload.project_id:
        project = await db.projects.find_one({"_id": payload.project_id}, {"client_id": 1})
        if not project:
            # Fail fast instead of persisting a reference to a project that does not exist.
            raise HTTPException(status_code=404, detail="Project not found")
        org_id = project.get("client_id")

    if not org_id:
        # Platform admins have no implicit organization, so nothing can be inferred for them.
        if ctx.is_platform_admin:
            raise HTTPException(status_code=400, detail="Admin must supply project_id or org_id cannot be inferred")
        # Fall back to the caller's organization only when exactly one is manageable.
        manageable = [
            m
            for m in getattr(ctx, "memberships", [])
            if ctx.can_access_org(m.organization_id, OrgRole.MANAGER)
        ]
        if len(manageable) != 1:
            raise HTTPException(status_code=400, detail="Cannot infer organization; supply project_id")
        org_id = manageable[0].organization_id

    assert_user_in_org(ctx, org_id, OrgRole.MANAGER)

    now = datetime.utcnow()
    creator_id = str(ctx.user.id)
    doc = {
        # Id = microsecond timestamp plus the last six characters of the creator's id.
        "_id": f"brief_{now.strftime('%Y%m%d%H%M%S%f')}_{creator_id[-6:]}",
        "organization_id": org_id,
        "project_id": payload.project_id,
        "title": payload.title,
        "description": payload.description,
        "requested_outputs": payload.requested_outputs.model_dump(),
        "languages": payload.languages,
        "deadline": payload.deadline,
        "assignee_id": payload.assignee_id,
        "status": BriefStatus.DRAFT.value,
        "created_by": creator_id,
        "job_id": None,
        "created_at": now,
        "updated_at": now,
        "submitted_at": None,
        "approved_by": None,
    }
    await db.job_briefs.insert_one(doc)
    await audit_logger.log_action(
        action=AuditAction.BRIEF_CREATE,
        description=f"Brief '{payload.title}' created",
        user=ctx.user,
        request=http_request,
        resource_type="brief",
        resource_id=str(doc["_id"]),
        details={"title": payload.title, "organization_id": org_id},
    )
    return _doc_to_response(doc)
|
||||
|
||||
|
||||
@router.get("/{brief_id}", response_model=JobBriefResponse)
async def get_brief(
    brief_id: str,
    ctx: MembershipContext = Depends(get_membership_context),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Fetch a single brief by id.

    Responds with HTTP 404 when no document has that ``_id``. Otherwise the
    caller is checked against the brief's organization with
    ``OrgRole.VIEWER`` before the brief is returned.
    """
    brief = await db.job_briefs.find_one({"_id": brief_id})
    if brief:
        owning_org = brief["organization_id"]
        assert_user_in_org(ctx, owning_org, OrgRole.VIEWER)
        return _doc_to_response(brief)
    raise HTTPException(status_code=404, detail="Brief not found")
|
||||
|
||||
|
||||
@router.patch("/{brief_id}", response_model=JobBriefResponse)
async def update_brief(
    brief_id: str,
    payload: JobBriefUpdate,
    http_request: Request,
    ctx: MembershipContext = Depends(get_membership_context),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Apply a partial update to a DRAFT brief and record an audit entry.

    Only payload fields that are not ``None`` are written; ``updated_at`` is
    always refreshed.

    Raises:
        HTTPException 404: no brief has this id.
        HTTPException 400: the brief is not (or is no longer) in DRAFT status.

    The caller is checked against the brief's organization with
    ``OrgRole.MANAGER`` before anything is written.
    """
    doc = await db.job_briefs.find_one({"_id": brief_id})
    if not doc:
        raise HTTPException(status_code=404, detail="Brief not found")
    assert_user_in_org(ctx, doc["organization_id"], OrgRole.MANAGER)
    if doc["status"] != BriefStatus.DRAFT.value:
        raise HTTPException(status_code=400, detail="Only DRAFT briefs can be updated")

    updates: dict = {"updated_at": datetime.utcnow()}
    for field in ("title", "description", "requested_outputs", "languages", "deadline"):
        value = getattr(payload, field)
        if value is None:
            continue
        # requested_outputs is a model and must be stored as a plain dict.
        updates[field] = value.model_dump() if field == "requested_outputs" else value

    # The DRAFT condition is repeated in the filter so the check and the write
    # are one atomic operation: a concurrent submit can no longer slip between
    # the status check above and this update.
    result = await db.job_briefs.find_one_and_update(
        {"_id": brief_id, "status": BriefStatus.DRAFT.value},
        {"$set": updates},
        return_document=True,  # True is ReturnDocument.AFTER: return the updated document
    )
    if result is None:
        # The brief left DRAFT (or disappeared) after it was read above.
        raise HTTPException(status_code=400, detail="Only DRAFT briefs can be updated")

    await audit_logger.log_action(
        action=AuditAction.BRIEF_UPDATE,
        description=f"Brief '{brief_id}' updated",
        user=ctx.user,
        request=http_request,
        resource_type="brief",
        resource_id=brief_id,
        details={"fields_updated": list(updates.keys())},
    )
    return _doc_to_response(result)
|
||||
|
||||
|
||||
@router.post("/{brief_id}/submit", response_model=JobBriefResponse)
async def submit_brief(
    brief_id: str,
    http_request: Request,
    ctx: MembershipContext = Depends(get_membership_context),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Move a brief from DRAFT to SUBMITTED and record an audit entry.

    Sets ``status``, ``submitted_at`` and ``updated_at`` in one update.

    Raises:
        HTTPException 404: no brief has this id.
        HTTPException 400: the brief is not (or is no longer) in DRAFT status.

    The caller is checked against the brief's organization with
    ``OrgRole.MANAGER`` before the transition.
    """
    doc = await db.job_briefs.find_one({"_id": brief_id})
    if not doc:
        raise HTTPException(status_code=404, detail="Brief not found")
    assert_user_in_org(ctx, doc["organization_id"], OrgRole.MANAGER)
    if doc["status"] != BriefStatus.DRAFT.value:
        raise HTTPException(status_code=400, detail="Only DRAFT briefs can be submitted")

    now = datetime.utcnow()
    # Filtering on the expected status makes the transition atomic, so two
    # concurrent submissions cannot both succeed.
    result = await db.job_briefs.find_one_and_update(
        {"_id": brief_id, "status": BriefStatus.DRAFT.value},
        {"$set": {"status": BriefStatus.SUBMITTED.value, "submitted_at": now, "updated_at": now}},
        return_document=True,  # True is ReturnDocument.AFTER: return the updated document
    )
    if result is None:
        # Another request changed the brief after it was read above.
        raise HTTPException(status_code=400, detail="Only DRAFT briefs can be submitted")

    await audit_logger.log_action(
        action=AuditAction.BRIEF_SUBMIT,
        description=f"Brief '{brief_id}' submitted for review",
        user=ctx.user,
        request=http_request,
        resource_type="brief",
        resource_id=brief_id,
        details={"organization_id": result.get("organization_id")},
    )
    return _doc_to_response(result)
|
||||
|
||||
|
||||
@router.post("/{brief_id}/approve", response_model=JobBriefResponse)
async def approve_brief(
    brief_id: str,
    http_request: Request,
    ctx: MembershipContext = Depends(get_membership_context),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Move a brief from SUBMITTED to APPROVED and record an audit entry.

    Stores the approving user's id in ``approved_by`` and refreshes
    ``updated_at``.

    Raises:
        HTTPException 404: no brief has this id.
        HTTPException 400: the brief is not (or is no longer) in SUBMITTED
            status.

    The caller is checked against the brief's organization with
    ``OrgRole.ADMIN`` before the transition.
    """
    doc = await db.job_briefs.find_one({"_id": brief_id})
    if not doc:
        raise HTTPException(status_code=404, detail="Brief not found")
    assert_user_in_org(ctx, doc["organization_id"], OrgRole.ADMIN)
    if doc["status"] != BriefStatus.SUBMITTED.value:
        raise HTTPException(status_code=400, detail="Only SUBMITTED briefs can be approved")

    now = datetime.utcnow()
    # Filtering on the expected status makes the transition atomic: only one of
    # several concurrent approvals can win, so approved_by is never overwritten.
    result = await db.job_briefs.find_one_and_update(
        {"_id": brief_id, "status": BriefStatus.SUBMITTED.value},
        {
            "$set": {
                "status": BriefStatus.APPROVED.value,
                "approved_by": str(ctx.user.id),
                "updated_at": now,
            }
        },
        return_document=True,  # True is ReturnDocument.AFTER: return the updated document
    )
    if result is None:
        # Another request changed the brief after it was read above.
        raise HTTPException(status_code=400, detail="Only SUBMITTED briefs can be approved")

    await audit_logger.log_action(
        action=AuditAction.BRIEF_APPROVE,
        description=f"Brief '{brief_id}' approved",
        user=ctx.user,
        request=http_request,
        resource_type="brief",
        resource_id=brief_id,
        details={"organization_id": result.get("organization_id")},
    )
    return _doc_to_response(result)
|
||||
|
|
@ -9,15 +9,16 @@ Access rules:
|
|||
- List projects (read) → Admin, PM, or any team member of the client
|
||||
"""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from bson import ObjectId
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import get_current_user, require_roles
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.client import (
|
||||
Client,
|
||||
ClientCreate,
|
||||
|
|
@ -30,6 +31,7 @@ from ...models.client import (
|
|||
TeamUpdate,
|
||||
)
|
||||
from ...models.user import User, UserRole
|
||||
from ...services.audit_logger import audit_logger
|
||||
|
||||
router = APIRouter(prefix="/clients", tags=["clients"])
|
||||
|
||||
|
|
@ -39,7 +41,7 @@ router = APIRouter(prefix="/clients", tags=["clients"])
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _now() -> datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
async def _get_client_or_404(client_id: str, db: AsyncIOMotorDatabase) -> dict:
|
||||
|
|
@ -121,6 +123,7 @@ async def list_clients(
|
|||
@router.post("", response_model=Client)
|
||||
async def create_client(
|
||||
body: ClientCreate,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -137,7 +140,18 @@ async def create_client(
|
|||
"updated_at": now,
|
||||
})
|
||||
doc = await db.clients.find_one({"_id": client_id})
|
||||
return _client_from_doc(doc)
|
||||
client = _client_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_CREATE,
|
||||
description=f"Client '{client.name}' created",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=str(client.id),
|
||||
resource_name=client.name,
|
||||
details={"slug": client.slug},
|
||||
)
|
||||
return client
|
||||
|
||||
|
||||
@router.get("/{client_id}", response_model=Client)
|
||||
|
|
@ -158,11 +172,12 @@ async def get_client(
|
|||
async def update_client(
|
||||
client_id: str,
|
||||
body: ClientUpdate,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
await _get_client_or_404(client_id, db)
|
||||
update: dict = {k: v for k, v in body.model_dump(exclude_none=True).items()}
|
||||
update: dict = dict(body.model_dump(exclude_none=True).items())
|
||||
if not update:
|
||||
raise HTTPException(status_code=422, detail="No fields to update")
|
||||
if "slug" in update and await db.clients.find_one({"slug": update["slug"], "_id": {"$ne": client_id}}):
|
||||
|
|
@ -170,17 +185,39 @@ async def update_client(
|
|||
update["updated_at"] = _now()
|
||||
await db.clients.update_one({"_id": client_id}, {"$set": update})
|
||||
doc = await db.clients.find_one({"_id": client_id})
|
||||
return _client_from_doc(doc)
|
||||
client = _client_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_UPDATE,
|
||||
description=f"Client '{client.name}' updated",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client.name,
|
||||
details={"fields_updated": list(body.model_dump(exclude_none=True).keys())},
|
||||
)
|
||||
return client
|
||||
|
||||
|
||||
@router.delete("/{client_id}", status_code=204)
async def deactivate_client(
    client_id: str,
    request: Request,
    current_user: User = Depends(require_roles(UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Soft-deactivate a client and record an audit entry.

    The client document is not removed: ``is_active`` is set to ``False`` and
    ``updated_at`` is refreshed. The audit entry records whether the client
    was active beforehand (``was_active``, defaulting to ``True`` when the
    field is absent).
    """
    # One lookup both validates the id and supplies the name for the audit log
    # (presumably raises 404 for an unknown client — see _get_client_or_404).
    doc = await _get_client_or_404(client_id, db)
    await db.clients.update_one({"_id": client_id}, {"$set": {"is_active": False, "updated_at": _now()}})
    await audit_logger.log_action(
        action=AuditAction.CLIENT_DEACTIVATE,
        description=f"Client '{doc['name']}' deactivated",
        user=current_user,
        request=request,
        resource_type="client",
        resource_id=client_id,
        resource_name=doc["name"],
        details={"was_active": doc.get("is_active", True)},
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -195,10 +232,11 @@ class AssignPMRequest(BaseModel):
|
|||
async def assign_pm(
|
||||
client_id: str,
|
||||
body: AssignPMRequest,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
await _get_client_or_404(client_id, db)
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
user_doc = await db.users.find_one({"_id": body.user_id})
|
||||
if not user_doc:
|
||||
raise HTTPException(status_code=404, detail="User not found")
|
||||
|
|
@ -209,16 +247,28 @@ async def assign_pm(
|
|||
"$set": {"role": UserRole.PROJECT_MANAGER.value, "updated_at": _now()},
|
||||
},
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_PM_ASSIGN,
|
||||
description=f"PM '{user_doc.get('email', body.user_id)}' assigned to client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"pm_user_id": body.user_id, "pm_email": user_doc.get("email")},
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/{client_id}/pm/{user_id}", status_code=204)
|
||||
async def remove_pm(
|
||||
client_id: str,
|
||||
user_id: str,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
await _get_client_or_404(client_id, db)
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
pm_doc = await db.users.find_one({"_id": user_id})
|
||||
await db.users.update_one(
|
||||
{"_id": user_id},
|
||||
{"$pull": {"pm_client_ids": client_id}, "$set": {"updated_at": _now()}},
|
||||
|
|
@ -230,6 +280,16 @@ async def remove_pm(
|
|||
{"_id": user_id},
|
||||
{"$set": {"role": UserRole.CLIENT.value, "updated_at": _now()}},
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_PM_REMOVE,
|
||||
description=f"PM '{pm_doc.get('email', user_id) if pm_doc else user_id}' removed from client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"pm_user_id": user_id, "pm_email": pm_doc.get("email") if pm_doc else None},
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{client_id}/pm", response_model=list[dict])
|
||||
|
|
@ -266,10 +326,11 @@ async def list_teams(
|
|||
async def create_team(
|
||||
client_id: str,
|
||||
body: TeamCreate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
await _get_client_or_404(client_id, db)
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_admin(current_user, client_id, db)
|
||||
now = _now()
|
||||
team_id = str(ObjectId())
|
||||
|
|
@ -282,7 +343,18 @@ async def create_team(
|
|||
"updated_at": now,
|
||||
})
|
||||
doc = await db.teams.find_one({"_id": team_id})
|
||||
return _team_from_doc(doc)
|
||||
team = _team_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_TEAM_CREATE,
|
||||
description=f"Team '{team.name}' created for client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"team_id": team_id, "team_name": team.name},
|
||||
)
|
||||
return team
|
||||
|
||||
|
||||
@router.patch("/{client_id}/teams/{team_id}", response_model=Team)
|
||||
|
|
@ -290,32 +362,55 @@ async def update_team(
|
|||
client_id: str,
|
||||
team_id: str,
|
||||
body: TeamUpdate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
await _get_client_or_404(client_id, db)
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_admin(current_user, client_id, db)
|
||||
await _get_team_or_404(team_id, client_id, db)
|
||||
update = {k: v for k, v in body.model_dump(exclude_none=True).items()}
|
||||
update = dict(body.model_dump(exclude_none=True).items())
|
||||
if not update:
|
||||
raise HTTPException(status_code=422, detail="No fields to update")
|
||||
update["updated_at"] = _now()
|
||||
await db.teams.update_one({"_id": team_id}, {"$set": update})
|
||||
doc = await db.teams.find_one({"_id": team_id})
|
||||
return _team_from_doc(doc)
|
||||
team = _team_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_TEAM_UPDATE,
|
||||
description=f"Team '{team.name}' updated for client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"team_id": team_id, "team_name": team.name, "fields_updated": list(body.model_dump(exclude_none=True).keys())},
|
||||
)
|
||||
return team
|
||||
|
||||
|
||||
@router.delete("/{client_id}/teams/{team_id}", status_code=204)
async def delete_team(
    client_id: str,
    team_id: str,
    request: Request,
    current_user: User = Depends(get_current_user),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Delete a team belonging to a client and record an audit entry.

    The client and team are each looked up once; the fetched documents supply
    the names used in the audit entry. ``_assert_pm_or_admin`` is awaited
    between the two lookups (presumably rejecting callers who are neither PM
    of the client nor admin).
    """
    client_doc = await _get_client_or_404(client_id, db)
    await _assert_pm_or_admin(current_user, client_id, db)
    team_doc = await _get_team_or_404(team_id, client_id, db)
    await db.teams.delete_one({"_id": team_id})
    await audit_logger.log_action(
        action=AuditAction.CLIENT_TEAM_DELETE,
        description=f"Team '{team_doc['name']}' deleted from client '{client_doc['name']}'",
        user=current_user,
        request=request,
        resource_type="client",
        resource_id=client_id,
        resource_name=client_doc["name"],
        details={"team_id": team_id, "team_name": team_doc["name"]},
    )
|
||||
|
||||
|
||||
# Team membership
|
||||
|
|
@ -329,18 +424,35 @@ async def add_team_member(
|
|||
client_id: str,
|
||||
team_id: str,
|
||||
body: AddMemberRequest,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
await _get_client_or_404(client_id, db)
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_admin(current_user, client_id, db)
|
||||
await _get_team_or_404(team_id, client_id, db)
|
||||
if not await db.users.find_one({"_id": body.user_id}):
|
||||
team_doc = await _get_team_or_404(team_id, client_id, db)
|
||||
member_doc = await db.users.find_one({"_id": body.user_id})
|
||||
if not member_doc:
|
||||
raise HTTPException(status_code=404, detail="User not found")
|
||||
# Write to both Team.member_user_ids (legacy) and Membership.team_ids (MT-17)
|
||||
await db.teams.update_one(
|
||||
{"_id": team_id},
|
||||
{"$addToSet": {"member_user_ids": body.user_id}, "$set": {"updated_at": _now()}},
|
||||
)
|
||||
await db.memberships.update_one(
|
||||
{"user_id": body.user_id, "organization_id": client_id},
|
||||
{"$addToSet": {"team_ids": team_id}},
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_TEAM_MEMBER_ADD,
|
||||
description=f"User '{member_doc.get('email', body.user_id)}' added to team '{team_doc['name']}' of client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"team_id": team_id, "team_name": team_doc["name"], "member_user_id": body.user_id, "member_email": member_doc.get("email")},
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/{client_id}/teams/{team_id}/members/{user_id}", status_code=204)
|
||||
|
|
@ -348,22 +460,56 @@ async def remove_team_member(
|
|||
client_id: str,
|
||||
team_id: str,
|
||||
user_id: str,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
await _get_client_or_404(client_id, db)
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_admin(current_user, client_id, db)
|
||||
await _get_team_or_404(team_id, client_id, db)
|
||||
team_doc = await _get_team_or_404(team_id, client_id, db)
|
||||
member_doc = await db.users.find_one({"_id": user_id})
|
||||
await db.teams.update_one(
|
||||
{"_id": team_id},
|
||||
{"$pull": {"member_user_ids": user_id}, "$set": {"updated_at": _now()}},
|
||||
)
|
||||
await db.memberships.update_one(
|
||||
{"user_id": user_id, "organization_id": client_id},
|
||||
{"$pull": {"team_ids": team_id}},
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_TEAM_MEMBER_REMOVE,
|
||||
description=f"User '{member_doc.get('email', user_id) if member_doc else user_id}' removed from team '{team_doc['name']}' of client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"team_id": team_id, "team_name": team_doc["name"], "member_user_id": user_id, "member_email": member_doc.get("email") if member_doc else None},
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Project endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/all-projects", response_model=list[Project])
async def list_all_projects(
    current_user: User = Depends(get_current_user),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Return every active project the current user may see, across all clients.

    Users with the ADMIN, PRODUCTION or PROJECT_MANAGER role see all active
    projects. Any other user only sees active projects whose ``client_id`` is
    among the ids returned by ``_get_accessible_client_ids``; when that list is
    empty the result is an empty list and no project query is issued.
    """
    # NOTE(review): this static path lives on the same router as GET "/{client_id}".
    # If that route is registered earlier in the file it would capture
    # "/all-projects" first — confirm the registration order.
    sees_everything = current_user.role in (UserRole.ADMIN, UserRole.PRODUCTION, UserRole.PROJECT_MANAGER)
    if sees_everything:
        query = {"is_active": True}
    else:
        client_ids = await _get_accessible_client_ids(current_user, db)
        if not client_ids:
            return []
        query = {"client_id": {"$in": client_ids}, "is_active": True}

    project_docs = await db.projects.find(query).to_list(None)
    return [_project_from_doc(project_doc) for project_doc in project_docs]
|
||||
|
||||
|
||||
@router.get("/{client_id}/projects", response_model=list[Project])
|
||||
async def list_projects(
|
||||
client_id: str,
|
||||
|
|
@ -380,10 +526,11 @@ async def list_projects(
|
|||
async def create_project(
|
||||
client_id: str,
|
||||
body: ProjectCreate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
await _get_client_or_404(client_id, db)
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_client_member(current_user, client_id, db)
|
||||
now = _now()
|
||||
project_id = str(ObjectId())
|
||||
|
|
@ -399,7 +546,18 @@ async def create_project(
|
|||
"updated_at": now,
|
||||
})
|
||||
doc = await db.projects.find_one({"_id": project_id})
|
||||
return _project_from_doc(doc)
|
||||
project = _project_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_PROJECT_CREATE,
|
||||
description=f"Project '{project.name}' created for client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"project_id": project_id, "project_name": project.name, "default_languages": body.default_languages},
|
||||
)
|
||||
return project
|
||||
|
||||
|
||||
@router.patch("/{client_id}/projects/{project_id}", response_model=Project)
|
||||
|
|
@ -407,35 +565,58 @@ async def update_project(
|
|||
client_id: str,
|
||||
project_id: str,
|
||||
body: ProjectUpdate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
await _get_client_or_404(client_id, db)
|
||||
client_doc = await _get_client_or_404(client_id, db)
|
||||
await _assert_pm_or_admin(current_user, client_id, db)
|
||||
await _get_project_or_404(project_id, client_id, db)
|
||||
update = {k: v for k, v in body.model_dump(exclude_none=True).items()}
|
||||
update = dict(body.model_dump(exclude_none=True).items())
|
||||
if not update:
|
||||
raise HTTPException(status_code=422, detail="No fields to update")
|
||||
update["updated_at"] = _now()
|
||||
await db.projects.update_one({"_id": project_id}, {"$set": update})
|
||||
doc = await db.projects.find_one({"_id": project_id})
|
||||
return _project_from_doc(doc)
|
||||
project = _project_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.CLIENT_PROJECT_UPDATE,
|
||||
description=f"Project '{project.name}' updated for client '{client_doc['name']}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="client",
|
||||
resource_id=client_id,
|
||||
resource_name=client_doc["name"],
|
||||
details={"project_id": project_id, "project_name": project.name, "fields_updated": list(body.model_dump(exclude_none=True).keys())},
|
||||
)
|
||||
return project
|
||||
|
||||
|
||||
@router.delete("/{client_id}/projects/{project_id}", status_code=204)
async def archive_project(
    client_id: str,
    project_id: str,
    request: Request,
    current_user: User = Depends(get_current_user),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Archive (soft-delete) a project of a client and record an audit entry.

    The project document is kept: ``is_active`` is set to ``False`` and
    ``updated_at`` is refreshed. The client and project are each looked up
    once; the fetched documents supply the names used in the audit entry.
    ``_assert_pm_or_admin`` is awaited between the two lookups (presumably
    rejecting callers who are neither PM of the client nor admin).
    """
    client_doc = await _get_client_or_404(client_id, db)
    await _assert_pm_or_admin(current_user, client_id, db)
    project_doc = await _get_project_or_404(project_id, client_id, db)
    await db.projects.update_one(
        {"_id": project_id},
        {"$set": {"is_active": False, "updated_at": _now()}},
    )
    await audit_logger.log_action(
        action=AuditAction.CLIENT_PROJECT_ARCHIVE,
        description=f"Project '{project_doc['name']}' archived for client '{client_doc['name']}'",
        user=current_user,
        request=request,
        resource_type="client",
        resource_id=client_id,
        resource_name=client_doc["name"],
        details={"project_id": project_id, "project_name": project_doc["name"]},
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -473,6 +654,19 @@ async def _assert_pm_or_client_member(user: User, client_id: str, db: AsyncIOMot
|
|||
raise HTTPException(status_code=403, detail="Not authorized to create projects for this client")
|
||||
|
||||
|
||||
async def _get_accessible_client_ids(user: User, db: AsyncIOMotorDatabase) -> list[str]:
    """Return the client/organization ids the user can access.

    Two sources are merged through a set, so the result has no duplicates and
    no guaranteed order:

    * ``user.pm_client_ids`` -- legacy PM assignments stored on the user.
    * ``organization_id`` of every ``memberships`` document whose ``user_id``
      equals ``str(user.id)``.

    Args:
        user: The user whose access is being resolved.
        db: Database handle exposing the ``memberships`` collection.

    Returns:
        The distinct ids as a list of strings.
    """
    # PM assignments (legacy); ``None`` or an empty list contributes nothing.
    ids: set[str] = set(user.pm_client_ids or ())

    # Org memberships. Only ``organization_id`` is read, so project that one
    # field instead of loading whole membership documents.
    mems = await db.memberships.find(
        {"user_id": str(user.id)},
        {"organization_id": 1},
    ).to_list(None)

    # Skip rows with a missing/empty organization id rather than raising
    # KeyError, and coerce to ``str`` so the declared return type holds.
    ids.update(str(m["organization_id"]) for m in mems if m.get("organization_id"))
    return list(ids)
|
||||
|
||||
|
||||
async def _assert_client_access(user: User, client_id: str, db: AsyncIOMotorDatabase) -> None:
|
||||
"""Allow platform staff, org members (any role), or PM of the client."""
|
||||
if user.role in (UserRole.ADMIN, UserRole.REVIEWER, UserRole.PRODUCTION, UserRole.LINGUIST):
|
||||
|
|
@ -484,6 +678,4 @@ async def _assert_client_access(user: User, client_id: str, db: AsyncIOMotorData
|
|||
# Legacy fallback for pre-migration users
|
||||
if user.role == UserRole.PROJECT_MANAGER and client_id in (user.pm_client_ids or []):
|
||||
return
|
||||
if user.role in (UserRole.CLIENT, UserRole.PROJECT_MANAGER):
|
||||
return
|
||||
raise HTTPException(status_code=403, detail="Insufficient permissions")
|
||||
|
|
|
|||
|
|
@ -3,11 +3,11 @@ from motor.motor_asyncio import AsyncIOMotorDatabase
|
|||
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import get_current_user
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.user import User
|
||||
from ...schemas.file import SignedUploadRequest, SignedUploadResponse
|
||||
from ...services.gcs import generate_signed_upload_url
|
||||
from ...services.audit_logger import audit_logger
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...services.gcs import generate_signed_upload_url
|
||||
|
||||
router = APIRouter(prefix="/files", tags=["files"])
|
||||
|
||||
|
|
@ -28,11 +28,11 @@ async def get_signed_upload_url(
|
|||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Only video files are supported"
|
||||
)
|
||||
|
||||
|
||||
# Generate unique blob path
|
||||
from bson import ObjectId
|
||||
blob_path = f"temp/{ObjectId()}/{request.filename}"
|
||||
|
||||
|
||||
try:
|
||||
# Generate signed upload URL with form fields
|
||||
signed_data = await generate_signed_upload_url(
|
||||
|
|
@ -40,7 +40,7 @@ async def get_signed_upload_url(
|
|||
content_type=request.content_type,
|
||||
max_size=request.max_size or 1024 * 1024 * 1024 # 1GB default
|
||||
)
|
||||
|
||||
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.FILE_UPLOAD,
|
||||
description=f"Signed upload URL generated for {request.filename}",
|
||||
|
|
@ -62,4 +62,4 @@ async def get_signed_upload_url(
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to generate signed upload URL: {str(e)}"
|
||||
)
|
||||
) from None
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from __future__ import annotations
|
|||
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile
|
||||
|
||||
from ...core.dependencies import get_current_user, require_pm_for_client, require_roles
|
||||
from ...core.authz import MembershipContext, assert_user_in_org, get_membership_context
|
||||
from ...core.logging import get_logger
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.glossary import (
|
||||
|
|
@ -19,7 +19,7 @@ from ...models.glossary import (
|
|||
GlossaryResponse,
|
||||
GlossaryVersionResponse,
|
||||
)
|
||||
from ...models.user import User, UserRole
|
||||
from ...models.organization import OrgRole
|
||||
from ...services import audit_logger as audit_svc
|
||||
from ...services import glossary_service as svc
|
||||
|
||||
|
|
@ -37,22 +37,18 @@ _ALLOWED_CONTENT_TYPES = {
|
|||
_MAX_FILE_SIZE_MB = 50
|
||||
|
||||
|
||||
def _require_client_staff(client_id: str):
    """Build the "client staff" dependency (admin or PM of this client).

    The ``client_id`` argument is not read in the body; the helper is instead
    given the parameter *name* ``"client_id"``.
    NOTE(review): presumably ``require_pm_for_client`` resolves that name
    against the route's path parameters -- confirm in core.dependencies.
    """
    staff_dependency = require_pm_for_client(client_id_param="client_id")
    return staff_dependency
|
||||
|
||||
|
||||
# ── List glossaries ───────────────────────────────────────────────────────────
|
||||
|
||||
@router.get("", response_model=list[GlossaryResponse])
|
||||
async def list_glossaries(
|
||||
client_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
):
|
||||
"""List all active glossaries for a client."""
|
||||
_assert_can_read(current_user)
|
||||
assert_user_in_org(ctx, client_id, OrgRole.VIEWER)
|
||||
glossaries = await svc.get_glossaries_for_client(client_id)
|
||||
return [_to_response(g) for g in glossaries]
|
||||
version_map = await svc.get_versions_by_ids([g.current_version_id for g in glossaries if g.current_version_id])
|
||||
return [_to_response(g, version_map.get(g.current_version_id)) for g in glossaries]
|
||||
|
||||
|
||||
# ── Upload new glossary ───────────────────────────────────────────────────────
|
||||
|
|
@ -66,9 +62,10 @@ async def upload_glossary(
|
|||
source_locale_col: str = Form(..., description="xlsx column header for the source language, e.g. en_gb"),
|
||||
description: str | None = Form(None),
|
||||
change_note: str | None = Form(None),
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN, UserRole.PROJECT_MANAGER)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
):
|
||||
"""Upload a new glossary xlsx file and associate it with a client."""
|
||||
assert_user_in_org(ctx, client_id, OrgRole.MANAGER)
|
||||
_validate_xlsx(file)
|
||||
|
||||
try:
|
||||
|
|
@ -78,7 +75,7 @@ async def upload_glossary(
|
|||
source_locale=source_locale,
|
||||
source_locale_col=source_locale_col,
|
||||
file=file,
|
||||
user_id=str(current_user.id),
|
||||
user_id=str(ctx.user.id),
|
||||
description=description,
|
||||
change_note=change_note,
|
||||
)
|
||||
|
|
@ -88,7 +85,7 @@ async def upload_glossary(
|
|||
await audit_svc.audit_logger.log_action(
|
||||
action=AuditAction.GLOSSARY_UPLOAD,
|
||||
description=f"Glossary '{name}' uploaded for client {client_id}",
|
||||
user=current_user,
|
||||
user=ctx.user,
|
||||
resource_type="glossary",
|
||||
resource_id=glossary.id,
|
||||
details={"term_count": version.term_count, "source_locale": source_locale},
|
||||
|
|
@ -104,9 +101,9 @@ async def upload_glossary(
|
|||
async def get_glossary(
|
||||
client_id: str,
|
||||
glossary_id: str,
|
||||
current_user: User = Depends(get_current_user),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
):
|
||||
_assert_can_read(current_user)
|
||||
assert_user_in_org(ctx, client_id, OrgRole.VIEWER)
|
||||
glossary = await svc.get_glossary(glossary_id)
|
||||
if not glossary or glossary.client_id != client_id:
|
||||
raise HTTPException(status_code=404, detail="Glossary not found")
|
||||
|
|
@ -124,9 +121,9 @@ async def list_terms(
|
|||
search: str | None = Query(None),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(50, ge=1, le=200),
|
||||
current_user: User = Depends(get_current_user),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
):
|
||||
_assert_can_read(current_user)
|
||||
assert_user_in_org(ctx, client_id, OrgRole.VIEWER)
|
||||
glossary = await svc.get_glossary(glossary_id)
|
||||
if not glossary or glossary.client_id != client_id:
|
||||
raise HTTPException(status_code=404, detail="Glossary not found")
|
||||
|
|
@ -153,9 +150,10 @@ async def upload_version(
|
|||
file: UploadFile = File(...),
|
||||
source_locale_col: str = Form(...),
|
||||
change_note: str | None = Form(None),
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN, UserRole.PROJECT_MANAGER)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
):
|
||||
"""Upload a new xlsx file as a new version of an existing glossary."""
|
||||
assert_user_in_org(ctx, client_id, OrgRole.MANAGER)
|
||||
_validate_xlsx(file)
|
||||
glossary = await svc.get_glossary(glossary_id)
|
||||
if not glossary or glossary.client_id != client_id:
|
||||
|
|
@ -166,7 +164,7 @@ async def upload_version(
|
|||
glossary_id=glossary_id,
|
||||
source_locale_col=source_locale_col,
|
||||
file=file,
|
||||
user_id=str(current_user.id),
|
||||
user_id=str(ctx.user.id),
|
||||
change_note=change_note,
|
||||
)
|
||||
except ValueError as exc:
|
||||
|
|
@ -175,7 +173,7 @@ async def upload_version(
|
|||
await audit_svc.audit_logger.log_action(
|
||||
action=AuditAction.GLOSSARY_VERSION_UPLOAD,
|
||||
description=f"New glossary version uploaded for glossary {glossary_id}",
|
||||
user=current_user,
|
||||
user=ctx.user,
|
||||
resource_type="glossary_version",
|
||||
resource_id=version.id,
|
||||
details={"term_count": version.term_count, "version_number": version.version_number},
|
||||
|
|
@ -190,8 +188,9 @@ async def activate_version(
|
|||
client_id: str,
|
||||
glossary_id: str,
|
||||
version_id: str = Form(...),
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN, UserRole.PROJECT_MANAGER)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
):
|
||||
assert_user_in_org(ctx, client_id, OrgRole.MANAGER)
|
||||
glossary = await svc.get_glossary(glossary_id)
|
||||
if not glossary or glossary.client_id != client_id:
|
||||
raise HTTPException(status_code=404, detail="Glossary not found")
|
||||
|
|
@ -204,7 +203,7 @@ async def activate_version(
|
|||
await audit_svc.audit_logger.log_action(
|
||||
action=AuditAction.GLOSSARY_ACTIVATE,
|
||||
description=f"Glossary version {version_id} activated",
|
||||
user=current_user,
|
||||
user=ctx.user,
|
||||
resource_type="glossary",
|
||||
resource_id=glossary_id,
|
||||
details={"version_id": version_id},
|
||||
|
|
@ -219,9 +218,10 @@ async def reembed_version(
|
|||
client_id: str,
|
||||
glossary_id: str,
|
||||
version_id: str,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN, UserRole.PROJECT_MANAGER)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
):
|
||||
"""Re-queue the embedding task for a glossary version (resets failed/pending/stuck embeds)."""
|
||||
assert_user_in_org(ctx, client_id, OrgRole.MANAGER)
|
||||
glossary = await svc.get_glossary(glossary_id)
|
||||
if not glossary or glossary.client_id != client_id:
|
||||
raise HTTPException(status_code=404, detail="Glossary not found")
|
||||
|
|
@ -232,10 +232,11 @@ async def reembed_version(
|
|||
raise HTTPException(status_code=404, detail="Version not found")
|
||||
|
||||
try:
|
||||
from ...tasks.embed_glossary import embed_glossary_version_task
|
||||
from bson import ObjectId
|
||||
import motor.motor_asyncio
|
||||
from bson import ObjectId
|
||||
|
||||
from ...core.config import settings
|
||||
from ...tasks.embed_glossary import embed_glossary_version_task
|
||||
|
||||
client_db = motor.motor_asyncio.AsyncIOMotorClient(settings.mongodb_uri)
|
||||
db = client_db[settings.mongodb_db]
|
||||
|
|
@ -252,14 +253,15 @@ async def reembed_version(
|
|||
return {"status": "queued", "version_id": version_id}
|
||||
|
||||
|
||||
# ── Archive (soft-delete) ─────────────────────────────────────────────────────
|
||||
# ── Delete ───────────────────────────────────────────────────────────────────
|
||||
|
||||
@router.delete("/{glossary_id}", status_code=204)
|
||||
async def archive_glossary(
|
||||
client_id: str,
|
||||
glossary_id: str,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
):
|
||||
assert_user_in_org(ctx, client_id, OrgRole.ADMIN)
|
||||
glossary = await svc.get_glossary(glossary_id)
|
||||
if not glossary or glossary.client_id != client_id:
|
||||
raise HTTPException(status_code=404, detail="Glossary not found")
|
||||
|
|
@ -267,7 +269,7 @@ async def archive_glossary(
|
|||
await audit_svc.audit_logger.log_action(
|
||||
action=AuditAction.GLOSSARY_ARCHIVE,
|
||||
description=f"Glossary {glossary_id} archived",
|
||||
user=current_user,
|
||||
user=ctx.user,
|
||||
resource_type="glossary",
|
||||
resource_id=glossary_id,
|
||||
)
|
||||
|
|
@ -275,13 +277,6 @@ async def archive_glossary(
|
|||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def _assert_can_read(user: User) -> None:
    """Reject callers whose platform role is not allowed to read glossaries.

    Raises:
        HTTPException: 403 when ``user.role`` is not one of ADMIN,
            PROJECT_MANAGER, REVIEWER, LINGUIST or PRODUCTION.
    """
    readable_roles = {
        UserRole.ADMIN,
        UserRole.PROJECT_MANAGER,
        UserRole.REVIEWER,
        UserRole.LINGUIST,
        UserRole.PRODUCTION,
    }
    # Guard clause: permitted roles fall straight through.
    if user.role in readable_roles:
        return
    raise HTTPException(status_code=403, detail="Insufficient permissions")
|
||||
|
||||
|
||||
def _validate_xlsx(file: UploadFile) -> None:
|
||||
if file.content_type not in _ALLOWED_CONTENT_TYPES and not (
|
||||
file.filename and file.filename.endswith(".xlsx")
|
||||
|
|
@ -292,7 +287,7 @@ def _validate_xlsx(file: UploadFile) -> None:
|
|||
)
|
||||
|
||||
|
||||
def _to_response(g) -> GlossaryResponse:
|
||||
def _to_response(g, current_version=None) -> GlossaryResponse:
|
||||
return GlossaryResponse(
|
||||
id=str(g.id),
|
||||
client_id=g.client_id,
|
||||
|
|
@ -302,6 +297,9 @@ def _to_response(g) -> GlossaryResponse:
|
|||
source=g.source,
|
||||
status=g.status,
|
||||
current_version_id=g.current_version_id,
|
||||
current_version_embedding_status=current_version.embedding_status if current_version else None,
|
||||
current_version_embedded_count=current_version.embedded_count if current_version else None,
|
||||
current_version_term_count=current_version.term_count if current_version else None,
|
||||
created_at=g.created_at,
|
||||
created_by=g.created_by,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -14,16 +14,21 @@ Protected endpoints:
|
|||
import hashlib
|
||||
import re
|
||||
import secrets
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
||||
from ...core.authz import bump_user_membership_cache
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import get_current_user
|
||||
from ...core.security import create_access_token, create_refresh_token, get_password_hash
|
||||
from ...core.security import (
|
||||
create_access_token,
|
||||
create_refresh_token,
|
||||
get_password_hash,
|
||||
)
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.invitation import (
|
||||
Invitation,
|
||||
InvitationAcceptRequest,
|
||||
InvitationCreate,
|
||||
InvitationPreviewResponse,
|
||||
|
|
@ -31,7 +36,7 @@ from ...models.invitation import (
|
|||
)
|
||||
from ...models.organization import OrgRole
|
||||
from ...models.user import AuthProvider, User, UserRole
|
||||
from ...core.authz import bump_user_membership_cache
|
||||
from ...services.audit_logger import audit_logger
|
||||
from ...services.emailer import email_service
|
||||
from ...services.membership_service import get_membership, upsert_membership
|
||||
|
||||
|
|
@ -39,7 +44,7 @@ router = APIRouter(tags=["invitations"])
|
|||
|
||||
|
||||
def _now() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
def _hash_token(plaintext: str) -> str:
|
||||
|
|
@ -54,7 +59,7 @@ def _make_token() -> tuple[str, str]:
|
|||
|
||||
def _inv_from_doc(doc: dict) -> InvitationResponse:
|
||||
now = _now()
|
||||
expires_at = doc["expires_at"].replace(tzinfo=timezone.utc) if doc["expires_at"].tzinfo is None else doc["expires_at"]
|
||||
expires_at = doc["expires_at"].replace(tzinfo=UTC) if doc["expires_at"].tzinfo is None else doc["expires_at"]
|
||||
return InvitationResponse(
|
||||
id=str(doc["_id"]),
|
||||
email=doc["email"],
|
||||
|
|
@ -100,6 +105,7 @@ org_router = APIRouter(prefix="/organizations", tags=["invitations"])
|
|||
async def create_invitation(
|
||||
org_id: str,
|
||||
body: InvitationCreate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -121,6 +127,18 @@ async def create_invitation(
|
|||
detail="A pending invitation already exists for this email. Revoke it first to re-invite.",
|
||||
)
|
||||
|
||||
# MT-19: ensure all target_team_ids belong to this org (client_id == org_id)
|
||||
if body.target_team_ids:
|
||||
valid_teams = await db.teams.count_documents({
|
||||
"_id": {"$in": body.target_team_ids},
|
||||
"client_id": org_id,
|
||||
})
|
||||
if valid_teams != len(body.target_team_ids):
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="One or more target_team_ids do not belong to this organization.",
|
||||
)
|
||||
|
||||
plaintext, token_hash = _make_token()
|
||||
now = _now()
|
||||
expires_at = now + timedelta(days=body.expires_in_days)
|
||||
|
|
@ -154,7 +172,17 @@ async def create_invitation(
|
|||
expires_at=expires_at,
|
||||
)
|
||||
|
||||
return _inv_from_doc(doc)
|
||||
inv = _inv_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.INVITATION_CREATE,
|
||||
description=f"Invitation created for '{email_lower}' to organization '{org_id}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="invitation",
|
||||
resource_id=inv.id,
|
||||
details={"invited_email": email_lower, "org_id": org_id, "role": body.role_in_org},
|
||||
)
|
||||
return inv
|
||||
|
||||
|
||||
@org_router.get("/{org_id}/invitations", response_model=list[InvitationResponse])
|
||||
|
|
@ -174,16 +202,30 @@ async def list_invitations(
|
|||
async def revoke_invitation(
    org_id: str,
    invitation_id: str,
    request: Request,
    current_user: User = Depends(get_current_user),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Revoke a pending invitation of an organization and audit the action.

    Only an invitation that belongs to ``org_id`` and has neither
    ``accepted_at`` nor ``revoked_at`` set is affected; its ``revoked_at`` is
    stamped with the current UTC time.

    Args:
        org_id: Organization the invitation must belong to.
        invitation_id: ``_id`` of the invitation document.
        request: Incoming request, forwarded to the audit logger.
        current_user: Authenticated caller, checked by ``_assert_org_admin``.
        db: Database handle.

    Raises:
        HTTPException: 404 when no matching pending invitation exists
            (unknown id, other organization, or already accepted/revoked).
    """
    await _assert_org_admin(org_id, current_user, db)

    # One atomic round trip: flip ``revoked_at`` and get back the matched
    # document (pre-update by default), so the email written to the audit log
    # always comes from the invitation that was actually revoked.
    inv_doc = await db.invitations.find_one_and_update(
        {"_id": invitation_id, "organization_id": org_id, "accepted_at": None, "revoked_at": None},
        {"$set": {"revoked_at": _now()}},
    )
    if inv_doc is None:
        raise HTTPException(status_code=404, detail="Invitation not found or already accepted/revoked")

    await audit_logger.log_action(
        action=AuditAction.INVITATION_REVOKE,
        description=f"Invitation '{invitation_id}' revoked in organization '{org_id}'",
        user=current_user,
        request=request,
        resource_type="invitation",
        resource_id=invitation_id,
        details={
            # Audit-only field: tolerate a document without an email.
            "invited_email": inv_doc.get("email"),
            "org_id": org_id,
        },
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -206,7 +248,7 @@ async def preview_invitation(
|
|||
raise HTTPException(status_code=410, detail="Invitation not found or has expired")
|
||||
|
||||
now = _now()
|
||||
expires_at = doc["expires_at"].replace(tzinfo=timezone.utc) if doc["expires_at"].tzinfo is None else doc["expires_at"]
|
||||
expires_at = doc["expires_at"].replace(tzinfo=UTC) if doc["expires_at"].tzinfo is None else doc["expires_at"]
|
||||
|
||||
if doc.get("revoked_at"):
|
||||
raise HTTPException(status_code=410, detail="This invitation has been revoked")
|
||||
|
|
@ -255,6 +297,7 @@ async def preview_invitation(
|
|||
@router.post("/invitations/accept")
|
||||
async def accept_invitation(
|
||||
body: InvitationAcceptRequest,
|
||||
request: Request,
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""Accept an invitation. Creates user if needed, creates membership, returns tokens."""
|
||||
|
|
@ -317,12 +360,16 @@ async def accept_invitation(
|
|||
await upsert_membership(user_id, org_id, role_in_org, doc["invited_by_user_id"], db)
|
||||
await bump_user_membership_cache(user_id)
|
||||
|
||||
# Auto-add to target teams
|
||||
# Auto-add to target teams — write to both Team.member_user_ids (legacy) and Membership.team_ids (MT-17)
|
||||
for team_id in doc.get("target_team_ids", []):
|
||||
await db.teams.update_one(
|
||||
{"_id": team_id, "client_id": org_id},
|
||||
{"$addToSet": {"member_user_ids": user_id}},
|
||||
)
|
||||
await db.memberships.update_one(
|
||||
{"user_id": user_id, "organization_id": org_id},
|
||||
{"$addToSet": {"team_ids": team_id}},
|
||||
)
|
||||
|
||||
# Send welcome email
|
||||
if not existing_user.get("_welcomed"):
|
||||
|
|
@ -333,12 +380,23 @@ async def accept_invitation(
|
|||
org_name=org_name,
|
||||
)
|
||||
|
||||
# Issue JWT tokens
|
||||
access_token = create_access_token(subject=user_id)
|
||||
# Issue JWT tokens with org_ids claim
|
||||
_inv_org_ids = [m["organization_id"] async for m in db.memberships.find({"user_id": user_id}, {"organization_id": 1})]
|
||||
access_token = create_access_token(subject=user_id, org_ids=[str(o) for o in _inv_org_ids if o])
|
||||
refresh_token = create_refresh_token(subject=user_id)
|
||||
|
||||
org_name, org_slug = await _get_org_name(org_id, db)
|
||||
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.INVITATION_ACCEPT,
|
||||
description=f"Invitation accepted by '{email_lower}' for organization '{org_id}'",
|
||||
user=None,
|
||||
request=request,
|
||||
resource_type="invitation",
|
||||
resource_id=str(doc["_id"]),
|
||||
details={"invited_email": email_lower, "org_id": org_id},
|
||||
)
|
||||
|
||||
return {
|
||||
"access_token": access_token,
|
||||
"refresh_token": refresh_token,
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,17 +1,18 @@
|
|||
"""Per-language QC endpoints — two-stage (linguist + reviewer) assignment, workflow, comments."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Query, Request
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import require_roles
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.job import LanguageQCComment, LanguageQCState
|
||||
from ...models.user import User, UserRole
|
||||
from ...services import language_qc as lqc
|
||||
from ...services.audit_logger import audit_logger
|
||||
|
||||
router = APIRouter(tags=["language-qc"])
|
||||
|
||||
|
|
@ -20,38 +21,39 @@ router = APIRouter(tags=["language-qc"])
|
|||
|
||||
class AssignRequest(BaseModel):
    """Request body for assigning a linguist to one language of a job."""

    # User id of the linguist who receives the language.
    linguist_user_id: str
    # Optional free-text note forwarded with the assignment.
    notes: str | None = None
    # Optional due date forwarded with the assignment.
    deadline: datetime | None = None
|
||||
|
||||
|
||||
class ReassignRequest(BaseModel):
    """Request body for moving a language to a different linguist."""

    # User id of the linguist taking over the language.
    linguist_user_id: str
    # Optional free-text note forwarded with the reassignment.
    notes: str | None = None
    # Optional due date forwarded with the reassignment.
    deadline: datetime | None = None
|
||||
|
||||
|
||||
class AssignReviewerRequest(BaseModel):
    """Request body for assigning a reviewer to one language of a job."""

    # User id of the reviewer who receives the language.
    reviewer_user_id: str
    # Optional free-text note forwarded with the assignment.
    notes: str | None = None
    # Optional due date forwarded with the assignment.
    deadline: datetime | None = None
|
||||
|
||||
|
||||
class ReassignReviewerRequest(BaseModel):
    """Request body for moving a language to a different reviewer."""

    # User id of the reviewer taking over the language.
    reviewer_user_id: str
    # Optional free-text note forwarded with the reassignment.
    notes: str | None = None
    # Optional due date forwarded with the reassignment.
    deadline: datetime | None = None
|
||||
|
||||
|
||||
class ApproveLanguageRequest(BaseModel):
    """Request body for the approve-language action; notes are optional."""

    notes: str | None = None
|
||||
|
||||
|
||||
class RejectLanguageRequest(BaseModel):
    """Request body for the reject-language action; notes are mandatory."""

    # Required explanation of the rejection.
    notes: str
    # Optional rejection category, one of:
    # timing | mistranslation | terminology | profanity | length | other
    category: str | None = None
|
||||
|
||||
|
||||
class ReopenLanguageRequest(BaseModel):
    """Request body for the reopen-language action; notes are optional."""

    notes: str | None = None
|
||||
|
||||
|
||||
class AddCommentRequest(BaseModel):
|
||||
|
|
@ -74,8 +76,8 @@ class QueueItem(BaseModel):
|
|||
job_status: str
|
||||
lang: str
|
||||
lang_qc_status: str
|
||||
assigned_at: Optional[str] = None
|
||||
reviewed_at: Optional[str] = None
|
||||
assigned_at: str | None = None
|
||||
reviewed_at: str | None = None
|
||||
|
||||
|
||||
class QueueResponse(BaseModel):
|
||||
|
|
@ -83,6 +85,20 @@ class QueueResponse(BaseModel):
|
|||
total: int
|
||||
|
||||
|
||||
class BulkAssignRequest(BaseModel):
    """Request body for assigning several languages of one job in a single call."""

    # Linguist who receives every targeted language.
    linguist_user_id: str
    # Optional reviewer assigned alongside the linguist.
    reviewer_user_id: str | None = None
    # Languages to target; None = all available languages.
    languages: list[str] | None = None
    # When true, skip languages that already have an assignment.
    only_unassigned: bool = False
    # Optional due date applied to each assignment made.
    deadline: datetime | None = None
|
||||
|
||||
|
||||
class BulkAssignResponse(BaseModel):
    """Per-language outcome of a bulk assignment."""

    # Languages whose linguist assignment went through.
    assigned: list[str]
    # Languages left untouched (not available, or already assigned).
    skipped: list[str]
    # Error text keyed by language (or "<lang>:reviewer" for the reviewer step).
    errors: dict[str, str]
|
||||
|
||||
|
||||
# ── Routes ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@router.get("/jobs/{job_id}/language-qc", response_model=LanguageQCMapResponse)
|
||||
|
|
@ -94,6 +110,8 @@ async def get_language_qc(
|
|||
)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
# Lazy auto-assignment: apply project/job defaults on first open in PENDING_QC
|
||||
await lqc.auto_assign_defaults(db, job_id)
|
||||
states = await lqc.get_all_states(db, job_id)
|
||||
return LanguageQCMapResponse(job_id=job_id, language_qc=states)
|
||||
|
||||
|
|
@ -107,7 +125,7 @@ async def assign_language(
|
|||
request: AssignRequest,
|
||||
http_request: Request,
|
||||
current_user: User = Depends(require_roles(
|
||||
UserRole.PROJECT_MANAGER, UserRole.PRODUCTION, UserRole.ADMIN,
|
||||
UserRole.REVIEWER, UserRole.PROJECT_MANAGER, UserRole.PRODUCTION, UserRole.ADMIN,
|
||||
)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -115,6 +133,15 @@ async def assign_language(
|
|||
db, job_id, lang, request.linguist_user_id, current_user,
|
||||
http_request=http_request, notes=request.notes, deadline=request.deadline,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_ASSIGN,
|
||||
description=f"Language '{lang}' assigned to linguist '{request.linguist_user_id}' for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "linguist_user_id": request.linguist_user_id},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -133,6 +160,15 @@ async def reassign_language(
|
|||
db, job_id, lang, request.linguist_user_id, current_user,
|
||||
http_request=http_request, notes=request.notes, deadline=request.deadline,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_REASSIGN,
|
||||
description=f"Language '{lang}' reassigned to linguist '{request.linguist_user_id}' for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "linguist_user_id": request.linguist_user_id},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -145,7 +181,7 @@ async def assign_reviewer(
|
|||
request: AssignReviewerRequest,
|
||||
http_request: Request,
|
||||
current_user: User = Depends(require_roles(
|
||||
UserRole.PROJECT_MANAGER, UserRole.PRODUCTION, UserRole.ADMIN,
|
||||
UserRole.REVIEWER, UserRole.PROJECT_MANAGER, UserRole.PRODUCTION, UserRole.ADMIN,
|
||||
)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -153,6 +189,15 @@ async def assign_reviewer(
|
|||
db, job_id, lang, request.reviewer_user_id, current_user,
|
||||
http_request=http_request, notes=request.notes, deadline=request.deadline,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_REVIEWER_ASSIGN,
|
||||
description=f"Reviewer '{request.reviewer_user_id}' assigned to language '{lang}' for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "reviewer_user_id": request.reviewer_user_id},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -163,7 +208,7 @@ async def reassign_reviewer(
|
|||
request: ReassignReviewerRequest,
|
||||
http_request: Request,
|
||||
current_user: User = Depends(require_roles(
|
||||
UserRole.PROJECT_MANAGER, UserRole.PRODUCTION, UserRole.ADMIN,
|
||||
UserRole.REVIEWER, UserRole.PROJECT_MANAGER, UserRole.PRODUCTION, UserRole.ADMIN,
|
||||
)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -171,9 +216,94 @@ async def reassign_reviewer(
|
|||
db, job_id, lang, request.reviewer_user_id, current_user,
|
||||
http_request=http_request, notes=request.notes, deadline=request.deadline,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_REVIEWER_REASSIGN,
|
||||
description=f"Reviewer reassigned to '{request.reviewer_user_id}' for language '{lang}', job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "reviewer_user_id": request.reviewer_user_id},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
# ── Bulk assignment ───────────────────────────────────────────────────────────
|
||||
|
||||
@router.post("/jobs/{job_id}/languages/bulk-assign", response_model=BulkAssignResponse)
async def bulk_assign_languages(
    job_id: str,
    request: BulkAssignRequest,
    http_request: Request,
    current_user: User = Depends(require_roles(
        UserRole.REVIEWER, UserRole.PROJECT_MANAGER, UserRole.PRODUCTION, UserRole.ADMIN,
    )),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Assign one linguist (and optionally one reviewer) to multiple languages in one call.

    Languages are handled independently: a failure on one is recorded in
    ``errors`` and processing continues with the next. A single audit entry
    summarising the whole batch is written at the end.

    Returns:
        ``BulkAssignResponse`` where

        * ``assigned`` lists languages whose linguist assignment succeeded. A
          reviewer failure for the same language is reported under the key
          ``"<lang>:reviewer"`` in ``errors`` but the language stays listed.
        * ``skipped`` lists requested languages that are not keys of the
          job's ``outputs``, or that already had a linguist while
          ``only_unassigned`` was set.
        * ``errors`` maps each failed step to ``str(exc)``.

    Raises:
        HTTPException: 404 when no job has ``_id == job_id``.
    """
    job_doc = await db["jobs"].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")

    available = list((job_doc.get("outputs") or {}).keys())
    available_lookup = set(available)
    # An omitted or empty ``languages`` list targets every available language.
    # Requested codes are de-duplicated (request order kept) so a repeated
    # code is neither assigned twice nor reported twice.
    target_langs = list(dict.fromkeys(request.languages)) if request.languages else available

    assigned: list[str] = []
    skipped: list[str] = []
    errors: dict[str, str] = {}

    # Snapshot taken before the loop; the ``only_unassigned`` check reflects
    # the state at request start.
    language_qc = job_doc.get("language_qc") or {}

    for lang in target_langs:
        if lang not in available_lookup:
            skipped.append(lang)
            continue

        lang_state = language_qc.get(lang) or {}
        if request.only_unassigned and lang_state.get("assigned_linguist_id"):
            skipped.append(lang)
            continue

        try:
            await lqc.assign_linguist(
                db, job_id, lang, request.linguist_user_id, current_user,
                http_request=http_request, deadline=request.deadline,
            )
        except Exception as exc:
            # Deliberately broad: one failing language must not abort the batch.
            errors[lang] = str(exc)
            continue

        if request.reviewer_user_id:
            try:
                await lqc.assign_reviewer(
                    db, job_id, lang, request.reviewer_user_id, current_user,
                    http_request=http_request, deadline=request.deadline,
                )
            except Exception as exc:
                # The linguist assignment above already succeeded, so the
                # language still counts as assigned; only the reviewer step
                # is reported as failed.
                errors[f"{lang}:reviewer"] = str(exc)

        assigned.append(lang)

    await audit_logger.log_action(
        action=AuditAction.LANGUAGE_QC_BULK_ASSIGN,
        description=f"Bulk assignment for job {job_id}: {len(assigned)} language(s) assigned to linguist '{request.linguist_user_id}'",
        user=current_user,
        request=http_request,
        resource_type="job",
        resource_id=job_id,
        details={
            "languages": assigned,
            "linguist_user_id": request.linguist_user_id,
            "reviewer_user_id": request.reviewer_user_id,
            "skipped": skipped,
            "errors": errors,
        },
    )
    return BulkAssignResponse(assigned=assigned, skipped=skipped, errors=errors)
|
||||
|
||||
|
||||
# ── Workflow transitions ──────────────────────────────────────────────────────
|
||||
|
||||
@router.post("/jobs/{job_id}/languages/{lang}/start-work", response_model=LanguageQCStateResponse)
|
||||
|
|
@ -188,6 +318,15 @@ async def start_linguist_work(
|
|||
):
|
||||
"""Linguist opens the language — pending → in_progress."""
|
||||
state = await lqc.start_linguist_work(db, job_id, lang, current_user)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_START_WORK,
|
||||
description=f"Linguist started work on language '{lang}' for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -203,6 +342,15 @@ async def submit_for_review(
|
|||
):
|
||||
"""Linguist submits — in_progress → pending_review. Notifies reviewer by email."""
|
||||
state = await lqc.submit_for_review(db, job_id, lang, current_user, http_request=http_request)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_SUBMIT,
|
||||
description=f"Language '{lang}' submitted for review for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -218,6 +366,15 @@ async def open_review(
|
|||
):
|
||||
"""Reviewer opens the review — pending_review → in_review."""
|
||||
state = await lqc.open_review(db, job_id, lang, current_user, http_request=http_request)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_OPEN_REVIEW,
|
||||
description=f"Reviewer opened review for language '{lang}', job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -237,6 +394,15 @@ async def approve_language(
|
|||
state = await lqc.approve_language(
|
||||
db, job_id, lang, current_user, http_request=http_request, notes=request.notes,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_APPROVE,
|
||||
description=f"Language '{lang}' approved for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "notes": request.notes},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -252,8 +418,50 @@ async def reject_language(
|
|||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
state = await lqc.reject_language(
|
||||
db, job_id, lang, current_user, request.notes, http_request=http_request,
|
||||
db, job_id, lang, current_user, request.notes, category=request.category, http_request=http_request,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_REJECT,
|
||||
description=f"Language '{lang}' rejected for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "notes": request.notes, "category": request.category},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
class MarkCueReviewedRequest(BaseModel):
    """Request body accepted by the mark-cue-reviewed endpoint."""

    # Optional cue total; when present the handler stores it on the
    # language's QC state alongside the incremented counter.
    total_cues: int | None = None  # client sends on first call to set total
|
||||
|
||||
|
||||
@router.post("/jobs/{job_id}/languages/{lang}/mark-cue-reviewed", response_model=LanguageQCStateResponse)
async def mark_cue_reviewed(
    job_id: str,
    lang: str,
    request: MarkCueReviewedRequest,
    http_request: Request,
    current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.ADMIN)),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Increment reviewed_cues counter; optionally set total_cues on first call.

    Args:
        job_id: ``_id`` of the job document in ``db.jobs``.
        lang: Language key whose ``language_qc`` entry is updated.
        request: Body carrying the optional ``total_cues`` value.
        http_request: Incoming request (not read by this handler).
        current_user: Caller; must hold the REVIEWER or ADMIN role.
        db: Database handle.

    Returns:
        ``LanguageQCStateResponse`` built from the language's state after the update.

    Raises:
        HTTPException: 404 when the job does not exist, or when ``lang`` is
            neither an output language nor a ``language_qc`` entry of the job.
    """
    # Imported locally, as in the original handler.
    from ...models.job import LanguageQCState

    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")

    # ``lang`` comes from the URL and is interpolated into a Mongo field path
    # below. Only accept keys the job already knows about, so a caller cannot
    # create arbitrary ``language_qc.*`` entries or address nested paths
    # (e.g. a value containing dots).
    known_langs = set(job_doc.get("outputs") or {}) | set(job_doc.get("language_qc") or {})
    if lang not in known_langs:
        raise HTTPException(status_code=404, detail=f"Language '{lang}' not found for this job")

    inc_op: dict = {f"language_qc.{lang}.reviewed_cues": 1}
    set_op: dict = {"updated_at": datetime.utcnow()}
    if request.total_cues is not None:
        set_op[f"language_qc.{lang}.total_cues"] = request.total_cues

    # Single atomic round trip: apply the update and read back the result, so
    # the returned counter is the one this call produced.
    # ``return_document=True`` asks for the post-update document.
    updated_doc = await db.jobs.find_one_and_update(
        {"_id": job_id},
        {"$inc": inc_op, "$set": set_op},
        return_document=True,
    )
    if not updated_doc:
        # The job was removed between the lookup and the update.
        raise HTTPException(status_code=404, detail="Job not found")

    state_dict = (updated_doc.get("language_qc") or {}).get(lang, {})
    state = LanguageQCState(**state_dict) if isinstance(state_dict, dict) else LanguageQCState()
    return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -269,6 +477,15 @@ async def reopen_language(
|
|||
state = await lqc.reopen_language(
|
||||
db, job_id, lang, current_user, http_request=http_request, notes=request.notes,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_REOPEN,
|
||||
description=f"Language '{lang}' reopened for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "notes": request.notes},
|
||||
)
|
||||
return LanguageQCStateResponse(lang=lang, state=state)
|
||||
|
||||
|
||||
|
|
@ -289,6 +506,15 @@ async def add_comment(
|
|||
comment = await lqc.add_comment(
|
||||
db, job_id, lang, current_user, request.body, http_request=http_request,
|
||||
)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.LANGUAGE_QC_COMMENT,
|
||||
description=f"Comment added to language '{lang}' for job {job_id}",
|
||||
user=current_user,
|
||||
request=http_request,
|
||||
resource_type="job",
|
||||
resource_id=job_id,
|
||||
details={"lang": lang, "comment_id": str(comment.id) if hasattr(comment, "id") else None},
|
||||
)
|
||||
return comment
|
||||
|
||||
|
||||
|
|
@ -313,7 +539,7 @@ async def list_comments(
|
|||
@router.get("/me/language-qc-queue", response_model=QueueResponse)
|
||||
async def my_language_qc_queue(
|
||||
role: str = Query("linguist", description="'linguist' or 'reviewer'"),
|
||||
qc_status: Optional[str] = Query(None, description="Filter by status"),
|
||||
qc_status: str | None = Query(None, description="Filter by status"),
|
||||
skip: int = Query(0, ge=0),
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
current_user: User = Depends(require_roles(
|
||||
|
|
@ -322,13 +548,17 @@ async def my_language_qc_queue(
|
|||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""List jobs and languages assigned to the current user as linguist or reviewer."""
|
||||
# ADMIN sees all orgs; staff scoped to their orgs from JWT claim (MT-18)
|
||||
org_ids: list[str] | None = None if current_user.role == UserRole.ADMIN else getattr(current_user, "org_ids", None)
|
||||
if role == "reviewer":
|
||||
jobs = await lqc.list_for_reviewer(
|
||||
db, str(current_user.id), status_filter=qc_status, skip=skip, limit=limit,
|
||||
db, str(current_user.id), accessible_org_ids=org_ids,
|
||||
status_filter=qc_status, skip=skip, limit=limit,
|
||||
)
|
||||
else:
|
||||
jobs = await lqc.list_for_linguist(
|
||||
db, str(current_user.id), status_filter=qc_status, skip=skip, limit=limit,
|
||||
db, str(current_user.id), accessible_org_ids=org_ids,
|
||||
status_filter=qc_status, skip=skip, limit=limit,
|
||||
)
|
||||
|
||||
items: list[QueueItem] = []
|
||||
|
|
|
|||
|
|
@ -12,19 +12,25 @@ underlying MongoDB collections used by routes_clients.py so both
|
|||
endpoints coexist without data duplication.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from bson import ObjectId
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ...core.authz import bump_user_membership_cache
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import get_current_user, require_roles
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.membership import MemberDetail, MembershipCreate, MembershipUpdate
|
||||
from ...models.organization import OrgRole, Organization, OrganizationCreate, OrganizationUpdate
|
||||
from ...models.organization import (
|
||||
Organization,
|
||||
OrganizationCreate,
|
||||
OrganizationUpdate,
|
||||
OrgRole,
|
||||
)
|
||||
from ...models.user import User, UserRole
|
||||
from ...core.authz import bump_user_membership_cache
|
||||
from ...services.audit_logger import audit_logger
|
||||
from ...services.membership_service import (
|
||||
get_membership,
|
||||
get_memberships_for_user,
|
||||
|
|
@ -39,7 +45,7 @@ ADMIN_ROLES = [UserRole.ADMIN]
|
|||
|
||||
|
||||
def _now() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -115,6 +121,7 @@ class _OrgCreate(BaseModel):
|
|||
@router.post("", response_model=Organization, status_code=201)
|
||||
async def create_organization(
|
||||
body: OrganizationCreate,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -133,13 +140,25 @@ async def create_organization(
|
|||
"updated_at": now,
|
||||
}
|
||||
await db.clients.insert_one(doc)
|
||||
return _org_from_doc(doc)
|
||||
org = _org_from_doc(doc)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.ORG_CREATE,
|
||||
description=f"Organization '{org.name}' created",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="organization",
|
||||
resource_id=str(org.id),
|
||||
resource_name=org.name,
|
||||
details={"slug": org.slug},
|
||||
)
|
||||
return org
|
||||
|
||||
|
||||
@router.patch("/{org_id}", response_model=Organization)
|
||||
async def update_organization(
|
||||
org_id: str,
|
||||
body: OrganizationUpdate,
|
||||
request: Request,
|
||||
current_user: User = Depends(require_roles(UserRole.ADMIN)),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -156,7 +175,18 @@ async def update_organization(
|
|||
|
||||
await db.clients.update_one({"_id": org_id}, {"$set": updates})
|
||||
updated = {**doc, **updates}
|
||||
return _org_from_doc(updated)
|
||||
org = _org_from_doc(updated)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.ORG_UPDATE,
|
||||
description=f"Organization '{org.name}' updated",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="organization",
|
||||
resource_id=str(org.id),
|
||||
resource_name=org.name,
|
||||
details={k: v for k, v in updates.items() if k != "updated_at"},
|
||||
)
|
||||
return org
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -178,6 +208,7 @@ async def list_members(
|
|||
async def add_member(
|
||||
org_id: str,
|
||||
body: MembershipCreate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -193,6 +224,15 @@ async def add_member(
|
|||
members = await list_org_members(org_id, db)
|
||||
for m in members:
|
||||
if m.user_id == body.user_id:
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.ORG_MEMBER_ADD,
|
||||
description=f"Member '{body.user_id}' added to organization '{org_id}' with role '{body.role_in_org}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="organization",
|
||||
resource_id=org_id,
|
||||
details={"user_id": body.user_id, "role": body.role_in_org},
|
||||
)
|
||||
return m
|
||||
raise HTTPException(status_code=500, detail="Membership created but could not be retrieved")
|
||||
|
||||
|
|
@ -202,6 +242,7 @@ async def update_member(
|
|||
org_id: str,
|
||||
user_id: str,
|
||||
body: MembershipUpdate,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -218,6 +259,15 @@ async def update_member(
|
|||
members = await list_org_members(org_id, db)
|
||||
for m in members:
|
||||
if m.user_id == user_id:
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.ORG_MEMBER_UPDATE,
|
||||
description=f"Member '{user_id}' role updated in organization '{org_id}' to '{body.role_in_org}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="organization",
|
||||
resource_id=org_id,
|
||||
details={"user_id": user_id, "role": body.role_in_org},
|
||||
)
|
||||
return m
|
||||
raise HTTPException(status_code=500, detail="Could not retrieve updated membership")
|
||||
|
||||
|
|
@ -226,6 +276,7 @@ async def update_member(
|
|||
async def remove_member(
|
||||
org_id: str,
|
||||
user_id: str,
|
||||
request: Request,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
|
|
@ -239,6 +290,15 @@ async def remove_member(
|
|||
|
||||
await remove_membership(user_id, org_id, db)
|
||||
await bump_user_membership_cache(user_id)
|
||||
await audit_logger.log_action(
|
||||
action=AuditAction.ORG_MEMBER_REMOVE,
|
||||
description=f"Member '{user_id}' removed from organization '{org_id}'",
|
||||
user=current_user,
|
||||
request=request,
|
||||
resource_type="organization",
|
||||
resource_id=org_id,
|
||||
details={"user_id": user_id, "role": existing.role_in_org},
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
"""API routes for review notes - timestamped notes on video assets during review."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from bson import ObjectId
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
||||
from ...core.authz import MembershipContext, get_job_or_403, get_membership_context
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import get_current_user, require_roles
|
||||
from ...core.dependencies import require_roles
|
||||
from ...core.logging import get_logger
|
||||
from ...models.user import User, UserRole
|
||||
from ...schemas.review_note import (
|
||||
|
|
@ -25,18 +25,13 @@ router = APIRouter(prefix="/jobs/{job_id}/review-notes", tags=["review-notes"])
|
|||
@router.get("", response_model=ReviewNotesListResponse)
|
||||
async def list_review_notes(
|
||||
job_id: str,
|
||||
asset_key: Optional[str] = Query(None, description="Filter notes by asset key"),
|
||||
asset_key: str | None = Query(None, description="Filter notes by asset key"),
|
||||
current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.LINGUIST, UserRole.PRODUCTION, UserRole.ADMIN)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""List all review notes for a job, optionally filtered by asset key."""
|
||||
# Verify job exists
|
||||
job = await db.jobs.find_one({"_id": job_id})
|
||||
if not job:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Job not found"
|
||||
)
|
||||
await get_job_or_403(job_id, ctx, db) # org check + existence check
|
||||
|
||||
# Build query
|
||||
query = {"job_id": job_id}
|
||||
|
|
@ -58,16 +53,11 @@ async def create_review_note(
|
|||
job_id: str,
|
||||
request: ReviewNoteCreateRequest,
|
||||
current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.LINGUIST, UserRole.PRODUCTION, UserRole.ADMIN)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""Create a new review note for a video asset."""
|
||||
# Verify job exists
|
||||
job = await db.jobs.find_one({"_id": job_id})
|
||||
if not job:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Job not found"
|
||||
)
|
||||
await get_job_or_403(job_id, ctx, db) # org check + existence check
|
||||
|
||||
# Create note document
|
||||
note_id = str(ObjectId())
|
||||
|
|
@ -96,9 +86,11 @@ async def get_review_note(
|
|||
job_id: str,
|
||||
note_id: str,
|
||||
current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.LINGUIST, UserRole.PRODUCTION, UserRole.ADMIN)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""Get a single review note by ID."""
|
||||
await get_job_or_403(job_id, ctx, db) # org check
|
||||
note = await db.review_notes.find_one({"_id": note_id, "job_id": job_id})
|
||||
if not note:
|
||||
raise HTTPException(
|
||||
|
|
@ -115,9 +107,11 @@ async def update_review_note(
|
|||
note_id: str,
|
||||
request: ReviewNoteUpdateRequest,
|
||||
current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.LINGUIST, UserRole.PRODUCTION, UserRole.ADMIN)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""Update a review note. Only the note owner can update."""
|
||||
await get_job_or_403(job_id, ctx, db) # org check
|
||||
note = await db.review_notes.find_one({"_id": note_id, "job_id": job_id})
|
||||
if not note:
|
||||
raise HTTPException(
|
||||
|
|
@ -151,9 +145,11 @@ async def delete_review_note(
|
|||
job_id: str,
|
||||
note_id: str,
|
||||
current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.LINGUIST, UserRole.PRODUCTION, UserRole.ADMIN)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""Delete a review note. Only the note owner can delete."""
|
||||
await get_job_or_403(job_id, ctx, db) # org check
|
||||
note = await db.review_notes.find_one({"_id": note_id, "job_id": job_id})
|
||||
if not note:
|
||||
raise HTTPException(
|
||||
|
|
|
|||
354
backend/app/api/v1/routes_share.py
Normal file
354
backend/app/api/v1/routes_share.py
Normal file
|
|
@ -0,0 +1,354 @@
|
|||
"""Share-token endpoints — create/revoke/list tokens + public read-only view + client decision."""
|
||||
|
||||
import secrets
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Literal
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ...core.config import settings
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import require_roles
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.share_token import ShareTokenResponse
|
||||
from ...models.user import User, UserRole
|
||||
from ...services.audit_logger import audit_logger
|
||||
from ...services.gcs import get_signed_download_url
|
||||
|
||||
router = APIRouter(tags=["share"])
|
||||
|
||||
_TOKENS = "share_tokens"
|
||||
_JOBS = "jobs"
|
||||
|
||||
|
||||
def _share_url(token: str) -> str:
    """Build the share URL for *token* under ``settings.app_url``."""
    base_url = settings.app_url
    return f"{base_url}/share/{token}"
|
||||
|
||||
|
||||
# ── Request schemas ───────────────────────────────────────────────────────────
|
||||
|
||||
class CreateShareTokenRequest(BaseModel):
    """Request body for creating a share token."""

    # Lifetime of the link in days; defaults to 30.
    expires_in_days: int | None = 30  # None = no expiry
    # Optional free-text label stored with the token.
    label: str | None = None
|
||||
|
||||
|
||||
class ShareTokenListResponse(BaseModel):
    """Response wrapper around a list of share tokens."""

    tokens: list[ShareTokenResponse]
|
||||
|
||||
|
||||
class PublicJobPreviewLanguage(BaseModel):
    """Download URLs for one language in the public preview.

    Each field is optional and defaults to ``None``.
    """

    captions_vtt_url: str | None = None
    audio_description_vtt_url: str | None = None
    accessible_video_mp4_url: str | None = None
    audio_description_mp3_url: str | None = None
|
||||
|
||||
|
||||
class PublicJobPreviewResponse(BaseModel):
    """Payload returned by the public share-preview endpoint."""

    job_id: str
    job_title: str
    job_status: str
    source_language: str
    # Language keys of the job's outputs.
    languages: list[str]
    # Per-language download URLs, keyed by language.
    language_outputs: dict[str, PublicJobPreviewLanguage]
|
||||
|
||||
|
||||
class ClientDecisionRequest(BaseModel):
    """Body of a client decision submitted through a share link."""

    # The only two accepted decisions.
    action: Literal["approve", "reject"]
    # Optional free-text notes accompanying the decision.
    notes: str | None = None
    # Optional name supplied by the client.
    client_name: str | None = None
|
||||
|
||||
|
||||
class ClientDecisionResponse(BaseModel):
    """Result returned after a client decision has been recorded."""

    status: str
    # Job status after the decision was applied.
    new_job_status: str
|
||||
|
||||
|
||||
# ── Authenticated routes ──────────────────────────────────────────────────────
|
||||
|
||||
@router.post("/jobs/{job_id}/share", response_model=ShareTokenResponse, status_code=201)
async def create_share_token(
    job_id: str,
    request: CreateShareTokenRequest,
    http_request: Request,
    current_user: User = Depends(require_roles(
        UserRole.PROJECT_MANAGER, UserRole.PRODUCTION, UserRole.ADMIN,
    )),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Generate a read-only share link for a job.

    Args:
        job_id: ``_id`` of the job to share.
        request: Optional label and lifetime in days (``None`` = no expiry).
        http_request: Incoming request, forwarded to the audit logger.
        current_user: Caller; must be PROJECT_MANAGER, PRODUCTION or ADMIN.
        db: Database handle.

    Returns:
        ``ShareTokenResponse`` describing the new token, including its share URL.

    Raises:
        HTTPException: 422 when ``expires_in_days`` is zero, negative, or too
            large to represent as a date; 404 when the job does not exist.
    """
    # NOTE(review): only the caller's role is checked in this handler; no
    # per-organization access check is visible here — confirm whether one is needed.
    lifetime_days = request.expires_in_days
    # ``0`` used to be treated like ``None`` (never expires) and negative
    # values produced an already-expired token; reject both explicitly.
    if lifetime_days is not None and lifetime_days <= 0:
        raise HTTPException(
            status_code=422,
            detail="expires_in_days must be a positive number of days, or null for no expiry",
        )

    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")

    token_id = secrets.token_hex(32)
    now = datetime.utcnow()
    if lifetime_days is None:
        expires_at = None
    else:
        try:
            expires_at = now + timedelta(days=lifetime_days)
        except OverflowError as exc:
            # Very large day counts overflow timedelta/datetime arithmetic.
            raise HTTPException(status_code=422, detail="expires_in_days is too large") from exc

    token_doc = {
        "_id": token_id,
        "job_id": job_id,
        "organization_id": job_doc.get("organization_id", ""),
        "created_by_user_id": str(current_user.id),
        "created_by_email": current_user.email,
        "created_at": now,
        "expires_at": expires_at,
        "is_active": True,
        "label": request.label,
    }
    await db[_TOKENS].insert_one(token_doc)

    await audit_logger.log_action(
        action=AuditAction.SHARE_TOKEN_CREATE,
        description=f"Share token created for job '{job_id}'",
        user=current_user,
        request=http_request,
        resource_type="job",
        resource_id=job_id,
        details={"token_id": token_id, "label": request.label, "expires_in_days": request.expires_in_days},
    )

    return ShareTokenResponse(
        id=token_id,
        job_id=job_id,
        created_by_email=current_user.email,
        created_at=now,
        expires_at=expires_at,
        is_active=True,
        label=request.label,
        share_url=_share_url(token_id),
    )
|
||||
|
||||
|
||||
@router.get("/jobs/{job_id}/share", response_model=ShareTokenListResponse)
async def list_share_tokens(
    job_id: str,
    current_user: User = Depends(require_roles(
        UserRole.PROJECT_MANAGER, UserRole.PRODUCTION, UserRole.ADMIN,
    )),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Return every share token of the job whose ``is_active`` flag is set.

    Raises:
        HTTPException: 404 when the job does not exist.
    """
    if not await db[_JOBS].find_one({"_id": job_id}):
        raise HTTPException(status_code=404, detail="Job not found")

    active_filter = {"job_id": job_id, "is_active": True}
    listed = [
        ShareTokenResponse(
            id=record["_id"],
            job_id=record["job_id"],
            created_by_email=record["created_by_email"],
            created_at=record["created_at"],
            expires_at=record.get("expires_at"),
            is_active=record["is_active"],
            label=record.get("label"),
            share_url=_share_url(record["_id"]),
        )
        async for record in db[_TOKENS].find(active_filter)
    ]
    return ShareTokenListResponse(tokens=listed)
|
||||
|
||||
|
||||
@router.delete("/jobs/{job_id}/share/{token_id}", status_code=204)
async def revoke_share_token(
    job_id: str,
    token_id: str,
    http_request: Request,
    current_user: User = Depends(require_roles(
        UserRole.PROJECT_MANAGER, UserRole.PRODUCTION, UserRole.ADMIN,
    )),
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Deactivate a share token by clearing its ``is_active`` flag.

    Raises:
        HTTPException: 404 when no token with this id belongs to the job.
    """
    # Matching on both ids ensures the token actually belongs to this job.
    selector = {"_id": token_id, "job_id": job_id}
    outcome = await db[_TOKENS].update_one(selector, {"$set": {"is_active": False}})
    if not outcome.matched_count:
        raise HTTPException(status_code=404, detail="Token not found")

    await audit_logger.log_action(
        action=AuditAction.SHARE_TOKEN_REVOKE,
        description=f"Share token '{token_id}' revoked for job '{job_id}'",
        user=current_user,
        request=http_request,
        resource_type="job",
        resource_id=job_id,
        details={"token_id": token_id},
    )
|
||||
|
||||
|
||||
# ── Public route (no auth) ────────────────────────────────────────────────────
|
||||
|
||||
# (output key in the job document, field on PublicJobPreviewLanguage)
_PREVIEW_URL_FIELDS = (
    ("captions_vtt_gcs", "captions_vtt_url"),
    ("ad_vtt_gcs", "audio_description_vtt_url"),
    ("ad_mp3_gcs", "audio_description_mp3_url"),
    ("accessible_video_gcs", "accessible_video_mp4_url"),
)


async def _signed_url_or_none(gcs_uri: object) -> str | None:
    """Best-effort: sign *gcs_uri* for download, or return ``None`` on failure."""
    import logging

    # A missing or null entry simply means "no asset of this kind".
    if not isinstance(gcs_uri, str) or not gcs_uri:
        return None

    # Strip only a leading bucket prefix.
    blob_path = gcs_uri.removeprefix(f"gs://{settings.gcs_bucket}/")
    try:
        # NOTE(review): ``6`` is passed through unchanged from the original
        # code; presumably the link lifetime — confirm the unit against
        # get_signed_download_url.
        return await get_signed_download_url(blob_path, 6)
    except Exception:
        # Signing stays best-effort so one broken asset does not fail the
        # whole preview, but the failure is now logged.
        logging.getLogger(__name__).warning(
            "Could not sign share-preview asset %s", blob_path, exc_info=True,
        )
        return None


@router.get("/public/share/{token}", response_model=PublicJobPreviewResponse)
async def get_public_job_preview(
    token: str,
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Return read-only job preview for a valid share token. No authentication required.

    Args:
        token: Share token taken from the URL.
        db: Database handle.

    Returns:
        ``PublicJobPreviewResponse`` with job metadata and, per output
        language, signed download URLs for the assets that could be signed.

    Raises:
        HTTPException: 404 when the token is unknown or inactive, or when the
            job is gone; 410 when the token has expired.
    """
    token_doc = await db[_TOKENS].find_one({"_id": token, "is_active": True})
    if not token_doc:
        raise HTTPException(status_code=404, detail="Share link not found or has been revoked")

    if token_doc.get("expires_at") and token_doc["expires_at"] < datetime.utcnow():
        raise HTTPException(status_code=410, detail="Share link has expired")

    job_doc = await db[_JOBS].find_one({"_id": token_doc["job_id"]})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")

    outputs = job_doc.get("outputs") or {}
    language_outputs: dict[str, PublicJobPreviewLanguage] = {}

    for lang, lang_output in outputs.items():
        if not isinstance(lang_output, dict):
            continue

        signed_urls = {}
        for output_key, field_name in _PREVIEW_URL_FIELDS:
            signed_urls[field_name] = await _signed_url_or_none(lang_output.get(output_key))
        language_outputs[lang] = PublicJobPreviewLanguage(**signed_urls)

    # ``source`` may be stored as null; treat that like a missing sub-document.
    source_doc = job_doc.get("source") or {}

    return PublicJobPreviewResponse(
        job_id=str(job_doc["_id"]),
        job_title=job_doc.get("title", "Untitled"),
        job_status=job_doc.get("status", ""),
        source_language=source_doc.get("language", "en"),
        languages=list(outputs.keys()),
        language_outputs=language_outputs,
    )
|
||||
|
||||
|
||||
@router.post("/public/share/{token}/decision", response_model=ClientDecisionResponse)
async def client_decision(
    token: str,
    request: ClientDecisionRequest,
    http_request: Request,
    db: AsyncIOMotorDatabase = Depends(get_database),
):
    """Submit client approval or rejection via a share link. No authentication required.

    Approval moves the job to ``completed`` after asset validation passes;
    rejection moves it to ``qc_feedback``. Either way the decision is appended
    to ``review.history``.

    Args:
        token: Share token taken from the URL.
        request: Decision (``approve`` / ``reject``) with optional notes and client name.
        http_request: Incoming request, forwarded to the audit logger.
        db: Database handle.

    Returns:
        ``ClientDecisionResponse`` carrying the job's new status.

    Raises:
        HTTPException: 404 for an unknown/inactive token or missing job; 410
            for an expired token; 409 when the job is not in
            ``pending_final_review`` (including when it changed concurrently);
            400 when asset validation fails on approval.
    """
    import logging

    from ...services.validation import asset_validation_service

    token_doc = await db[_TOKENS].find_one({"_id": token, "is_active": True})
    if not token_doc:
        raise HTTPException(status_code=404, detail="Share link not found or has been revoked")
    if token_doc.get("expires_at") and token_doc["expires_at"] < datetime.utcnow():
        raise HTTPException(status_code=410, detail="Share link has expired")

    job_id = token_doc["job_id"]
    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=404, detail="Job not found")

    if job_doc.get("status") != "pending_final_review":
        raise HTTPException(
            status_code=409,
            detail="This job is not currently awaiting client review"
        )

    now = datetime.utcnow()
    notes = request.notes or ""
    by_label = f"client:{request.client_name or 'anonymous'} (share/{token[:8]})"
    approved = request.action == "approve"

    if approved:
        is_valid, validation_errors = await asset_validation_service.validate_job_assets(job_doc)
        if not is_valid:
            raise HTTPException(
                status_code=400,
                detail=f"Asset validation failed: {'; '.join(validation_errors)}"
            )
        new_status = "completed"
    else:
        new_status = "qc_feedback"

    # Both outcomes write the same fields; only a rejection also records who
    # rejected in ``review.reviewer_id``.
    set_fields = {
        "status": new_status,
        "review.notes": notes,
        "updated_at": now,
    }
    if not approved:
        set_fields["review.reviewer_id"] = by_label

    update = {
        "$set": set_fields,
        "$push": {
            "review.history": {
                "at": now,
                "status": new_status,
                "by": by_label,
                "notes": notes,
            }
        },
    }

    # The status filter makes the transition atomic: if the job left
    # ``pending_final_review`` since the check above, nothing is written.
    result = await db[_JOBS].find_one_and_update(
        {"_id": job_id, "status": "pending_final_review"},
        update,
        return_document=True,
    )
    if not result:
        raise HTTPException(
            status_code=409,
            detail="Decision could not be submitted — the job status may have changed"
        )

    # NOTE(review): the full share token is written to the audit details —
    # confirm that storing it there is acceptable.
    await audit_logger.log_action(
        action=AuditAction.SHARE_CLIENT_DECISION,
        description=f"Client '{request.client_name or 'anonymous'}' submitted decision '{request.action}' for job '{job_id}' via share token",
        user=None,
        request=http_request,
        resource_type="job",
        resource_id=job_id,
        details={
            "action": request.action,
            "token": token,
            "client_name": request.client_name,
            "new_status": new_status,
            "notes": request.notes,
        },
    )

    if approved:
        try:
            from ...tasks.notify import notify_client_task
            notify_client_task.delay(job_id)
        except Exception:
            # The decision is already persisted, so a notification failure
            # must not fail the request — but it is now logged.
            logging.getLogger(__name__).warning(
                "Could not enqueue client notification for job %s", job_id, exc_info=True,
            )

    return ClientDecisionResponse(status="ok", new_job_status=new_status)
|
||||
|
|
@ -1,18 +1,18 @@
|
|||
import asyncio
|
||||
import time
|
||||
from typing import Literal, Optional
|
||||
from typing import Literal
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from fastapi.responses import Response
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ...core.config import settings
|
||||
from ...core.logging import get_logger
|
||||
from ...services.gemini_tts import gemini_tts_service
|
||||
from ...services.elevenlabs_voices import elevenlabs_voice_service
|
||||
from ...services.tts import tts_service
|
||||
from ...services import cost_tracker
|
||||
from ...core.dependencies import get_current_user
|
||||
from ...core.logging import get_logger
|
||||
from ...services import cost_tracker
|
||||
from ...services.elevenlabs_voices import elevenlabs_voice_service
|
||||
from ...services.gemini_tts import gemini_tts_service
|
||||
from ...services.tts import tts_service
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
|
@ -30,20 +30,20 @@ class VoicePreviewRequest(BaseModel):
|
|||
style_preset: Literal[
|
||||
"neutral", "calm", "energetic", "professional", "warm", "documentary", "custom"
|
||||
] = "neutral"
|
||||
custom_style_prompt: Optional[str] = None
|
||||
custom_style_prompt: str | None = None
|
||||
# ElevenLabs-specific
|
||||
stability: Optional[float] = Field(default=None, ge=0.0, le=1.0)
|
||||
similarity_boost: Optional[float] = Field(default=None, ge=0.0, le=1.0)
|
||||
stability: float | None = Field(default=None, ge=0.0, le=1.0)
|
||||
similarity_boost: float | None = Field(default=None, ge=0.0, le=1.0)
|
||||
|
||||
|
||||
class VoiceInfo(BaseModel):
|
||||
"""Structured voice information for any provider."""
|
||||
id: str
|
||||
name: str
|
||||
description: Optional[str] = None
|
||||
preview_url: Optional[str] = None
|
||||
labels: Optional[dict[str, str]] = None
|
||||
category: Optional[str] = None
|
||||
description: str | None = None
|
||||
preview_url: str | None = None
|
||||
labels: dict[str, str] | None = None
|
||||
category: str | None = None
|
||||
|
||||
|
||||
class ProviderVoicesResponse(BaseModel):
|
||||
|
|
@ -52,7 +52,7 @@ class ProviderVoicesResponse(BaseModel):
|
|||
voices: list[VoiceInfo]
|
||||
default: str
|
||||
available: bool = True
|
||||
error: Optional[str] = None
|
||||
error: str | None = None
|
||||
|
||||
|
||||
class LanguagesResponse(BaseModel):
|
||||
|
|
@ -87,12 +87,12 @@ class ProviderOptionsResponse(BaseModel):
|
|||
"""Available TTS configuration options for a provider."""
|
||||
provider: str
|
||||
# Gemini-specific
|
||||
models: Optional[list[TTSOptionItem]] = None
|
||||
style_presets: Optional[list[TTSOptionItem]] = None
|
||||
speed_range: Optional[SpeedRange] = None
|
||||
models: list[TTSOptionItem] | None = None
|
||||
style_presets: list[TTSOptionItem] | None = None
|
||||
speed_range: SpeedRange | None = None
|
||||
# ElevenLabs-specific
|
||||
stability_range: Optional[FloatRange] = None
|
||||
similarity_boost_range: Optional[FloatRange] = None
|
||||
stability_range: FloatRange | None = None
|
||||
similarity_boost_range: FloatRange | None = None
|
||||
|
||||
|
||||
@router.get("/voices", response_model=ProviderVoicesResponse)
|
||||
|
|
|
|||
|
|
@ -3,15 +3,21 @@
|
|||
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
||||
from ...core.authz import MembershipContext, get_job_or_403, get_membership_context
|
||||
from ...core.config import settings
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import require_roles
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...models.user import User, UserRole
|
||||
from ...models.vtt_version import VttDiffResponse, VttKind, VttVersionListResponse, VttVersionSummary
|
||||
from ...models.vtt_version import (
|
||||
VttDiffResponse,
|
||||
VttKind,
|
||||
VttVersionListResponse,
|
||||
VttVersionSummary,
|
||||
)
|
||||
from ...services import vtt_versioning
|
||||
from ...services.audit_logger import audit_logger
|
||||
from ...models.audit_log import AuditAction
|
||||
from ...services.gcs import gcs_service
|
||||
from ...core.config import settings
|
||||
|
||||
router = APIRouter(prefix="/jobs", tags=["vtt-versions"])
|
||||
|
||||
|
|
@ -26,9 +32,11 @@ async def list_vtt_versions(
|
|||
skip: int = Query(0, ge=0),
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
current_user: User = Depends(require_roles(*_EDITABLE_ROLES)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""List all VTT versions for a job/lang/kind, newest first."""
|
||||
await get_job_or_403(job_id, ctx, db) # org check
|
||||
return await vtt_versioning.list_versions(db, job_id, lang, kind, skip, limit)
|
||||
|
||||
|
||||
|
|
@ -39,9 +47,11 @@ async def get_vtt_version(
|
|||
lang: str = Query(...),
|
||||
kind: VttKind = Query(...),
|
||||
current_user: User = Depends(require_roles(*_EDITABLE_ROLES)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""Get full VTT content for a specific version."""
|
||||
await get_job_or_403(job_id, ctx, db) # org check
|
||||
v = await vtt_versioning.get_version(db, job_id, lang, kind, version)
|
||||
if not v:
|
||||
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Version not found")
|
||||
|
|
@ -69,9 +79,11 @@ async def diff_vtt_versions(
|
|||
from_version: int = Query(..., alias="from"),
|
||||
to_version: int = Query(..., alias="to"),
|
||||
current_user: User = Depends(require_roles(*_EDITABLE_ROLES)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""Line-level diff between two versions of a VTT file."""
|
||||
await get_job_or_403(job_id, ctx, db) # org check
|
||||
v_from = await vtt_versioning.get_version(db, job_id, lang, kind, from_version)
|
||||
v_to = await vtt_versioning.get_version(db, job_id, lang, kind, to_version)
|
||||
if not v_from:
|
||||
|
|
@ -93,6 +105,7 @@ async def restore_vtt_version(
|
|||
kind: VttKind = Query(...),
|
||||
http_request: Request = None,
|
||||
current_user: User = Depends(require_roles(UserRole.PRODUCTION, UserRole.ADMIN)),
|
||||
ctx: MembershipContext = Depends(get_membership_context),
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
):
|
||||
"""
|
||||
|
|
@ -100,6 +113,7 @@ async def restore_vtt_version(
|
|||
Non-destructive: creates a new version entry whose content mirrors the old one,
|
||||
then overwrites the live GCS file.
|
||||
"""
|
||||
await get_job_or_403(job_id, ctx, db) # org check
|
||||
src = await vtt_versioning.get_version(db, job_id, lang, kind, version)
|
||||
if not src:
|
||||
raise HTTPException(status_code=404, detail="Version not found")
|
||||
|
|
@ -115,7 +129,7 @@ async def restore_vtt_version(
|
|||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Version snapshot created (v{new_ver.version}) but live file update failed: {exc}",
|
||||
)
|
||||
) from None
|
||||
|
||||
# Update the GCS URI pointer in the job document
|
||||
gcs_uri_key = "captions_vtt_gcs" if kind == "captions" else "ad_vtt_gcs"
|
||||
|
|
|
|||
|
|
@ -5,107 +5,146 @@ Provides WebSocket endpoints for:
|
|||
1. Individual job status updates: /ws/jobs/{job_id}
|
||||
2. Job list updates: /ws/jobs (all jobs for authenticated user)
|
||||
"""
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, HTTPException, Depends, Query
|
||||
from fastapi import (
|
||||
APIRouter,
|
||||
Depends,
|
||||
Query,
|
||||
WebSocket,
|
||||
WebSocketDisconnect,
|
||||
)
|
||||
from fastapi.security import HTTPBearer
|
||||
|
||||
from ...services.websocket import (
|
||||
connection_manager,
|
||||
authenticate_websocket,
|
||||
get_connection_manager,
|
||||
ConnectionManager
|
||||
)
|
||||
from ...models.job import Job
|
||||
from ...core.authz import PLATFORM_ADMIN_ROLES, _cached_memberships
|
||||
from ...core.database import get_database
|
||||
from ...core.dependencies import get_current_user
|
||||
from ...models.user import UserRole
|
||||
from ...services.websocket import (
|
||||
ConnectionManager,
|
||||
authenticate_websocket,
|
||||
connection_manager,
|
||||
get_connection_manager,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["WebSocket"])
|
||||
security = HTTPBearer()
|
||||
|
||||
# Close codes that indicate a permanent auth/permission failure — frontend must NOT retry
|
||||
_TERMINAL_CLOSE_CODES = {4001, 4003, 4004, 4403}
|
||||
|
||||
# Seconds between server-side keepalive frames.
|
||||
# Must be < Apache mod_proxy_wstunnel idle timeout.
|
||||
# Mod Comms incident 2026-03-18: 25s was insufficient; 20s is safe.
|
||||
_KEEPALIVE_INTERVAL_S = 20
|
||||
|
||||
|
||||
async def _resolve_user_and_org(websocket: WebSocket, user_id: str, db):
    """
    Fetch the user document and resolve the user's org memberships.

    The user is looked up by the raw string id first and, when that misses,
    by the same id converted to a BSON ObjectId.

    Args:
        websocket: Connection that is closed with code 4001 when no user
            document can be found.
        user_id: Authenticated user id as a string.
        db: Database handle; only its ``"users"`` collection is read here
            (the handle is also forwarded to ``_cached_memberships``).

    Returns:
        ``(user_doc, memberships)`` on success. ``memberships`` is ``None``
        for platform admins (meaning unrestricted); otherwise it is whatever
        ``_cached_memberships`` returns for this user.
        ``(None, None)`` when the user does not exist; the socket has already
        been closed in that case, so callers should simply return.
    """
    users = db["users"]

    user = await users.find_one({"_id": user_id})
    if not user:
        # Best-effort retry with an ObjectId key. An id that cannot be
        # converted (or any failure of the retry) is treated as "not found".
        try:
            from bson import ObjectId

            user = await users.find_one({"_id": ObjectId(user_id)})
        except Exception:
            pass

    if not user:
        await websocket.close(code=4001, reason="User not found")
        return None, None

    # Looking up an enum member by value raises ValueError when the stored
    # role is missing or unknown. Such a user must not abort the handshake
    # with an unhandled error; treat them as a non-admin so that access is
    # decided by their memberships (least privilege).
    try:
        role = UserRole(user.get("role", ""))
    except ValueError:
        role = None

    if role is not None and role in PLATFORM_ADMIN_ROLES:
        return user, None  # None memberships = unrestricted

    memberships = await _cached_memberships(user_id, db)
    return user, memberships
|
||||
|
||||
|
||||
def _can_access_org(org_id: str | None, memberships: dict | None) -> bool:
|
||||
"""Return True if user (with these memberships) may access the given org_id."""
|
||||
if memberships is None:
|
||||
return True # platform admin
|
||||
if not org_id:
|
||||
return True # legacy job without org: allow (further checks done below if needed)
|
||||
return org_id in memberships
|
||||
|
||||
|
||||
@router.websocket("/ws/jobs/{job_id}")
|
||||
async def websocket_job_status(
|
||||
websocket: WebSocket,
|
||||
job_id: str,
|
||||
token: Optional[str] = Query(None),
|
||||
token: str | None = Query(None),
|
||||
manager: ConnectionManager = Depends(get_connection_manager)
|
||||
):
|
||||
"""
|
||||
WebSocket endpoint for real-time job status updates
|
||||
|
||||
WebSocket endpoint for real-time job status updates.
|
||||
|
||||
Usage:
|
||||
- Connect: ws://localhost:8000/api/v1/ws/jobs/{job_id}?token={jwt_token}
|
||||
- Receives: Real-time status updates for the specific job
|
||||
|
||||
Message format:
|
||||
{
|
||||
"type": "job_status_update",
|
||||
"data": {
|
||||
"job_id": "...",
|
||||
"status": "processing",
|
||||
"updated_at": "2023-...",
|
||||
"message": "Processing video...",
|
||||
"progress": 45
|
||||
}
|
||||
}
|
||||
|
||||
Close codes:
|
||||
4001 — user not found
|
||||
4003 — role-based access denied
|
||||
4004 — job not found
|
||||
4403 — org membership access denied (do not retry)
|
||||
"""
|
||||
# Authenticate the WebSocket connection
|
||||
user_id = await authenticate_websocket(websocket, token)
|
||||
if not user_id:
|
||||
return
|
||||
|
||||
|
||||
try:
|
||||
# Verify user has access to this job
|
||||
db = await get_database()
|
||||
jobs_collection = db["jobs"]
|
||||
|
||||
job = await jobs_collection.find_one({"_id": job_id})
|
||||
|
||||
job = await db["jobs"].find_one({"_id": job_id})
|
||||
if not job:
|
||||
await websocket.close(code=4004, reason="Job not found")
|
||||
return
|
||||
|
||||
# Check permissions - users can only access their own jobs unless they're admin/reviewer
|
||||
user = await db["users"].find_one({"_id": user_id})
|
||||
if not user:
|
||||
try:
|
||||
from bson import ObjectId
|
||||
user = await db["users"].find_one({"_id": ObjectId(user_id)})
|
||||
except Exception:
|
||||
pass # Invalid ObjectId format
|
||||
|
||||
if not user:
|
||||
await websocket.close(code=4001, reason="User not found")
|
||||
return
|
||||
|
||||
# Check access permissions
|
||||
|
||||
user, memberships = await _resolve_user_and_org(websocket, user_id, db)
|
||||
if user is None:
|
||||
return # socket already closed inside helper
|
||||
|
||||
# Role-based client restriction
|
||||
if user["role"] == "client" and job.get("created_by") != user_id:
|
||||
await websocket.close(code=4003, reason="Access denied")
|
||||
return
|
||||
|
||||
# Connect to job status updates
|
||||
|
||||
# Org membership check
|
||||
job_org = job.get("organization_id")
|
||||
if not _can_access_org(job_org, memberships):
|
||||
await websocket.close(code=4403, reason="Org access denied")
|
||||
return
|
||||
|
||||
await manager.connect_job_status(websocket, user_id, job_id)
|
||||
|
||||
# Keep connection alive and handle incoming messages
|
||||
|
||||
while True:
|
||||
try:
|
||||
# Wait for incoming WebSocket messages (for heartbeat, etc.)
|
||||
message = await websocket.receive_text()
|
||||
# Wait up to _KEEPALIVE_INTERVAL_S for a client message.
|
||||
# On timeout send a keepalive frame so the proxy idle timer resets.
|
||||
message = await asyncio.wait_for(
|
||||
websocket.receive_text(),
|
||||
timeout=_KEEPALIVE_INTERVAL_S,
|
||||
)
|
||||
logger.debug(f"Received WebSocket message from user {user_id}: {message}")
|
||||
|
||||
# Handle heartbeat or other client messages if needed
|
||||
if message == "ping":
|
||||
await websocket.send_text("pong")
|
||||
|
||||
|
||||
except TimeoutError:
|
||||
await websocket.send_text("keepalive")
|
||||
|
||||
except WebSocketDisconnect:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Error in WebSocket message handling: {e}")
|
||||
break
|
||||
|
||||
|
||||
except WebSocketDisconnect:
|
||||
pass
|
||||
except Exception as e:
|
||||
|
|
@ -117,75 +156,54 @@ async def websocket_job_status(
|
|||
@router.websocket("/ws/jobs")
|
||||
async def websocket_job_list(
|
||||
websocket: WebSocket,
|
||||
token: Optional[str] = Query(None),
|
||||
token: str | None = Query(None),
|
||||
manager: ConnectionManager = Depends(get_connection_manager)
|
||||
):
|
||||
"""
|
||||
WebSocket endpoint for real-time job list updates
|
||||
|
||||
WebSocket endpoint for real-time job list updates.
|
||||
|
||||
Usage:
|
||||
- Connect: ws://localhost:8000/api/v1/ws/jobs?token={jwt_token}
|
||||
- Receives: Real-time status updates for all jobs the user can access
|
||||
|
||||
Message format:
|
||||
{
|
||||
"type": "job_list_update",
|
||||
"data": {
|
||||
"job_id": "...",
|
||||
"status": "processing",
|
||||
"updated_at": "2023-...",
|
||||
"message": "Processing video...",
|
||||
"progress": 45
|
||||
}
|
||||
}
|
||||
|
||||
Only events for jobs in the user's accessible orgs are delivered.
|
||||
"""
|
||||
# Authenticate the WebSocket connection
|
||||
user_id = await authenticate_websocket(websocket, token)
|
||||
if not user_id:
|
||||
return
|
||||
|
||||
|
||||
try:
|
||||
# Verify user exists
|
||||
logger.info(f"WebSocket: Looking up user {user_id} in database")
|
||||
db = await get_database()
|
||||
|
||||
# Try looking up user by string ID first, then by ObjectId
|
||||
user = await db["users"].find_one({"_id": user_id})
|
||||
if not user:
|
||||
try:
|
||||
from bson import ObjectId
|
||||
user = await db["users"].find_one({"_id": ObjectId(user_id)})
|
||||
except Exception:
|
||||
pass # Invalid ObjectId format
|
||||
|
||||
if not user:
|
||||
logger.warning(f"WebSocket: User {user_id} not found in database (tried both string and ObjectId)")
|
||||
await websocket.close(code=4001, reason="User not found")
|
||||
return
|
||||
|
||||
|
||||
user, memberships = await _resolve_user_and_org(websocket, user_id, db)
|
||||
if user is None:
|
||||
return # socket already closed inside helper
|
||||
|
||||
logger.info(f"WebSocket: User {user_id} found, role: {user.get('role', 'unknown')}")
|
||||
|
||||
logger.info(f"WebSocket: User {user_id} found, connecting to job list updates")
|
||||
# Connect to job list updates
|
||||
await manager.connect_job_list(websocket, user_id)
|
||||
|
||||
# Keep connection alive and handle incoming messages
|
||||
|
||||
accessible_org_ids = None if memberships is None else list(memberships.keys())
|
||||
await manager.connect_job_list(websocket, user_id, accessible_org_ids=accessible_org_ids)
|
||||
|
||||
while True:
|
||||
try:
|
||||
# Wait for incoming WebSocket messages
|
||||
message = await websocket.receive_text()
|
||||
message = await asyncio.wait_for(
|
||||
websocket.receive_text(),
|
||||
timeout=_KEEPALIVE_INTERVAL_S,
|
||||
)
|
||||
logger.debug(f"Received WebSocket message from user {user_id}: {message}")
|
||||
|
||||
# Handle heartbeat or other client messages if needed
|
||||
if message == "ping":
|
||||
await websocket.send_text("pong")
|
||||
|
||||
|
||||
except TimeoutError:
|
||||
await websocket.send_text("keepalive")
|
||||
|
||||
except WebSocketDisconnect:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Error in WebSocket message handling: {e}")
|
||||
break
|
||||
|
||||
|
||||
except WebSocketDisconnect:
|
||||
pass
|
||||
except Exception as e:
|
||||
|
|
@ -196,19 +214,15 @@ async def websocket_job_list(
|
|||
|
||||
@router.get("/ws/status")
|
||||
async def websocket_status():
|
||||
"""
|
||||
Get WebSocket connection status and statistics
|
||||
Useful for debugging and monitoring
|
||||
"""
|
||||
"""Get WebSocket connection status and statistics (debug/monitoring)."""
|
||||
stats = {
|
||||
"active_connections": len(connection_manager.active_connections),
|
||||
"job_subscriptions": len(connection_manager.job_subscriptions),
|
||||
"global_subscriptions": len(connection_manager.global_subscriptions),
|
||||
"redis_connected": connection_manager.redis_client is not None,
|
||||
"subscriber_running": (
|
||||
connection_manager.subscriber_task is not None and
|
||||
connection_manager.subscriber_task is not None and
|
||||
not connection_manager.subscriber_task.done()
|
||||
)
|
||||
}
|
||||
|
||||
return stats
|
||||
return stats
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ Provides:
|
|||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import Depends, HTTPException, status
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
|
@ -66,7 +65,7 @@ async def _cached_memberships(
|
|||
"""Load memberships, with Redis cache (60s TTL)."""
|
||||
cache_key = f"mem:user:{user_id}"
|
||||
try:
|
||||
redis = get_redis()
|
||||
redis = await get_redis()
|
||||
if redis:
|
||||
cached = await redis.get(cache_key)
|
||||
if cached:
|
||||
|
|
@ -78,7 +77,7 @@ async def _cached_memberships(
|
|||
memberships = await _load_memberships(user_id, db)
|
||||
|
||||
try:
|
||||
redis = get_redis()
|
||||
redis = await get_redis()
|
||||
if redis:
|
||||
await redis.setex(
|
||||
cache_key,
|
||||
|
|
@ -159,7 +158,7 @@ class OrgScopedQuery:
|
|||
def filter(
|
||||
self,
|
||||
base_query: dict,
|
||||
org_id: Optional[str] = None,
|
||||
org_id: str | None = None,
|
||||
org_field: str = "organization_id",
|
||||
) -> dict:
|
||||
if self.ctx.is_platform_admin:
|
||||
|
|
@ -183,6 +182,50 @@ class OrgScopedQuery:
|
|||
return {**base_query, org_field: {"$in": accessible}}
|
||||
|
||||
|
||||
def assert_user_in_org(
    ctx: "MembershipContext",
    org_id: str,
    min_role: OrgRole = OrgRole.VIEWER,
) -> None:
    """
    Ensure the context user holds at least ``min_role`` in ``org_id``.

    The decision is delegated entirely to ``ctx.can_access_org``; this
    function only turns a negative answer into an HTTP error.

    Args:
        ctx: Membership context of the current user.
        org_id: Organization the caller wants to access.
        min_role: Minimum organization role required (defaults to viewer).

    Raises:
        HTTPException: 403 when ``ctx.can_access_org`` reports no access.
    """
    if ctx.can_access_org(org_id, min_role):
        return

    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Access to this organization is not permitted",
    )
|
||||
|
||||
|
||||
async def get_job_or_403(
    job_id: str,
    ctx: "MembershipContext",
    db: AsyncIOMotorDatabase,
) -> dict:
    """
    Load a job document and verify the context user may access it.

    Despite the name, every rejection is reported as 404 so that the
    existence of jobs in other organizations is not disclosed.

    Resolution of the owning organization:
      1. ``organization_id`` on the job document.
      2. Otherwise ``client_id`` of the job's project (legacy jobs).
      3. Otherwise no organization: only a platform admin or the user whose
         id equals the job's ``client_id`` may access it.

    Args:
        job_id: ``_id`` of the job to load.
        ctx: Membership context of the current user.
        db: Database handle with ``jobs`` and ``projects`` collections.

    Returns:
        The job document.

    Raises:
        HTTPException: 404 when the job does not exist or access is denied.
    """
    job_doc = await db.jobs.find_one({"_id": job_id})
    if not job_doc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Job not found")

    org_id = job_doc.get("organization_id")

    # Legacy job without org: try resolving the organization via its project.
    project_id = None if org_id else job_doc.get("project_id")
    if project_id:
        project = await db.projects.find_one({"_id": project_id}, {"client_id": 1})
        if project:
            org_id = project.get("client_id")

    if org_id:
        allowed = ctx.can_access_org(org_id)
    else:
        # Truly legacy job (no project, no org): uploader or admin only.
        allowed = ctx.is_platform_admin or job_doc.get("client_id") == str(ctx.user.id)

    if not allowed:
        # 404 rather than 403 to avoid leaking existence of cross-org jobs.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Job not found")

    return job_doc
|
||||
|
||||
|
||||
async def bump_user_membership_cache(user_id: str) -> None:
|
||||
"""Invalidate the Redis membership cache for a user (call on any membership write)."""
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ class Settings(BaseSettings):
|
|||
# App
|
||||
app_env: str = "dev"
|
||||
api_base_url: str = "http://localhost:8000"
|
||||
app_url: str = "https://optical-dev.oliver.solutions/video-accessibility"
|
||||
|
||||
# Auth
|
||||
jwt_secret: str
|
||||
|
|
@ -22,13 +23,14 @@ class Settings(BaseSettings):
|
|||
|
||||
# Redis
|
||||
redis_url: str
|
||||
|
||||
|
||||
# Celery
|
||||
celery_broker_url: str = ""
|
||||
celery_result_backend: str = ""
|
||||
|
||||
# GCP
|
||||
gcp_project_id: str
|
||||
gcp_location: str = "us-central1"
|
||||
gcs_bucket: str = "accessible-video"
|
||||
google_application_credentials: str = ""
|
||||
|
||||
|
|
@ -36,7 +38,7 @@ class Settings(BaseSettings):
|
|||
gemini_api_key: str
|
||||
elevenlabs_api_key: str = ""
|
||||
google_tts_credentials: str = ""
|
||||
|
||||
|
||||
# TTS Voice Configuration
|
||||
tts_provider: str = "gemini" # "gemini", "google", or "elevenlabs"
|
||||
google_tts_voices: dict[str, str] = {
|
||||
|
|
@ -50,7 +52,7 @@ class Settings(BaseSettings):
|
|||
elevenlabs_voices: dict[str, str] = {}
|
||||
|
||||
# Gemini TTS Configuration
|
||||
gemini_tts_model: str = "gemini-2.5-flash-preview-tts"
|
||||
gemini_tts_model: str = "gemini-3.1-flash-tts-preview"
|
||||
gemini_tts_default_voice: str = "Kore"
|
||||
gemini_tts_voices: list[str] = [
|
||||
"Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda", "Orus", "Aoede",
|
||||
|
|
@ -221,8 +223,8 @@ class Settings(BaseSettings):
|
|||
|
||||
# Gemini TTS Model Options
|
||||
gemini_tts_models: dict[str, str] = {
|
||||
"flash": "gemini-2.5-flash-preview-tts", # Fast, cost-efficient
|
||||
"pro": "gemini-2.5-pro-preview-tts", # Higher quality
|
||||
"flash": "gemini-3.1-flash-tts-preview", # Fast, cost-efficient (Preview)
|
||||
"pro": "gemini-2.5-pro-tts", # Higher quality (GA)
|
||||
}
|
||||
|
||||
# Gemini TTS Style Presets - prompts prepended to text for style control
|
||||
|
|
@ -247,6 +249,14 @@ class Settings(BaseSettings):
|
|||
whisper_sentence_gap_threshold: float = 0.5 # Gap duration to classify as sentence boundary
|
||||
whisper_phrase_gap_threshold: float = 0.3 # Gap duration to classify as phrase boundary
|
||||
whisper_min_gap_threshold: float = 0.15 # Minimum gap duration to consider
|
||||
# Forward-preferred snap windows (A2)
|
||||
whisper_snap_forward_window: float = 4.0 # Prefer boundary up to N seconds ahead of Gemini point
|
||||
whisper_snap_backward_window: float = 1.5 # Fall back to boundary up to N seconds behind
|
||||
# Adaptive silence buffer (A1)
|
||||
ad_silence_buffer_default: float = 0.5 # Base silence duration (s) before/after AD audio
|
||||
ad_silence_buffer_min_after: float = 0.1 # Minimum silence after AD audio
|
||||
# Minimum gap required at the chosen pause point (A3)
|
||||
ad_min_acceptable_gap: float = 0.2 # Seconds; points with shorter gaps trigger forward search
|
||||
|
||||
# Cloud Run Service URLs (empty = use local processing)
|
||||
# When set, CPU-intensive work is offloaded to Cloud Run with autoscaling
|
||||
|
|
@ -265,11 +275,10 @@ class Settings(BaseSettings):
|
|||
ffmpeg_worker_concurrency: int = 4 # FFmpeg tasks on main worker
|
||||
tts_worker_concurrency: int = 8 # TTS worker
|
||||
|
||||
# Email (Mailgun — primary; sendgrid_api_key kept for backward compat)
|
||||
# Email (Mailgun)
|
||||
mailgun_api_key: str = ""
|
||||
mailgun_domain: str = "mg.oliver.solutions"
|
||||
mailgun_from: str = "noreply@mg.oliver.solutions"
|
||||
sendgrid_api_key: str = ""
|
||||
email_from: str = "noreply@mg.oliver.solutions"
|
||||
client_base_url: str
|
||||
|
||||
|
|
@ -288,6 +297,10 @@ class Settings(BaseSettings):
|
|||
cost_tracker_source_app: str = "video-accessibility"
|
||||
cost_tracker_enabled: bool = True
|
||||
|
||||
# Upload limits (T-14 — single source of truth)
|
||||
upload_max_video_bytes: int = 2 * 1024 * 1024 * 1024 # 2GB
|
||||
upload_signed_url_ttl_hours: int = 24 # signed URL lifetime
|
||||
|
||||
# CORS - comma-separated list of allowed origins
|
||||
cors_origins: str = "http://localhost:5173,http://localhost:5174,http://localhost:3000,http://localhost:6001"
|
||||
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ async def create_indexes():
|
|||
await db.audit_logs.create_index([("resource_type", 1), ("resource_id", 1)]) # Resource tracking
|
||||
await db.audit_logs.create_index([("ip_address", 1), ("timestamp", -1)]) # IP-based analysis
|
||||
await db.audit_logs.create_index([("success", 1), ("timestamp", -1)]) # Failed operations
|
||||
|
||||
|
||||
# Text search index for description and details
|
||||
await db.audit_logs.create_index([
|
||||
("description", "text"),
|
||||
|
|
|
|||
|
|
@ -1,11 +1,9 @@
|
|||
from typing import Optional
|
||||
|
||||
from fastapi import Depends, HTTPException, Request, status
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
||||
from ..models.user import User, UserRole
|
||||
from .config import settings
|
||||
from .database import get_database
|
||||
from .security import decode_token
|
||||
|
||||
|
|
@ -43,7 +41,12 @@ async def get_current_user(
|
|||
detail="User not found",
|
||||
)
|
||||
|
||||
return User(**user_doc)
|
||||
user = User(**user_doc)
|
||||
# Attach org_ids hint from token as transient attribute (never used for authz)
|
||||
token_org_ids = payload.get("org_ids", [])
|
||||
if token_org_ids:
|
||||
user.__dict__["org_ids"] = token_org_ids
|
||||
return user
|
||||
|
||||
|
||||
def require_role(required_role: UserRole):
|
||||
|
|
@ -73,7 +76,7 @@ def require_roles(*required_roles: UserRole):
|
|||
async def get_current_user_optional(
|
||||
request: Request,
|
||||
db: AsyncIOMotorDatabase = Depends(get_database),
|
||||
) -> Optional[User]:
|
||||
) -> User | None:
|
||||
authorization: str = request.headers.get("Authorization")
|
||||
if not authorization:
|
||||
return None
|
||||
|
|
@ -104,7 +107,7 @@ async def get_current_user_optional(
|
|||
async def get_accessible_project_ids(
|
||||
user: User,
|
||||
db: AsyncIOMotorDatabase,
|
||||
) -> Optional[list[str]]:
|
||||
) -> list[str] | None:
|
||||
"""
|
||||
Returns project IDs the user may access, or None meaning "see everything".
|
||||
|
||||
|
|
@ -120,9 +123,12 @@ async def get_accessible_project_ids(
|
|||
|
||||
user_id = str(user.id)
|
||||
|
||||
# Primary path: use memberships collection (Phase 3 SaaS)
|
||||
membership_cursor = db.memberships.find({"user_id": user_id}, {"organization_id": 1})
|
||||
org_ids = [doc["organization_id"] async for doc in membership_cursor]
|
||||
# Primary path: use Redis-cached memberships (60s TTL, same cache as authz.py)
|
||||
from .authz import (
|
||||
_cached_memberships, # local import to avoid circular dep at module level
|
||||
)
|
||||
memberships_map = await _cached_memberships(user_id, db)
|
||||
org_ids = list(memberships_map.keys())
|
||||
|
||||
if org_ids:
|
||||
projects = await db.projects.find(
|
||||
|
|
@ -164,6 +170,67 @@ async def get_accessible_project_ids(
|
|||
return []
|
||||
|
||||
|
||||
async def get_user_org_ids(user: User, db: AsyncIOMotorDatabase) -> list[str] | None:
    """
    Return the organization ids a user belongs to.

    Sources are tried in order and the first non-empty one wins:
    memberships collection, then ``pm_client_ids`` (project managers only),
    then teams listing the user in ``member_user_ids``.

    Args:
        user: The user whose organizations are resolved.
        db: Database handle with ``memberships`` and ``teams`` collections.

    Returns:
        ``None`` for ADMIN users (meaning unrestricted), otherwise a list of
        organization ids, possibly empty.
    """
    if user.role == UserRole.ADMIN:
        return None

    user_id = str(user.id)

    # Primary: memberships collection (entries without an org id are skipped).
    membership_cursor = db.memberships.find({"user_id": user_id}, {"organization_id": 1})
    from_memberships: list[str] = [
        str(doc["organization_id"])
        async for doc in membership_cursor
        if doc.get("organization_id")
    ]
    if from_memberships:
        return from_memberships

    # PM legacy: explicit client ids stored on the user.
    if user.role == UserRole.PROJECT_MANAGER:
        return list(user.pm_client_ids or [])

    # Staff legacy: teams that list this user as a member.
    teams = await db.teams.find({"member_user_ids": user_id}, {"client_id": 1}).to_list(None)
    return [str(team["client_id"]) for team in (teams or []) if team.get("client_id")]
|
||||
|
||||
|
||||
async def assert_job_in_user_org(job: dict, user: User, db: AsyncIOMotorDatabase) -> None:
    """
    Verify that ``user`` may access ``job``; raise 404 otherwise.

    404 is used instead of 403 so the existence of the job is not disclosed.

    Checks, in order:
      1. ADMIN users, and users for whom ``get_user_org_ids`` returns
         ``None``, are unrestricted.
      2. Job with ``organization_id``: it must be one of the user's org ids.
      3. Job with ``project_id`` only: the project's ``client_id`` must be
         one of the user's org ids.
      4. Neither: the job's ``client_id`` must equal the user's id.

    Args:
        job: Job document.
        user: Current user.
        db: Database handle with a ``projects`` collection.

    Raises:
        HTTPException: 404 when none of the checks grants access.
    """

    def _not_found() -> HTTPException:
        return HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Job not found")

    if user.role == UserRole.ADMIN:
        return

    org_ids = await get_user_org_ids(user, db)
    if org_ids is None:
        return  # unrestricted

    job_org = job.get("organization_id")
    if job_org:
        if job_org not in org_ids:
            raise _not_found()
        return

    # No organization_id on the job: fall back to the owning project.
    project_id = job.get("project_id")
    if project_id:
        project = await db.projects.find_one({"_id": project_id}, {"client_id": 1})
        if not (project and project.get("client_id") in org_ids):
            raise _not_found()
        return

    # Legacy: client_id == creator user_id.
    job_client_id = job.get("client_id")
    if not job_client_id or job_client_id != str(user.id):
        raise _not_found()
|
||||
|
||||
|
||||
def require_pm_for_client(client_id_param: str = "client_id"):
|
||||
"""Dependency: ensures the current user is an Admin or PM for the given client."""
|
||||
async def checker(
|
||||
|
|
|
|||
|
|
@ -1,10 +1,6 @@
|
|||
"""Enhanced configuration system with Secret Manager integration."""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
from typing import Dict, Optional, Any
|
||||
from functools import lru_cache
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
from .config import Settings as BaseConfig
|
||||
from .logging import get_logger
|
||||
|
|
@ -14,41 +10,40 @@ logger = get_logger(__name__)
|
|||
|
||||
class SecretsConfig(BaseConfig):
|
||||
"""Enhanced configuration that loads secrets from GCP Secret Manager."""
|
||||
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
# Initialize with base configuration first
|
||||
super().__init__(**kwargs)
|
||||
|
||||
|
||||
# Flag to track if secrets have been loaded
|
||||
self._secrets_loaded = False
|
||||
self._secret_values: Dict[str, str] = {}
|
||||
|
||||
self._secret_values: dict[str, str] = {}
|
||||
|
||||
async def load_secrets(self) -> None:
|
||||
"""Load secrets from Secret Manager asynchronously."""
|
||||
if self._secrets_loaded:
|
||||
return
|
||||
|
||||
|
||||
try:
|
||||
# Only import here to avoid circular imports
|
||||
from app.services.secrets_manager import secrets_manager
|
||||
|
||||
|
||||
# Define which config fields should be loaded from secrets
|
||||
secret_mappings = {
|
||||
# Config field -> Secret Manager name
|
||||
"jwt_secret": "jwt-secret",
|
||||
"jwt_refresh_secret": "jwt-refresh-secret",
|
||||
"jwt_refresh_secret": "jwt-refresh-secret",
|
||||
"mongodb_uri": "mongodb-url",
|
||||
"redis_url": "redis-url",
|
||||
"gemini_api_key": "gemini-api-key",
|
||||
"sendgrid_api_key": "sendgrid-api-key",
|
||||
"elevenlabs_api_key": "elevenlabs-api-key",
|
||||
"sentry_dsn": "sentry-dsn"
|
||||
}
|
||||
|
||||
|
||||
# Get all secrets in batch
|
||||
secret_names = list(secret_mappings.values())
|
||||
retrieved_secrets = await secrets_manager.get_secrets_batch(secret_names)
|
||||
|
||||
|
||||
# Map secrets back to config fields
|
||||
for config_field, secret_name in secret_mappings.items():
|
||||
if secret_name in retrieved_secrets:
|
||||
|
|
@ -58,50 +53,50 @@ class SecretsConfig(BaseConfig):
|
|||
logger.debug(f"Loaded secret for {config_field}")
|
||||
else:
|
||||
logger.warning(f"Secret {secret_name} not available, using environment/default")
|
||||
|
||||
|
||||
self._secrets_loaded = True
|
||||
logger.info(f"Successfully loaded {len(retrieved_secrets)} secrets from Secret Manager")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load secrets from Secret Manager: {e}")
|
||||
logger.warning("Falling back to environment variables")
|
||||
self._secrets_loaded = True # Mark as loaded to prevent retries
|
||||
|
||||
def get_secret_value(self, field_name: str) -> Optional[str]:
|
||||
|
||||
def get_secret_value(self, field_name: str) -> str | None:
|
||||
"""Get a secret value if it was loaded from Secret Manager."""
|
||||
return self._secret_values.get(field_name)
|
||||
|
||||
|
||||
async def refresh_secrets(self) -> None:
    """Discard every cached secret and load them again from Secret Manager.

    The local secret map is emptied and the "loaded" flag is reset so that
    ``load_secrets`` does not short-circuit, then the secrets manager's own
    cache is cleared before the reload is awaited.
    """
    # Reset local state first; load_secrets() returns early while the flag is set.
    self._secret_values.clear()
    self._secrets_loaded = False

    # Imported lazily, mirroring load_secrets(), which does so to avoid a circular import.
    from app.services.secrets_manager import secrets_manager as manager

    manager.clear_cache()
    await self.load_secrets()
|
||||
|
||||
|
||||
@property
|
||||
def is_production(self) -> bool:
|
||||
"""Check if running in production environment."""
|
||||
return self.app_env == "prod"
|
||||
|
||||
|
||||
@property
|
||||
def is_development(self) -> bool:
|
||||
"""Check if running in development environment."""
|
||||
return self.app_env == "dev"
|
||||
|
||||
|
||||
@property
|
||||
def google_cloud_project(self) -> str:
|
||||
"""Get Google Cloud Project ID."""
|
||||
return self.gcp_project_id
|
||||
|
||||
|
||||
@property
|
||||
def jwt_refresh_secret(self) -> str:
|
||||
"""Get JWT refresh secret (fallback to main secret if not set)."""
|
||||
return getattr(self, '_jwt_refresh_secret', self.jwt_secret)
|
||||
|
||||
|
||||
@jwt_refresh_secret.setter
|
||||
def jwt_refresh_secret(self, value: str) -> None:
|
||||
"""Set JWT refresh secret."""
|
||||
|
|
@ -109,37 +104,37 @@ class SecretsConfig(BaseConfig):
|
|||
|
||||
|
||||
# Global configuration instance
|
||||
_config_instance: Optional[SecretsConfig] = None
|
||||
_config_instance: SecretsConfig | None = None
|
||||
|
||||
|
||||
async def initialize_config() -> SecretsConfig:
    """Return the global configuration, creating it and loading secrets on first use.

    Returns:
        The module-wide ``SecretsConfig`` instance. Secrets are only loaded by
        the call that creates the instance; later calls return it as-is.
    """
    global _config_instance

    if _config_instance is not None:
        return _config_instance

    # The instance is published before the await, exactly as the flag-free
    # original did, so concurrent callers see the same object.
    _config_instance = SecretsConfig()
    await _config_instance.load_secrets()
    return _config_instance
|
||||
|
||||
|
||||
def get_settings() -> SecretsConfig:
    """Return the global settings instance without awaiting anything.

    If ``initialize_config`` has not run yet, a ``SecretsConfig`` is created
    on the spot *without* loading secrets (kept for backwards compatibility)
    and a warning is logged.
    """
    global _config_instance

    if _config_instance is not None:
        return _config_instance

    # Synchronous fallback: no Secret Manager round-trip is possible here.
    _config_instance = SecretsConfig()
    logger.warning("Settings accessed before async initialization - secrets not loaded")
    return _config_instance
|
||||
|
||||
|
||||
@lru_cache()
|
||||
@lru_cache
|
||||
def get_settings_cached() -> SecretsConfig:
|
||||
"""Get cached settings instance."""
|
||||
return get_settings()
|
||||
|
||||
|
||||
# Backwards compatibility
|
||||
settings = get_settings()
|
||||
settings = get_settings()
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from datetime import datetime, timedelta
|
||||
from typing import Any, Optional, Union
|
||||
from typing import Any
|
||||
|
||||
from fastapi import HTTPException, status
|
||||
from jose import JWTError, jwt
|
||||
|
|
@ -11,20 +11,24 @@ pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
|||
|
||||
|
||||
def create_access_token(
|
||||
subject: Union[str, Any], expires_delta: Optional[timedelta] = None
|
||||
subject: str | Any,
|
||||
expires_delta: timedelta | None = None,
|
||||
org_ids: list[str] | None = None,
|
||||
) -> str:
|
||||
if expires_delta:
|
||||
expire = datetime.utcnow() + expires_delta
|
||||
else:
|
||||
expire = datetime.utcnow() + timedelta(minutes=settings.jwt_access_ttl_min)
|
||||
|
||||
to_encode = {"exp": expire, "sub": str(subject)}
|
||||
to_encode: dict[str, Any] = {"exp": expire, "sub": str(subject), "v": 2}
|
||||
if org_ids:
|
||||
to_encode["org_ids"] = org_ids
|
||||
encoded_jwt = jwt.encode(to_encode, settings.jwt_secret, algorithm=settings.jwt_alg)
|
||||
return encoded_jwt
|
||||
|
||||
|
||||
def create_refresh_token(
|
||||
subject: Union[str, Any], expires_delta: Optional[timedelta] = None
|
||||
subject: str | Any, expires_delta: timedelta | None = None
|
||||
) -> str:
|
||||
if expires_delta:
|
||||
expire = datetime.utcnow() + expires_delta
|
||||
|
|
@ -37,6 +41,8 @@ def create_refresh_token(
|
|||
|
||||
|
||||
def verify_password(plain_password: str, hashed_password: str) -> bool:
    """Check a plaintext password against a stored hash.

    Args:
        plain_password: Password as supplied by the caller.
        hashed_password: Stored hash; may be empty or ``None``.

    Returns:
        ``False`` when no hash is stored (the hashing context is not
        consulted at all); otherwise the result of ``pwd_context.verify``.
    """
    if hashed_password:
        return pwd_context.verify(plain_password, hashed_password)
    # Nothing to compare against, so the password can never match.
    return False
|
||||
|
||||
|
||||
|
|
@ -52,4 +58,4 @@ def decode_token(token: str) -> dict[str, Any]:
|
|||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Could not validate credentials",
|
||||
)
|
||||
) from None
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ class VTTCue:
|
|||
end_time: float # seconds
|
||||
text: str
|
||||
identifier: str | None = None
|
||||
settings: str = ""
|
||||
|
||||
|
||||
class VTTParser:
|
||||
|
|
@ -37,10 +38,11 @@ class VTTParser:
|
|||
|
||||
# Parse timing line
|
||||
if " --> " in line:
|
||||
timing_match = re.match(r'([\d:.,]+)\s+-->\s+([\d:.,]+)', line)
|
||||
timing_match = re.match(r'([\d:.,]+)\s+-->\s+([\d:.,]+)\s*(.*)', line)
|
||||
if timing_match:
|
||||
start_time = VTTParser._parse_timestamp(timing_match.group(1))
|
||||
end_time = VTTParser._parse_timestamp(timing_match.group(2))
|
||||
settings = timing_match.group(3).strip()
|
||||
|
||||
# Collect text lines until empty line or next cue
|
||||
i += 1
|
||||
|
|
@ -49,13 +51,13 @@ class VTTParser:
|
|||
text_lines.append(lines[i].strip())
|
||||
i += 1
|
||||
|
||||
if text_lines:
|
||||
cues.append(VTTCue(
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
text="\n".join(text_lines),
|
||||
identifier=identifier
|
||||
))
|
||||
cues.append(VTTCue(
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
text="\n".join(text_lines),
|
||||
identifier=identifier,
|
||||
settings=settings,
|
||||
))
|
||||
else:
|
||||
i += 1
|
||||
|
||||
|
|
@ -71,16 +73,19 @@ class VTTParser:
|
|||
if cue.identifier:
|
||||
lines.append(cue.identifier)
|
||||
|
||||
# Add timing line
|
||||
# Add timing line (preserve cue settings like line:0%)
|
||||
start_timestamp = VTTParser._format_timestamp(cue.start_time)
|
||||
end_timestamp = VTTParser._format_timestamp(cue.end_time)
|
||||
lines.append(f"{start_timestamp} --> {end_timestamp}")
|
||||
timing_line = f"{start_timestamp} --> {end_timestamp}"
|
||||
if cue.settings:
|
||||
timing_line += f" {cue.settings}"
|
||||
lines.append(timing_line)
|
||||
|
||||
# Add text (can be multi-line)
|
||||
lines.append(cue.text)
|
||||
lines.append("") # Empty line between cues
|
||||
|
||||
return "\n".join(lines)
|
||||
return "\n".join(lines) + "\n"
|
||||
|
||||
@staticmethod
|
||||
def _parse_timestamp(timestamp: str) -> float:
|
||||
|
|
@ -121,7 +126,7 @@ class VTTParser:
|
|||
secs = seconds % 60
|
||||
|
||||
whole_secs = int(secs)
|
||||
milliseconds = int((secs - whole_secs) * 1000)
|
||||
milliseconds = round((secs - whole_secs) * 1000)
|
||||
|
||||
return f"{hours:02d}:{minutes:02d}:{whole_secs:02d}.{milliseconds:03d}"
|
||||
|
||||
|
|
@ -148,6 +153,22 @@ class VTTEditor:
|
|||
|
||||
return VTTParser.build(cues)
|
||||
|
||||
@staticmethod
def assert_cue_alignment(en_vtt: str, target_vtt: str, lang: str) -> None:
    """Ensure a target-language VTT mirrors the EN master cue-for-cue.

    Args:
        en_vtt: VTT content of the English master.
        target_vtt: VTT content of the translated track.
        lang: Language label, used only in the error messages.

    Raises:
        ValueError: If the two tracks have a different number of cues, or if
            any pair of cues differs in start or end time (exact comparison).
    """
    master_cues = VTTParser.parse(en_vtt)
    target_cues = VTTParser.parse(target_vtt)

    master_count = len(master_cues)
    target_count = len(target_cues)
    if target_count != master_count:
        raise ValueError(
            f"Cue count mismatch for {lang}: EN has {master_count}, target has {target_count}"
        )

    # Lengths are known to match here, so strict=True can never trip.
    for index, (master, target) in enumerate(zip(master_cues, target_cues, strict=True)):
        timing_matches = (
            master.start_time == target.start_time
            and master.end_time == target.end_time
        )
        if timing_matches:
            continue
        raise ValueError(
            f"Timestamp mismatch for {lang} cue {index}: "
            f"EN {master.start_time}-->{master.end_time}, target {target.start_time}-->{target.end_time}"
        )
|
||||
|
||||
@staticmethod
|
||||
def update_cue_text(vtt_content: str, cue_index: int, new_text: str) -> str:
|
||||
"""Update text for a specific cue by index"""
|
||||
|
|
@ -186,6 +207,20 @@ class VTTEditor:
|
|||
|
||||
return len(errors) == 0, errors
|
||||
|
||||
@staticmethod
def fix_overlapping_cues(vtt_content: str) -> str:
    """Shorten cues so that none runs past the start of the cue that follows it.

    Each cue is compared with its successor in document order. When the
    successor starts before the cue ends, the cue's end is pulled back to
    1 ms before the successor's start. If that would put the end at or before
    the cue's own start, the end is set to 1 ms after the start instead.

    Returns:
        The rebuilt VTT document.
    """
    cues = VTTParser.parse(vtt_content)
    one_ms = 0.001

    for earlier, later in zip(cues, cues[1:]):
        if later.start_time < earlier.end_time:
            trimmed_end = later.start_time - one_ms
            # A cue must keep a positive duration even if the next one
            # starts at (or before) the same instant.
            if trimmed_end <= earlier.start_time:
                trimmed_end = earlier.start_time + one_ms
            earlier.end_time = trimmed_end

    return VTTParser.build(cues)
|
||||
|
||||
@staticmethod
|
||||
def get_cue_count(vtt_content: str) -> int:
|
||||
"""Get the number of cues in VTT content"""
|
||||
|
|
@ -221,7 +256,7 @@ class VTTEditor:
|
|||
)
|
||||
return False, errors
|
||||
|
||||
for i, (src, tgt) in enumerate(zip(source_cues, translated_cues)):
|
||||
for i, (src, tgt) in enumerate(zip(source_cues, translated_cues, strict=False)):
|
||||
if abs(src.start_time - tgt.start_time) > 0.001:
|
||||
errors.append(
|
||||
f"Cue {i + 1}: start time changed "
|
||||
|
|
@ -251,3 +286,33 @@ class VTTEditor:
|
|||
|
||||
return VTTParser.build(cues)
|
||||
|
||||
# DCMP §6.01 filler patterns per language (whole-word, case-insensitive).
# Two rules keep the alternations effective:
#   * a filler that ends in punctuation ("right?") must sit OUTSIDE the
#     trailing \b — after "?" a word boundary only exists when a word
#     character follows, so inside the group it would never match before a
#     space or at the end of the cue;
#   * a longer filler must precede any shorter one that is its prefix
#     ("o sea que" before "o sea"), otherwise the shorter one always wins.
_FILLER_PATTERNS: dict[str, str] = {
    "en": (
        r'\b(um+|uh+|ah+|er+|hmm+|you know|i mean|sort of|kind of|basically'
        r'|literally|honestly|actually|so yeah)\b'
        r'|\bright\?'
    ),
    "es": r'\b(eh+|este|o sea que|o sea|pues|bueno|mmm+)\b',
    "fr": r'\b(euh+|beh|ben|donc|quoi|enfin|voilà|genre)\b',
    "de": r'\b(äh+|ähm+|halt|ne|also|naja|sozusagen|quasi)\b',
    "it": r'\b(ehm+|allora|cioè|tipo|praticamente|insomma|ecco)\b',
    "nl": r'\b(eh+|nou|zeg|eigenlijk|gewoon|toch|zo van|hè)\b',
    "pt": r'\b(ahn+|hã+|né|sabe|tipo|então|assim)\b',
    "pl": r'\b(no|że|bo|znaczy|właśnie|jakby|wiesz)\b',
    "uk": r'\b(ну+|ем+|типу|знаєш|значить|власне|от)\b',
    "ru": r'\b(ну+|эм+|типа|знаешь|значит|вот|собственно)\b',
}

@staticmethod
def clean_disfluencies(vtt_content: str, lang: str) -> str:
    """Remove filler words and hesitations per DCMP §6.01 for supported languages.

    Args:
        vtt_content: VTT document to clean.
        lang: Language tag; only the part before the first "-" is used,
            lower-cased (so "en-US" selects the "en" pattern).

    Returns:
        ``vtt_content`` unchanged when the language has no filler pattern;
        otherwise the VTT rebuilt from the cleaned cues. A cue whose text
        would become empty after cleaning keeps its original text.
    """
    pattern = VTTEditor._FILLER_PATTERNS.get(lang.split("-")[0].lower())
    if not pattern:
        return vtt_content

    cues = VTTParser.parse(vtt_content)
    compiled = re.compile(pattern, re.IGNORECASE)
    for cue in cues:
        cleaned = compiled.sub("", cue.text)
        # Collapse the gaps left by removed fillers and trim stray commas
        # that end up at either edge of the text.
        cleaned = re.sub(r'[ \t]{2,}', ' ', cleaned).strip().strip(',').strip()
        if cleaned:
            cue.text = cleaned
    return VTTParser.build(cues)
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,9 @@ from sentry_sdk.integrations.pymongo import PyMongoIntegration
|
|||
from sentry_sdk.integrations.redis import RedisIntegration
|
||||
|
||||
from .api.v1.routes_admin import router as admin_router
|
||||
from .api.v1.routes_admin_production import router as admin_production_router
|
||||
from .api.v1.routes_auth import router as auth_router
|
||||
from .api.v1.routes_briefs import router as briefs_router
|
||||
from .api.v1.routes_clients import router as clients_router
|
||||
from .api.v1.routes_files import router as files_router
|
||||
from .api.v1.routes_glossaries import router as glossaries_router
|
||||
|
|
@ -21,6 +23,7 @@ from .api.v1.routes_jobs import router as jobs_router
|
|||
from .api.v1.routes_language_qc import router as language_qc_router
|
||||
from .api.v1.routes_organizations import router as organizations_router
|
||||
from .api.v1.routes_review_notes import router as review_notes_router
|
||||
from .api.v1.routes_share import router as share_router
|
||||
from .api.v1.routes_tts import router as tts_router
|
||||
from .api.v1.routes_vtt_versions import router as vtt_versions_router
|
||||
from .api.v1.routes_websockets import router as websockets_router
|
||||
|
|
@ -91,12 +94,17 @@ async def lifespan(app: FastAPI):
|
|||
print(f"⚠️ Could not seed default admin: {e}")
|
||||
# await create_indexes() # Temporarily disabled for debugging
|
||||
|
||||
# Seed language_qc for existing jobs that don't have it yet
|
||||
# T-16: Seed language_qc only for jobs that still lack it (idempotent, skips on subsequent starts)
|
||||
try:
|
||||
db = await get_database()
|
||||
async for job_doc in db.jobs.find({"language_qc": {"$exists": False}}, {"_id": 1, "status": 1, "outputs": 1, "source": 1, "review": 1, "updated_at": 1, "requested_outputs": 1}):
|
||||
await seed_language_qc_for_job(db, job_doc)
|
||||
print("✅ language_qc migration complete")
|
||||
pending_count = await db.jobs.count_documents({"language_qc": {"$exists": False}})
|
||||
if pending_count > 0:
|
||||
async for job_doc in db.jobs.find(
|
||||
{"language_qc": {"$exists": False}},
|
||||
{"_id": 1, "status": 1, "outputs": 1, "source": 1, "review": 1, "updated_at": 1, "requested_outputs": 1},
|
||||
):
|
||||
await seed_language_qc_for_job(db, job_doc)
|
||||
print(f"✅ language_qc migration complete ({pending_count} jobs seeded)")
|
||||
except Exception as e:
|
||||
print(f"⚠️ language_qc migration failed: {e}")
|
||||
|
||||
|
|
@ -112,6 +120,9 @@ async def lifespan(app: FastAPI):
|
|||
# Store middleware in app state for access
|
||||
app.state.rate_limit_middleware = rate_limit_middleware
|
||||
app.state.validation_middleware = validation_middleware
|
||||
elif settings.redis_url:
|
||||
# T-13: REDIS_URL is configured but client unavailable — rate limiting is disabled
|
||||
print(f"⚠️ Redis configured at {settings.redis_url!r} but connection failed — rate limiting disabled")
|
||||
|
||||
yield
|
||||
# Shutdown
|
||||
|
|
@ -145,6 +156,7 @@ async def cors_error_handler(request, call_next):
|
|||
response = await call_next(request)
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
from .core.logging import get_logger as _get_logger
|
||||
_get_logger(__name__).exception("🚨 CORS middleware caught: %s\n%s", e, traceback.format_exc())
|
||||
|
||||
|
|
@ -265,6 +277,9 @@ app.include_router(language_qc_router, prefix="/api/v1")
|
|||
app.include_router(glossaries_router, prefix="/api/v1")
|
||||
app.include_router(tts_router, prefix="/api/v1")
|
||||
app.include_router(admin_router, prefix="/api/v1")
|
||||
app.include_router(admin_production_router, prefix="/api/v1")
|
||||
app.include_router(briefs_router, prefix="/api/v1")
|
||||
app.include_router(share_router, prefix="/api/v1")
|
||||
app.include_router(websockets_router, prefix="/api/v1")
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,16 @@
|
|||
"""Middleware package for FastAPI application."""
|
||||
|
||||
from .rate_limiting import RateLimitMiddleware, IPWhitelist, create_rate_limit_middleware
|
||||
from .rate_limiting import (
|
||||
IPWhitelist,
|
||||
RateLimitMiddleware,
|
||||
create_rate_limit_middleware,
|
||||
)
|
||||
from .validation import ValidationMiddleware, create_validation_middleware
|
||||
|
||||
__all__ = [
|
||||
"RateLimitMiddleware",
|
||||
"IPWhitelist",
|
||||
"IPWhitelist",
|
||||
"create_rate_limit_middleware",
|
||||
"ValidationMiddleware",
|
||||
"create_validation_middleware"
|
||||
]
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,14 +1,10 @@
|
|||
"""Rate limiting middleware for API endpoints."""
|
||||
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
import redis.asyncio as aioredis
|
||||
from fastapi import HTTPException, Request, status
|
||||
from fastapi import Request, status
|
||||
from fastapi.responses import JSONResponse
|
||||
import json
|
||||
import asyncio
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.telemetry.metrics import track_rate_limit_metrics
|
||||
|
|
@ -16,50 +12,50 @@ from app.telemetry.metrics import track_rate_limit_metrics
|
|||
|
||||
class RateLimiter:
|
||||
"""Redis-based rate limiter with sliding window algorithm."""
|
||||
|
||||
|
||||
def __init__(self, redis_client: aioredis.Redis):
|
||||
self.redis = redis_client
|
||||
|
||||
|
||||
async def is_allowed(
|
||||
self,
|
||||
key: str,
|
||||
limit: int,
|
||||
self,
|
||||
key: str,
|
||||
limit: int,
|
||||
window_seconds: int,
|
||||
identifier: str = ""
|
||||
) -> Tuple[bool, Dict[str, int]]:
|
||||
) -> tuple[bool, dict[str, int]]:
|
||||
"""
|
||||
Check if request is allowed under rate limit.
|
||||
|
||||
|
||||
Returns:
|
||||
Tuple of (is_allowed, rate_limit_info)
|
||||
"""
|
||||
now = time.time()
|
||||
pipeline = self.redis.pipeline()
|
||||
|
||||
|
||||
# Remove expired entries
|
||||
pipeline.zremrangebyscore(key, 0, now - window_seconds)
|
||||
|
||||
|
||||
# Count current requests in window
|
||||
pipeline.zcard(key)
|
||||
|
||||
|
||||
# Add current request
|
||||
pipeline.zadd(key, {str(now): now})
|
||||
|
||||
|
||||
# Set expiry
|
||||
pipeline.expire(key, window_seconds)
|
||||
|
||||
|
||||
results = await pipeline.execute()
|
||||
current_requests = results[1]
|
||||
|
||||
|
||||
rate_limit_info = {
|
||||
"limit": limit,
|
||||
"remaining": max(0, limit - current_requests),
|
||||
"reset_time": int(now + window_seconds),
|
||||
"retry_after": window_seconds if current_requests >= limit else 0
|
||||
}
|
||||
|
||||
|
||||
is_allowed = current_requests <= limit
|
||||
|
||||
|
||||
# Track metrics
|
||||
track_rate_limit_metrics(
|
||||
identifier=identifier,
|
||||
|
|
@ -67,17 +63,17 @@ class RateLimiter:
|
|||
current_requests=current_requests,
|
||||
limit=limit
|
||||
)
|
||||
|
||||
|
||||
return is_allowed, rate_limit_info
|
||||
|
||||
|
||||
class RateLimitMiddleware:
|
||||
"""FastAPI middleware for rate limiting."""
|
||||
|
||||
|
||||
def __init__(self, redis_client: aioredis.Redis):
|
||||
self.limiter = RateLimiter(redis_client)
|
||||
self.settings = get_settings()
|
||||
|
||||
|
||||
# Rate limit configurations by endpoint pattern
|
||||
self.rate_limits = {
|
||||
# Authentication endpoints
|
||||
|
|
@ -85,32 +81,32 @@ class RateLimitMiddleware:
|
|||
"POST:/api/v1/auth/register": (3, 3600), # 3 requests per hour
|
||||
"POST:/api/v1/auth/refresh": (10, 300), # 10 requests per 5 minutes
|
||||
"POST:/api/v1/auth/forgot-password": (3, 3600), # 3 requests per hour
|
||||
|
||||
|
||||
# File upload endpoints
|
||||
"POST:/api/v1/files/upload": (10, 3600), # 10 uploads per hour
|
||||
"POST:/api/v1/jobs": (20, 3600), # 20 job creations per hour
|
||||
|
||||
|
||||
# Job management endpoints
|
||||
"GET:/api/v1/jobs": (100, 300), # 100 requests per 5 minutes
|
||||
"PATCH:/api/v1/jobs/*/approve": (50, 3600), # 50 approvals per hour
|
||||
"PATCH:/api/v1/jobs/*/reject": (50, 3600), # 50 rejections per hour
|
||||
|
||||
|
||||
# VTT editing endpoints
|
||||
"PATCH:/api/v1/jobs/*/vtt": (100, 3600), # 100 VTT edits per hour
|
||||
|
||||
|
||||
# Admin endpoints (more restrictive)
|
||||
"GET:/api/v1/admin/*": (50, 300), # 50 requests per 5 minutes
|
||||
"POST:/api/v1/admin/*": (20, 3600), # 20 admin actions per hour
|
||||
"PATCH:/api/v1/admin/*": (20, 3600), # 20 admin updates per hour
|
||||
"DELETE:/api/v1/admin/*": (10, 3600), # 10 admin deletions per hour
|
||||
}
|
||||
|
||||
|
||||
# Default rate limits
|
||||
self.default_limits = {
|
||||
"authenticated": (1000, 3600), # 1000 requests per hour for authenticated users
|
||||
"anonymous": (100, 3600), # 100 requests per hour for anonymous users
|
||||
}
|
||||
|
||||
|
||||
def _get_client_identifier(self, request: Request) -> str:
|
||||
"""Get client identifier for rate limiting."""
|
||||
user = getattr(request.state, 'user', None)
|
||||
|
|
@ -128,53 +124,53 @@ class RateLimitMiddleware:
|
|||
|
||||
client_ip = request.client.host if request.client else "unknown"
|
||||
return f"ip:{client_ip}"
|
||||
|
||||
|
||||
def _get_endpoint_key(self, request: Request) -> str:
|
||||
"""Get endpoint pattern for rate limiting."""
|
||||
method = request.method
|
||||
path = request.url.path
|
||||
|
||||
|
||||
# Replace job IDs with wildcard for pattern matching
|
||||
import re
|
||||
path = re.sub(r'/jobs/[a-f0-9-]+/', '/jobs/*/', path)
|
||||
path = re.sub(r'/admin/users/[a-f0-9-]+', '/admin/users/*', path)
|
||||
|
||||
|
||||
return f"{method}:{path}"
|
||||
|
||||
def _get_rate_limit(self, request: Request) -> Tuple[int, int]:
|
||||
|
||||
def _get_rate_limit(self, request: Request) -> tuple[int, int]:
|
||||
"""Get rate limit for the current request."""
|
||||
endpoint_key = self._get_endpoint_key(request)
|
||||
|
||||
|
||||
# Check for specific endpoint limits
|
||||
if endpoint_key in self.rate_limits:
|
||||
return self.rate_limits[endpoint_key]
|
||||
|
||||
|
||||
# Check for wildcard matches
|
||||
for pattern, limits in self.rate_limits.items():
|
||||
if pattern.endswith("*") and endpoint_key.startswith(pattern[:-1]):
|
||||
return limits
|
||||
|
||||
|
||||
# Use default limits based on authentication
|
||||
user = getattr(request.state, 'user', None)
|
||||
if user:
|
||||
return self.default_limits["authenticated"]
|
||||
else:
|
||||
return self.default_limits["anonymous"]
|
||||
|
||||
|
||||
async def __call__(self, request: Request, call_next):
|
||||
"""Process rate limiting for the request."""
|
||||
|
||||
|
||||
# Skip rate limiting for health checks and metrics only
|
||||
if request.url.path in ["/health", "/metrics"]:
|
||||
return await call_next(request)
|
||||
|
||||
|
||||
client_id = self._get_client_identifier(request)
|
||||
endpoint_key = self._get_endpoint_key(request)
|
||||
limit, window = self._get_rate_limit(request)
|
||||
|
||||
|
||||
# Create rate limit key
|
||||
rate_limit_key = f"rate_limit:{client_id}:{endpoint_key}"
|
||||
|
||||
|
||||
try:
|
||||
is_allowed, rate_info = await self.limiter.is_allowed(
|
||||
key=rate_limit_key,
|
||||
|
|
@ -182,7 +178,7 @@ class RateLimitMiddleware:
|
|||
window_seconds=window,
|
||||
identifier=client_id
|
||||
)
|
||||
|
||||
|
||||
if not is_allowed:
|
||||
# Return rate limit exceeded response
|
||||
return JSONResponse(
|
||||
|
|
@ -199,17 +195,17 @@ class RateLimitMiddleware:
|
|||
"Retry-After": str(rate_info["retry_after"])
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
# Process the request
|
||||
response = await call_next(request)
|
||||
|
||||
|
||||
# Add rate limit headers to response
|
||||
response.headers["X-RateLimit-Limit"] = str(rate_info["limit"])
|
||||
response.headers["X-RateLimit-Remaining"] = str(rate_info["remaining"])
|
||||
response.headers["X-RateLimit-Reset"] = str(rate_info["reset_time"])
|
||||
|
||||
|
||||
return response
|
||||
|
||||
|
||||
except Exception as e:
|
||||
# Log error but don't block request if rate limiting fails
|
||||
print(f"Rate limiting error: {e}")
|
||||
|
|
@ -218,30 +214,30 @@ class RateLimitMiddleware:
|
|||
|
||||
class IPWhitelist:
|
||||
"""IP whitelist for bypassing rate limits."""
|
||||
|
||||
|
||||
def __init__(self, redis_client: aioredis.Redis):
|
||||
self.redis = redis_client
|
||||
self.whitelist_key = "ip_whitelist"
|
||||
|
||||
|
||||
# Default whitelisted IPs (health checks, monitoring)
|
||||
self.default_whitelist = {
|
||||
"127.0.0.1",
|
||||
"::1",
|
||||
"169.254.169.254", # GCP metadata server
|
||||
}
|
||||
|
||||
|
||||
async def is_whitelisted(self, ip: str) -> bool:
|
||||
"""Check if IP is whitelisted."""
|
||||
if ip in self.default_whitelist:
|
||||
return True
|
||||
|
||||
|
||||
try:
|
||||
is_member = await self.redis.sismember(self.whitelist_key, ip)
|
||||
return bool(is_member)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
async def add_ip(self, ip: str, ttl_seconds: Optional[int] = None) -> bool:
|
||||
|
||||
async def add_ip(self, ip: str, ttl_seconds: int | None = None) -> bool:
|
||||
"""Add IP to whitelist."""
|
||||
try:
|
||||
await self.redis.sadd(self.whitelist_key, ip)
|
||||
|
|
@ -252,7 +248,7 @@ class IPWhitelist:
|
|||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
async def remove_ip(self, ip: str) -> bool:
|
||||
"""Remove IP from whitelist."""
|
||||
try:
|
||||
|
|
@ -264,4 +260,4 @@ class IPWhitelist:
|
|||
|
||||
async def create_rate_limit_middleware(redis_client: aioredis.Redis) -> RateLimitMiddleware:
|
||||
"""Factory function to create rate limit middleware."""
|
||||
return RateLimitMiddleware(redis_client)
|
||||
return RateLimitMiddleware(redis_client)
|
||||
|
|
|
|||
|
|
@ -3,15 +3,17 @@
|
|||
import json
|
||||
import re
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
from fastapi import HTTPException, Request, status
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel, ValidationError as PydanticValidationError
|
||||
import magic
|
||||
from typing import Any
|
||||
from urllib.parse import unquote
|
||||
|
||||
import magic
|
||||
from fastapi import Request, status
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from app.telemetry.metrics import track_validation_metrics
|
||||
|
||||
from ..core.config import settings
|
||||
|
||||
|
||||
class ValidationError(Exception):
|
||||
"""Custom validation error."""
|
||||
|
|
@ -25,92 +27,93 @@ class SecurityValidationError(Exception):
|
|||
|
||||
class RequestValidator:
|
||||
"""Enhanced request validation with security checks."""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
# File type restrictions
|
||||
self.allowed_video_types = {
|
||||
"video/mp4",
|
||||
"video/quicktime",
|
||||
"video/quicktime",
|
||||
"video/x-msvideo" # AVI
|
||||
}
|
||||
|
||||
|
||||
self.allowed_subtitle_types = {
|
||||
"text/vtt",
|
||||
"text/plain"
|
||||
}
|
||||
|
||||
|
||||
# Security patterns to block
|
||||
self.malicious_patterns = [
|
||||
# SQL injection patterns
|
||||
r"(union|select|insert|update|delete|drop|create|alter)\s+",
|
||||
r"(script|javascript|vbscript|onload|onerror|onclick)",
|
||||
r"\b(union|select|insert|update|delete|drop|create|alter)\b\s+",
|
||||
r"vbscript:", # vbscript protocol injection
|
||||
r"\b(onload|onerror|onclick)\s*=", # HTML event handler attribute injection
|
||||
r"<\s*script[^>]*>",
|
||||
r"javascript:",
|
||||
r"data:.*base64",
|
||||
|
||||
|
||||
# Path traversal
|
||||
r"\.\./",
|
||||
r"\.\.\\",
|
||||
r"%2e%2e%2f",
|
||||
r"%2e%2e\\",
|
||||
|
||||
# Command injection (removed $ to allow MongoDB operators in controlled contexts)
|
||||
r"[;&|`](?!\s*$)", # Allow $ but not as command separator
|
||||
r"(rm|wget|curl|nc|bash|sh|cmd|powershell)\s+",
|
||||
|
||||
|
||||
# Command injection (removed $ and ; — semicolons are common in natural language)
|
||||
r"[&|`](?!\s*$)",
|
||||
r"\b(rm|wget|curl|nc|bash|sh|cmd|powershell)\b\s+",
|
||||
|
||||
# MongoDB injection — NoSQL operator abuse
|
||||
r"\$where|\$expr|\$function|\$accumulator"
|
||||
r"|\$ne|\$nin|\$not"
|
||||
r"|\$gt|\$gte|\$lt|\$lte"
|
||||
r"|\$regex|\$jsonSchema|\$mod",
|
||||
]
|
||||
|
||||
|
||||
self.compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in self.malicious_patterns]
|
||||
|
||||
# Max file sizes (in bytes)
|
||||
self.max_video_size = 2 * 1024 * 1024 * 1024 # 2GB
|
||||
|
||||
# Max file sizes (in bytes) — driven by central config (T-14)
|
||||
self.max_video_size = settings.upload_max_video_bytes
|
||||
self.max_subtitle_size = 10 * 1024 * 1024 # 10MB
|
||||
|
||||
|
||||
# Request size limits
|
||||
self.max_json_size = 1024 * 1024 # 1MB
|
||||
self.max_form_fields = 50
|
||||
|
||||
|
||||
def validate_string_content(self, content: str, field_name: str = "input") -> None:
|
||||
"""Validate string content for malicious patterns."""
|
||||
if not isinstance(content, str):
|
||||
return
|
||||
|
||||
|
||||
for pattern in self.compiled_patterns:
|
||||
if pattern.search(content):
|
||||
raise SecurityValidationError(
|
||||
f"Potentially malicious content detected in {field_name}"
|
||||
)
|
||||
|
||||
|
||||
def validate_filename(self, filename: str) -> str:
|
||||
"""Validate and sanitize filename."""
|
||||
if not filename:
|
||||
raise ValidationError("Filename cannot be empty")
|
||||
|
||||
|
||||
# Decode URL encoding
|
||||
filename = unquote(filename)
|
||||
|
||||
|
||||
# Check for malicious patterns
|
||||
self.validate_string_content(filename, "filename")
|
||||
|
||||
|
||||
# Remove dangerous characters
|
||||
safe_filename = re.sub(r'[^\w\-_\.]', '_', filename)
|
||||
|
||||
|
||||
# Prevent hidden files
|
||||
if safe_filename.startswith('.'):
|
||||
safe_filename = 'file_' + safe_filename[1:]
|
||||
|
||||
|
||||
# Limit length
|
||||
if len(safe_filename) > 255:
|
||||
name, ext = safe_filename.rsplit('.', 1) if '.' in safe_filename else (safe_filename, '')
|
||||
safe_filename = name[:250] + ('.' + ext if ext else '')
|
||||
|
||||
|
||||
return safe_filename
|
||||
|
||||
|
||||
def validate_file_type(self, content: bytes, expected_type: str, filename: str) -> None:
|
||||
"""Validate file type using magic numbers."""
|
||||
try:
|
||||
|
|
@ -120,13 +123,13 @@ class RequestValidator:
|
|||
ext = filename.lower().split('.')[-1] if '.' in filename else ''
|
||||
video_extensions = {'mp4', 'mov', 'avi', 'mkv'}
|
||||
subtitle_extensions = {'vtt', 'srt', 'txt'}
|
||||
|
||||
|
||||
if expected_type == "video" and ext not in video_extensions:
|
||||
raise ValidationError(f"Invalid video file extension: {ext}")
|
||||
raise ValidationError(f"Invalid video file extension: {ext}") from None
|
||||
elif expected_type == "subtitle" and ext not in subtitle_extensions:
|
||||
raise ValidationError(f"Invalid subtitle file extension: {ext}")
|
||||
raise ValidationError(f"Invalid subtitle file extension: {ext}") from None
|
||||
return
|
||||
|
||||
|
||||
if expected_type == "video" and detected_type not in self.allowed_video_types:
|
||||
raise ValidationError(
|
||||
f"Invalid video file type: {detected_type}. "
|
||||
|
|
@ -137,7 +140,7 @@ class RequestValidator:
|
|||
f"Invalid subtitle file type: {detected_type}. "
|
||||
f"Allowed types: {', '.join(self.allowed_subtitle_types)}"
|
||||
)
|
||||
|
||||
|
||||
def validate_file_size(self, size: int, file_type: str) -> None:
|
||||
"""Validate file size limits."""
|
||||
if file_type == "video" and size > self.max_video_size:
|
||||
|
|
@ -150,16 +153,16 @@ class RequestValidator:
|
|||
f"Subtitle file too large: {size} bytes. "
|
||||
f"Maximum allowed: {self.max_subtitle_size} bytes"
|
||||
)
|
||||
|
||||
async def validate_json_payload(self, request: Request) -> Optional[Dict[str, Any]]:
|
||||
|
||||
async def validate_json_payload(self, request: Request) -> dict[str, Any] | None:
|
||||
"""Validate JSON request payload."""
|
||||
if not request.headers.get("content-type", "").startswith("application/json"):
|
||||
return None
|
||||
|
||||
|
||||
content_length = request.headers.get("content-length")
|
||||
if content_length and int(content_length) > self.max_json_size:
|
||||
raise ValidationError(f"JSON payload too large: {content_length} bytes")
|
||||
|
||||
|
||||
try:
|
||||
# Check if body has already been read
|
||||
if hasattr(request, '_cached_body'):
|
||||
|
|
@ -168,63 +171,67 @@ class RequestValidator:
|
|||
body = await request.body()
|
||||
# Cache the body so FastAPI can read it later
|
||||
request._cached_body = body
|
||||
|
||||
|
||||
if len(body) > self.max_json_size:
|
||||
raise ValidationError(f"JSON payload too large: {len(body)} bytes")
|
||||
|
||||
|
||||
if not body:
|
||||
return {}
|
||||
|
||||
|
||||
payload = json.loads(body)
|
||||
|
||||
|
||||
# Recursively validate all string values
|
||||
self._validate_json_values(payload)
|
||||
|
||||
|
||||
return payload
|
||||
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
raise ValidationError(f"Invalid JSON: {e}")
|
||||
|
||||
raise ValidationError(f"Invalid JSON: {e}") from e
|
||||
|
||||
# Fields that contain free-form natural language — skip injection pattern checks
|
||||
_FREETEXT_FIELDS = {"captions_vtt", "audio_description_vtt", "text", "notes", "change_note", "description"}
|
||||
|
||||
def _validate_json_values(self, obj: Any, path: str = "root") -> None:
    """Recursively validate a decoded JSON value.

    Dict keys and string leaves are passed to ``validate_string_content``.
    Values stored under a key listed in ``self._FREETEXT_FIELDS`` are not
    descended into at all.

    Args:
        obj: Decoded JSON value (dict, list, str or any other scalar).
        path: Location of ``obj`` inside the payload, used to label errors.

    Raises:
        ValidationError: If a dict has more than ``self.max_form_fields``
            entries or a list has more than 1000 items. Anything raised by
            ``validate_string_content`` propagates unchanged.
    """
    if isinstance(obj, dict):
        if len(obj) > self.max_form_fields:
            raise ValidationError(f"Too many fields in object at {path}")

        for key, value in obj.items():
            if isinstance(key, str):
                self.validate_string_content(key, f"{path}.key")
            # Skip pattern scanning for free-text fields (VTT content, notes, etc.)
            if key not in self._FREETEXT_FIELDS:
                self._validate_json_values(value, f"{path}.{key}")

    elif isinstance(obj, list):
        if len(obj) > 1000:  # Prevent large arrays
            raise ValidationError(f"Array too large at {path}")

        for i, item in enumerate(obj):
            self._validate_json_values(item, f"{path}[{i}]")

    elif isinstance(obj, str):
        self.validate_string_content(obj, path)
|
||||
|
||||
|
||||
def validate_query_params(self, request: Request) -> None:
    """Validate every query-parameter name and value on the request.

    Args:
        request: Incoming request whose ``query_params`` are checked.

    Raises:
        Whatever ``validate_string_content`` raises for a rejected name or
        value.
    """
    params = request.query_params
    # A multi-dict's items() collapses a repeated key (?q=a&q=b) to a single
    # value, which would let the other values skip validation. Prefer
    # multi_items() when the mapping offers it so every occurrence is checked.
    multi_items = getattr(params, "multi_items", None)
    pairs = multi_items() if callable(multi_items) else params.items()

    for key, value in pairs:
        self.validate_string_content(key, f"query.{key}")
        self.validate_string_content(str(value), f"query.{key}")
|
||||
|
||||
|
||||
def validate_headers(self, request: Request) -> None:
    """Validate request headers.

    The values of a small set of host/URL-override headers are passed to
    ``validate_string_content``; the ``User-Agent`` header is limited to
    500 characters. Header names are compared case-insensitively.

    Args:
        request: Incoming request whose headers are checked.

    Raises:
        SecurityValidationError: If the User-Agent header is longer than
            500 characters. Anything raised by ``validate_string_content``
            propagates unchanged.
    """
    suspicious_headers = {
        "x-forwarded-host",
        "x-original-host",
        "x-rewrite-url",
    }

    for header_name, header_value in request.headers.items():
        lowered = header_name.lower()

        # Check for suspicious headers
        if lowered in suspicious_headers:
            self.validate_string_content(header_value, f"header.{header_name}")

        # Validate user-agent length
        if lowered == "user-agent" and len(header_value) > 500:
            raise SecurityValidationError("User-Agent header too long")
|
||||
|
|
@ -232,34 +239,34 @@ class RequestValidator:
|
|||
|
||||
class ValidationMiddleware:
|
||||
"""FastAPI middleware for enhanced request validation."""
|
||||
|
||||
|
||||
def __init__(self) -> None:
    """Create the middleware with its own ``RequestValidator`` instance."""
    self.validator = RequestValidator()
|
||||
|
||||
|
||||
async def __call__(self, request: Request, call_next):
|
||||
"""Process validation for the request."""
|
||||
|
||||
|
||||
start_time = time.time()
|
||||
validation_errors = []
|
||||
|
||||
|
||||
# Skip validation for timing adjustment endpoint temporarily
|
||||
if "/vtt/adjust-timing" in request.url.path:
|
||||
return await call_next(request)
|
||||
|
||||
|
||||
try:
|
||||
# Validate headers
|
||||
self.validator.validate_headers(request)
|
||||
|
||||
|
||||
# Validate query parameters
|
||||
self.validator.validate_query_params(request)
|
||||
|
||||
|
||||
# Validate JSON payload if present
|
||||
if request.method in ["POST", "PUT", "PATCH"]:
|
||||
await self.validator.validate_json_payload(request)
|
||||
|
||||
|
||||
# Process the request
|
||||
response = await call_next(request)
|
||||
|
||||
|
||||
# Track successful validation
|
||||
track_validation_metrics(
|
||||
endpoint=request.url.path,
|
||||
|
|
@ -268,10 +275,10 @@ class ValidationMiddleware:
|
|||
validation_time=time.time() - start_time,
|
||||
error_types=[]
|
||||
)
|
||||
|
||||
|
||||
return response
|
||||
|
||||
except SecurityValidationError as e:
|
||||
|
||||
except SecurityValidationError:
|
||||
validation_errors.append("security")
|
||||
track_validation_metrics(
|
||||
endpoint=request.url.path,
|
||||
|
|
@ -280,7 +287,7 @@ class ValidationMiddleware:
|
|||
validation_time=time.time() - start_time,
|
||||
error_types=validation_errors
|
||||
)
|
||||
|
||||
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
content={
|
||||
|
|
@ -288,7 +295,7 @@ class ValidationMiddleware:
|
|||
"error_code": "SECURITY_VALIDATION_ERROR"
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
except ValidationError as e:
|
||||
validation_errors.append("format")
|
||||
track_validation_metrics(
|
||||
|
|
@ -298,7 +305,7 @@ class ValidationMiddleware:
|
|||
validation_time=time.time() - start_time,
|
||||
error_types=validation_errors
|
||||
)
|
||||
|
||||
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
||||
content={
|
||||
|
|
@ -306,7 +313,7 @@ class ValidationMiddleware:
|
|||
"error_code": "VALIDATION_ERROR"
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
validation_errors.append("unknown")
|
||||
track_validation_metrics(
|
||||
|
|
@ -316,7 +323,7 @@ class ValidationMiddleware:
|
|||
validation_time=time.time() - start_time,
|
||||
error_types=validation_errors
|
||||
)
|
||||
|
||||
|
||||
# Log unexpected error but continue processing
|
||||
print(f"Validation middleware error: {e}")
|
||||
return await call_next(request)
|
||||
|
|
@ -324,4 +331,4 @@ class ValidationMiddleware:
|
|||
|
||||
async def create_validation_middleware() -> ValidationMiddleware:
    """Factory function to create validation middleware.

    Returns:
        A new ``ValidationMiddleware`` instance.
    """
    return ValidationMiddleware()
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
"""Database migration framework for MongoDB."""
|
||||
|
||||
from .migrator import MigrationManager, Migration
|
||||
from .migrator import Migration, MigrationManager
|
||||
|
||||
__all__ = ["MigrationManager", "Migration"]
|
||||
__all__ = ["MigrationManager", "Migration"]
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
"""MongoDB migration framework."""
|
||||
|
||||
import os
|
||||
import importlib.util
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
||||
from app.core.database import get_database
|
||||
|
|
@ -17,22 +16,23 @@ logger = get_logger(__name__)
|
|||
|
||||
class Migration(ABC):
    """Base class for database migrations.

    Subclasses implement ``up`` and ``down`` and provide their own
    ``version`` and ``description``. The database handle is injected with
    ``set_database`` before either method is called.
    """

    # Format: YYYY-MM-DD-HHMMSS; overridden by subclass as class variable.
    version: str = "0000-00-00-000000"
    description: str = ""

    def __init__(self):
        # Only the database handle is per-instance state. ``version`` and
        # ``description`` are deliberately not assigned here so that a
        # subclass's class-level values are not shadowed.
        self.db: AsyncIOMotorDatabase | None = None

    @abstractmethod
    async def up(self) -> None:
        """Apply the migration."""

    @abstractmethod
    async def down(self) -> None:
        """Rollback the migration."""

    async def set_database(self, db: AsyncIOMotorDatabase) -> None:
        """Set the database instance.

        Args:
            db: Database handle stored on ``self.db``.
        """
        self.db = db
|
||||
|
|
@ -40,7 +40,7 @@ class Migration(ABC):
|
|||
|
||||
class MigrationRecord:
|
||||
"""Represents a migration record in the database."""
|
||||
|
||||
|
||||
def __init__(self, version: str, description: str, applied_at: datetime):
|
||||
self.version = version
|
||||
self.description = description
|
||||
|
|
@ -49,163 +49,163 @@ class MigrationRecord:
|
|||
|
||||
class MigrationManager:
|
||||
"""Manages database migrations."""
|
||||
|
||||
|
||||
def __init__(self):
    """Set up an unconnected manager.

    ``db`` stays ``None`` until ``initialize`` runs. Migration scripts are
    looked up in the ``scripts`` directory next to this module, and applied
    versions are tracked in the ``migration_history`` collection.
    """
    self.db: AsyncIOMotorDatabase | None = None
    self.migrations_dir = Path(__file__).parent / "scripts"
    self.collection_name = "migration_history"
|
||||
|
||||
|
||||
async def initialize(self) -> None:
    """Initialize the migration manager.

    Obtains the database handle from ``get_database`` and makes sure the
    migration history collection has its indexes.
    """
    self.db = await get_database()
    await self._ensure_migration_collection()
|
||||
|
||||
|
||||
async def _ensure_migration_collection(self) -> None:
    """Ensure the migration history collection exists with proper indexes.

    Creates a unique ascending index on ``version`` and a descending index
    on ``applied_at``.
    """
    collection = self.db[self.collection_name]

    # Create indexes for migration history
    await collection.create_index([("version", 1)], unique=True)
    await collection.create_index([("applied_at", -1)])

    logger.info("Migration history collection initialized")
|
||||
|
||||
def discover_migrations(self) -> list[str]:
    """Discover all migration files in the migrations directory.

    Returns:
        Stems (file names without ``.py``) of every ``migration_*.py`` file
        in ``self.migrations_dir``, sorted lexicographically, or an empty
        list when the directory does not exist.
    """
    if not self.migrations_dir.exists():
        logger.warning(f"Migrations directory not found: {self.migrations_dir}")
        return []

    # Sort by version (filename should start with version)
    return sorted(
        file_path.stem
        for file_path in self.migrations_dir.glob("*.py")
        if file_path.name.startswith("migration_")
    )
|
||||
|
||||
|
||||
async def load_migration(self, migration_name: str) -> Migration:
    """Dynamically load a migration class.

    Args:
        migration_name: Stem of the migration file inside
            ``self.migrations_dir`` (without the ``.py`` suffix).

    Returns:
        An instance of the module's ``Migration`` class with the manager's
        database handle already set on it.

    Raises:
        FileNotFoundError: If the migration file does not exist.
        ImportError: If no import spec or loader can be built for the file.
        AttributeError: If the module does not define ``Migration``.
    """
    migration_path = self.migrations_dir / f"{migration_name}.py"

    if not migration_path.exists():
        raise FileNotFoundError(f"Migration file not found: {migration_path}")

    # Load the module
    spec = importlib.util.spec_from_file_location(migration_name, migration_path)
    # spec_from_file_location may return None, and a spec may carry no loader;
    # fail with a clear error instead of an AttributeError on None.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load migration module: {migration_path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    # Get the migration class (assume it's named Migration)
    if not hasattr(module, "Migration"):
        raise AttributeError(f"Migration class not found in {migration_name}")

    migration = module.Migration()
    await migration.set_database(self.db)

    return migration
|
||||
|
||||
async def get_applied_migrations(self) -> list[str]:
    """Get list of applied migration versions.

    Returns:
        The ``version`` of every document in the migration history
        collection, in ascending ``version`` order.
    """
    collection = self.db[self.collection_name]
    cursor = collection.find({}, {"version": 1}).sort("version", 1)

    return [doc["version"] async for doc in cursor]
|
||||
|
||||
|
||||
async def record_migration(self, migration: Migration) -> None:
    """Record a successful migration in the database.

    Inserts a document holding the migration's ``version`` and
    ``description`` plus the current UTC time as ``applied_at``.

    Args:
        migration: The migration that has just been applied.
    """
    # Local import: the module only imports the ``datetime`` class itself.
    from datetime import timezone

    collection = self.db[self.collection_name]

    record = {
        "version": migration.version,
        "description": migration.description,
        # datetime.utcnow() is deprecated (Python 3.12+) and returns a naive
        # value; an aware UTC timestamp denotes the same instant.
        "applied_at": datetime.now(timezone.utc),
    }

    await collection.insert_one(record)
    logger.info(f"Recorded migration: {migration.version} - {migration.description}")
|
||||
|
||||
|
||||
async def remove_migration_record(self, version: str) -> None:
    """Remove a migration record (for rollback).

    Args:
        version: Version whose history document is deleted.
    """
    collection = self.db[self.collection_name]
    await collection.delete_one({"version": version})
    logger.info(f"Removed migration record: {version}")
|
||||
|
||||
|
||||
@trace_async_operation("migration_manager.migrate_up")
async def migrate_up(self, target_version: str | None = None) -> list[str]:
    """
    Apply migrations up to the target version.

    Args:
        target_version: Version to migrate to. If None, applies all pending migrations.

    Returns:
        List of applied migration versions.

    Raises:
        Exception: Any error raised while loading, applying or recording a
            migration is logged and re-raised; later migrations are not run.
    """
    await self.initialize()

    # Discover all migrations
    all_migrations = self.discover_migrations()
    # A set gives constant-time membership tests in the loop below.
    applied_versions = set(await self.get_applied_migrations())

    # Find pending migrations
    pending_migrations = []
    for migration_name in all_migrations:
        # Extract version from filename (assumes format: migration_YYYY-MM-DD-HHMMSS_description.py)
        version = migration_name.replace("migration_", "").split("_")[0]

        if version in applied_versions:
            continue
        if target_version is None or version <= target_version:
            pending_migrations.append((migration_name, version))

    # Sort by version
    pending_migrations.sort(key=lambda x: x[1])

    applied = []
    for migration_name, version in pending_migrations:
        try:
            logger.info(f"Applying migration: {migration_name}")

            migration = await self.load_migration(migration_name)
            await migration.up()
            await self.record_migration(migration)

            applied.append(version)
            logger.info(f"Successfully applied migration: {version}")

        except Exception as e:
            logger.error(f"Failed to apply migration {migration_name}: {e}")
            raise

    return applied
|
||||
|
||||
|
||||
@trace_async_operation("migration_manager.migrate_down")
|
||||
async def migrate_down(self, target_version: str) -> List[str]:
|
||||
async def migrate_down(self, target_version: str) -> list[str]:
|
||||
"""
|
||||
Rollback migrations down to the target version.
|
||||
|
||||
|
||||
Args:
|
||||
target_version: Version to rollback to.
|
||||
|
||||
|
||||
Returns:
|
||||
List of rolled back migration versions.
|
||||
"""
|
||||
await self.initialize()
|
||||
|
||||
|
||||
applied_migrations = await self.get_applied_migrations()
|
||||
|
||||
|
||||
# Find migrations to rollback (newer than target)
|
||||
to_rollback = []
|
||||
for version in reversed(applied_migrations):
|
||||
if version > target_version:
|
||||
to_rollback.append(version)
|
||||
|
||||
|
||||
rolled_back = []
|
||||
for version in to_rollback:
|
||||
try:
|
||||
|
|
@ -215,39 +215,39 @@ class MigrationManager:
|
|||
if version in migration_file:
|
||||
migration_name = migration_file
|
||||
break
|
||||
|
||||
|
||||
if not migration_name:
|
||||
logger.warning(f"Migration file not found for version {version}")
|
||||
continue
|
||||
|
||||
|
||||
logger.info(f"Rolling back migration: {migration_name}")
|
||||
|
||||
|
||||
migration = await self.load_migration(migration_name)
|
||||
await migration.down()
|
||||
await self.remove_migration_record(version)
|
||||
|
||||
|
||||
rolled_back.append(version)
|
||||
logger.info(f"Successfully rolled back migration: {version}")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to rollback migration {version}: {e}")
|
||||
raise
|
||||
|
||||
|
||||
return rolled_back
|
||||
|
||||
|
||||
async def get_migration_status(self) -> dict:
    """Get current migration status.

    Returns:
        A dict with the number of discovered migration files
        (``total_migrations``), the number of recorded versions
        (``applied_migrations``), the number of discovered migrations whose
        version is not recorded (``pending_migrations``), the highest
        recorded version or ``None`` (``latest_applied``) and the full list
        of recorded versions (``all_applied``).
    """
    await self.initialize()

    all_migrations = self.discover_migrations()
    applied_migrations = await self.get_applied_migrations()

    # Count pending migrations by version rather than subtracting lengths:
    # the history may contain versions whose file no longer exists, which
    # would otherwise under-count (or even go negative).
    applied_versions = set(applied_migrations)
    pending_count = sum(
        1
        for migration_name in all_migrations
        if migration_name.replace("migration_", "").split("_")[0] not in applied_versions
    )

    return {
        "total_migrations": len(all_migrations),
        "applied_migrations": len(applied_migrations),
        "pending_migrations": pending_count,
        "latest_applied": applied_migrations[-1] if applied_migrations else None,
        "all_applied": applied_migrations,
    }
|
||||
|
|
|
|||
22
backend/app/migrations/run.py
Normal file
22
backend/app/migrations/run.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
"""Entry point for running migrations: python -m app.migrations.run"""
|
||||
import asyncio
|
||||
|
||||
from app.core.database import close_mongo_connection, connect_to_mongo
|
||||
from app.migrations.migrator import MigrationManager
|
||||
|
||||
|
||||
async def main() -> None:
    """Connect to MongoDB, apply all pending migrations and report the result.

    The connection is closed even if a migration fails.
    """
    await connect_to_mongo()
    try:
        mgr = MigrationManager()
        applied = await mgr.migrate_up()
        if applied:
            print(f"Applied {len(applied)} migration(s): {applied}")
        else:
            print("Already up to date — no pending migrations.")
    finally:
        await close_mongo_connection()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
|
|
@ -1,39 +1,38 @@
|
|||
"""Initial database schema setup migration."""
|
||||
|
||||
from datetime import datetime
|
||||
from app.migrations.migrator import Migration
|
||||
|
||||
|
||||
class Migration(Migration):
|
||||
"""Initial schema setup with all collections and indexes."""
|
||||
|
||||
|
||||
def __init__(self):
    """Set the version and description of the initial-schema migration."""
    super().__init__()
    self.version = "2025-08-17-120000"
    self.description = "Initial database schema with users, jobs, and audit_logs collections"
|
||||
|
||||
|
||||
async def up(self) -> None:
|
||||
"""Create initial collections and indexes."""
|
||||
|
||||
|
||||
# Users collection setup
|
||||
await self.db.users.create_index([("email", 1)], unique=True)
|
||||
await self.db.users.create_index([("role", 1)])
|
||||
await self.db.users.create_index([("is_active", 1)])
|
||||
await self.db.users.create_index([("created_at", -1)])
|
||||
|
||||
|
||||
# Jobs collection setup
|
||||
await self.db.jobs.create_index([("status", 1), ("created_at", -1)])
|
||||
await self.db.jobs.create_index([("client_id", 1)])
|
||||
await self.db.jobs.create_index([("updated_at", -1)])
|
||||
await self.db.jobs.create_index([("languages", 1)])
|
||||
|
||||
|
||||
# Create compound index for job queries
|
||||
await self.db.jobs.create_index([
|
||||
("status", 1),
|
||||
("client_id", 1),
|
||||
("created_at", -1)
|
||||
])
|
||||
|
||||
|
||||
# Audit logs collection setup
|
||||
await self.db.audit_logs.create_index([("timestamp", -1)])
|
||||
await self.db.audit_logs.create_index([("action", 1), ("timestamp", -1)])
|
||||
|
|
@ -42,23 +41,23 @@ class Migration(Migration):
|
|||
await self.db.audit_logs.create_index([("resource_type", 1), ("resource_id", 1)])
|
||||
await self.db.audit_logs.create_index([("ip_address", 1), ("timestamp", -1)])
|
||||
await self.db.audit_logs.create_index([("success", 1), ("timestamp", -1)])
|
||||
|
||||
|
||||
# Text search index for audit logs
|
||||
await self.db.audit_logs.create_index([
|
||||
("description", "text"),
|
||||
("details", "text"),
|
||||
("error_message", "text")
|
||||
])
|
||||
|
||||
|
||||
print(f"✅ Applied migration {self.version}: {self.description}")
|
||||
|
||||
|
||||
async def down(self) -> None:
    """Drop all collections (destructive - use with caution).

    Drops the ``users``, ``jobs`` and ``audit_logs`` collections, in that
    order, and prints a warning that the data is gone.
    """
    # This is a destructive operation - in production, you might want to backup first
    await self.db.users.drop()
    await self.db.jobs.drop()
    await self.db.audit_logs.drop()

    print(f"⚠️ Rolled back migration {self.version}: {self.description}")
    print("⚠️ WARNING: All data has been deleted!")
|
||||
|
|
|
|||
|
|
@ -5,75 +5,75 @@ from app.migrations.migrator import Migration
|
|||
|
||||
class Migration(Migration):
|
||||
"""Optimize indexes for better query performance."""
|
||||
|
||||
|
||||
def __init__(self):
    """Set the version and description of the index-optimization migration."""
    super().__init__()
    self.version = "2025-08-17-120001"
    self.description = "Index optimization for query performance improvements"
|
||||
|
||||
|
||||
async def up(self) -> None:
|
||||
"""Add optimized indexes for common query patterns."""
|
||||
|
||||
|
||||
# Jobs collection optimizations
|
||||
|
||||
|
||||
# Index for job status transitions and monitoring
|
||||
await self.db.jobs.create_index([
|
||||
("status", 1),
|
||||
("updated_at", -1),
|
||||
("client_id", 1)
|
||||
], name="jobs_status_updated_client_idx")
|
||||
|
||||
|
||||
# Index for queue management (pending jobs)
|
||||
await self.db.jobs.create_index([
|
||||
("status", 1),
|
||||
("created_at", 1)
|
||||
], name="jobs_queue_processing_idx")
|
||||
|
||||
|
||||
# Index for client job history
|
||||
await self.db.jobs.create_index([
|
||||
("client_id", 1),
|
||||
("created_at", -1),
|
||||
("status", 1)
|
||||
], name="jobs_client_history_idx")
|
||||
|
||||
|
||||
# Sparse index for error tracking
|
||||
await self.db.jobs.create_index([
|
||||
("status", 1),
|
||||
("error", 1)
|
||||
], sparse=True, name="jobs_error_tracking_idx")
|
||||
|
||||
|
||||
# Users collection optimizations
|
||||
|
||||
|
||||
# Index for active user queries
|
||||
await self.db.users.create_index([
|
||||
("is_active", 1),
|
||||
("role", 1),
|
||||
("last_login_at", -1)
|
||||
], name="users_active_role_login_idx")
|
||||
|
||||
|
||||
# Index for user search by email pattern
|
||||
await self.db.users.create_index([
|
||||
("email", "text"),
|
||||
("first_name", "text"),
|
||||
("last_name", "text")
|
||||
], name="users_search_idx")
|
||||
|
||||
|
||||
# Audit logs collection optimizations
|
||||
|
||||
|
||||
# Compound index for security monitoring
|
||||
await self.db.audit_logs.create_index([
|
||||
("severity", 1),
|
||||
("action", 1),
|
||||
("timestamp", -1)
|
||||
], name="audit_security_monitoring_idx")
|
||||
|
||||
|
||||
# Index for user activity analysis
|
||||
await self.db.audit_logs.create_index([
|
||||
("user_id", 1),
|
||||
("action", 1),
|
||||
("timestamp", -1)
|
||||
], name="audit_user_activity_idx")
|
||||
|
||||
|
||||
# Index for resource access tracking
|
||||
await self.db.audit_logs.create_index([
|
||||
("resource_type", 1),
|
||||
|
|
@ -81,30 +81,30 @@ class Migration(Migration):
|
|||
("action", 1),
|
||||
("timestamp", -1)
|
||||
], name="audit_resource_access_idx")
|
||||
|
||||
|
||||
# Sparse index for failed operations
|
||||
await self.db.audit_logs.create_index([
|
||||
("success", 1),
|
||||
("timestamp", -1)
|
||||
], sparse=True, name="audit_failures_idx")
|
||||
|
||||
|
||||
# Add TTL index for automatic audit log cleanup (optional)
|
||||
# Uncomment if you want automatic cleanup after 2 years
|
||||
# await self.db.audit_logs.create_index(
|
||||
# [("timestamp", 1)],
|
||||
# [("timestamp", 1)],
|
||||
# expireAfterSeconds=63072000, # 2 years
|
||||
# name="audit_ttl_idx"
|
||||
# )
|
||||
|
||||
|
||||
print(f"✅ Applied migration {self.version}: {self.description}")
|
||||
|
||||
|
||||
async def down(self) -> None:
|
||||
"""Remove the optimized indexes."""
|
||||
|
||||
|
||||
# Drop the indexes we created
|
||||
indexes_to_drop = [
|
||||
"jobs_status_updated_client_idx",
|
||||
"jobs_queue_processing_idx",
|
||||
"jobs_queue_processing_idx",
|
||||
"jobs_client_history_idx",
|
||||
"jobs_error_tracking_idx",
|
||||
"users_active_role_login_idx",
|
||||
|
|
@ -114,21 +114,21 @@ class Migration(Migration):
|
|||
"audit_resource_access_idx",
|
||||
"audit_failures_idx"
|
||||
]
|
||||
|
||||
|
||||
for index_name in indexes_to_drop:
|
||||
try:
|
||||
await self.db.jobs.drop_index(index_name)
|
||||
except Exception:
|
||||
pass # Index might not exist on this collection
|
||||
|
||||
|
||||
try:
|
||||
await self.db.users.drop_index(index_name)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
try:
|
||||
await self.db.audit_logs.drop_index(index_name)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
print(f"⚠️ Rolled back migration {self.version}: {self.description}")
|
||||
|
||||
print(f"⚠️ Rolled back migration {self.version}: {self.description}")
|
||||
|
|
|
|||
|
|
@ -1,20 +1,21 @@
|
|||
"""Migrate audit log schema from basic to comprehensive format."""
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from app.migrations.migrator import Migration
|
||||
|
||||
|
||||
class Migration(Migration):
|
||||
"""Update audit log schema to comprehensive format."""
|
||||
|
||||
|
||||
def __init__(self):
    """Set the version and description of the audit-log schema migration."""
    super().__init__()
    self.version = "2025-08-17-120002"
    self.description = "Update audit log schema from basic to comprehensive format"
|
||||
|
||||
|
||||
async def up(self) -> None:
|
||||
"""Migrate existing audit logs to new schema format."""
|
||||
|
||||
|
||||
# Find all existing audit logs with old schema
|
||||
old_logs_cursor = self.db.audit_logs.find({
|
||||
# Look for logs that have the old schema structure
|
||||
|
|
@ -24,9 +25,9 @@ class Migration(Migration):
|
|||
{"timestamp": {"$exists": False}} # Missing new timestamp field
|
||||
]
|
||||
})
|
||||
|
||||
|
||||
migration_count = 0
|
||||
|
||||
|
||||
async for old_log in old_logs_cursor:
|
||||
try:
|
||||
# Map old fields to new schema
|
||||
|
|
@ -38,82 +39,82 @@ class Migration(Migration):
|
|||
"description": old_log.get("action", "Legacy action"),
|
||||
"success": True,
|
||||
"environment": "prod",
|
||||
"service_name": "accessible-video-api",
|
||||
"service_name": "accessible-video-api",
|
||||
"api_version": "v1"
|
||||
}
|
||||
|
||||
|
||||
# Map optional fields if they exist
|
||||
if "user_id" in old_log:
|
||||
new_log["user_id"] = old_log["user_id"]
|
||||
|
||||
|
||||
if "job_id" in old_log:
|
||||
new_log["resource_type"] = "job"
|
||||
new_log["resource_id"] = old_log["job_id"]
|
||||
|
||||
|
||||
if "ip_address" in old_log:
|
||||
new_log["ip_address"] = old_log["ip_address"]
|
||||
|
||||
|
||||
if "user_agent" in old_log:
|
||||
new_log["user_agent"] = old_log["user_agent"]
|
||||
|
||||
|
||||
if "details" in old_log:
|
||||
new_log["details"] = old_log["details"]
|
||||
|
||||
|
||||
# Replace the old document with the new schema
|
||||
await self.db.audit_logs.replace_one(
|
||||
{"_id": old_log["_id"]},
|
||||
new_log
|
||||
)
|
||||
|
||||
|
||||
migration_count += 1
|
||||
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error migrating audit log {old_log.get('_id')}: {e}")
|
||||
continue
|
||||
|
||||
|
||||
print(f"✅ Applied migration {self.version}: Migrated {migration_count} audit log records")
|
||||
|
||||
|
||||
def _map_old_action(self, old_action: str) -> str:
    """Map old action strings to new AuditAction enum values.

    Args:
        old_action: Action name as stored in the old audit log schema.

    Returns:
        The dotted new-style action name, or ``old_action`` unchanged when
        there is no mapping for it.
    """
    action_mapping = {
        # Job actions
        "job_created": "job.create",
        "job_approved": "job.approve",
        "job_rejected": "job.reject",
        "job_updated": "job.update",
        "job_cancelled": "job.cancel",

        # Auth actions
        "login": "auth.login.success",
        "logout": "auth.logout",
        "login_failed": "auth.login.failure",

        # File actions
        "file_uploaded": "file.upload",
        "file_downloaded": "file.download",

        # VTT actions
        "vtt_edited": "vtt.edit",

        # Admin actions
        "user_created": "user.create",
        "user_updated": "user.update",
        "user_deleted": "user.delete",
    }

    return action_mapping.get(old_action, old_action)
|
||||
|
||||
|
||||
async def down(self) -> None:
|
||||
"""Rollback to old audit log schema format (limited)."""
|
||||
|
||||
|
||||
# Find all audit logs with new schema
|
||||
new_logs_cursor = self.db.audit_logs.find({
|
||||
"timestamp": {"$exists": True},
|
||||
"action": {"$exists": True}
|
||||
})
|
||||
|
||||
|
||||
rollback_count = 0
|
||||
|
||||
|
||||
async for new_log in new_logs_cursor:
|
||||
try:
|
||||
# Map new fields back to old schema (lossy conversion)
|
||||
|
|
@ -122,34 +123,34 @@ class Migration(Migration):
|
|||
"when": new_log["timestamp"],
|
||||
"action": new_log["action"]
|
||||
}
|
||||
|
||||
|
||||
# Map back optional fields
|
||||
if "user_id" in new_log:
|
||||
old_log["user_id"] = new_log["user_id"]
|
||||
|
||||
|
||||
if "resource_type" in new_log and new_log["resource_type"] == "job":
|
||||
old_log["job_id"] = new_log.get("resource_id")
|
||||
|
||||
|
||||
if "ip_address" in new_log:
|
||||
old_log["ip_address"] = new_log["ip_address"]
|
||||
|
||||
|
||||
if "user_agent" in new_log:
|
||||
old_log["user_agent"] = new_log["user_agent"]
|
||||
|
||||
|
||||
if "details" in new_log:
|
||||
old_log["details"] = new_log["details"]
|
||||
|
||||
|
||||
# Replace with old schema
|
||||
await self.db.audit_logs.replace_one(
|
||||
{"_id": new_log["_id"]},
|
||||
old_log
|
||||
)
|
||||
|
||||
|
||||
rollback_count += 1
|
||||
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error rolling back audit log {new_log.get('_id')}: {e}")
|
||||
continue
|
||||
|
||||
|
||||
print(f"⚠️ Rolled back migration {self.version}: Reverted {rollback_count} audit log records")
|
||||
print("⚠️ WARNING: Some audit log data may have been lost due to schema differences")
|
||||
print("⚠️ WARNING: Some audit log data may have been lost due to schema differences")
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class Migration(Migration):
|
|||
|
||||
# Create index on auth_provider for faster queries
|
||||
await self.db.users.create_index([("auth_provider", 1)])
|
||||
print(f"✅ Created index on auth_provider field")
|
||||
print("✅ Created index on auth_provider field")
|
||||
|
||||
print(f"✅ Applied migration {self.version}: {self.description}")
|
||||
|
||||
|
|
@ -34,7 +34,7 @@ class Migration(Migration):
|
|||
# Drop the index
|
||||
try:
|
||||
await self.db.users.drop_index("auth_provider_1")
|
||||
print(f"✅ Dropped index on auth_provider field")
|
||||
print("✅ Dropped index on auth_provider field")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Could not drop index: {e}")
|
||||
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ class Migration(Migration):
|
|||
"validationLevel": "moderate", # moderate = only validate on insert/update, not existing docs
|
||||
"validationAction": "error" # error = reject invalid documents
|
||||
})
|
||||
print(f"✅ Updated users collection validator")
|
||||
print("✅ Updated users collection validator")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Could not update validator: {e}")
|
||||
# Try creating the collection if it doesn't exist
|
||||
|
|
@ -86,7 +86,7 @@ class Migration(Migration):
|
|||
validationLevel="moderate",
|
||||
validationAction="error"
|
||||
)
|
||||
print(f"✅ Created users collection with validator")
|
||||
print("✅ Created users collection with validator")
|
||||
except Exception as e2:
|
||||
print(f"⚠️ Could not create collection: {e2}")
|
||||
|
||||
|
|
@ -136,4 +136,4 @@ class Migration(Migration):
|
|||
})
|
||||
|
||||
print(f"⚠️ Rolled back migration {self.version}: {self.description}")
|
||||
print(f"⚠️ WARNING: Production role users will fail validation!")
|
||||
print("⚠️ WARNING: Production role users will fail validation!")
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ class Migration(Migration):
|
|||
"validationLevel": "moderate",
|
||||
"validationAction": "error"
|
||||
})
|
||||
print(f" Updated jobs collection validator")
|
||||
print(" Updated jobs collection validator")
|
||||
except Exception as e:
|
||||
print(f" Could not update validator: {e}")
|
||||
raise
|
||||
|
|
@ -101,4 +101,4 @@ class Migration(Migration):
|
|||
})
|
||||
|
||||
print(f" Rolled back migration {self.version}: {self.description}")
|
||||
print(f" WARNING: Jobs with approved_source or qc_feedback status will fail validation!")
|
||||
print(" WARNING: Jobs with approved_source or qc_feedback status will fail validation!")
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ class Migration(Migration):
|
|||
"validationLevel": "moderate",
|
||||
"validationAction": "error"
|
||||
})
|
||||
print(f" Updated jobs collection validator")
|
||||
print(" Updated jobs collection validator")
|
||||
except Exception as e:
|
||||
print(f" Could not update validator: {e}")
|
||||
raise
|
||||
|
|
@ -104,4 +104,4 @@ class Migration(Migration):
|
|||
})
|
||||
|
||||
print(f" Rolled back migration {self.version}: {self.description}")
|
||||
print(f" WARNING: Jobs with rendering_video status will fail validation!")
|
||||
print(" WARNING: Jobs with rendering_video status will fail validation!")
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ class Migration(Migration):
|
|||
"validationLevel": "moderate",
|
||||
"validationAction": "error"
|
||||
})
|
||||
print(f" Updated jobs collection validator")
|
||||
print(" Updated jobs collection validator")
|
||||
except Exception as e:
|
||||
print(f" Could not update validator: {e}")
|
||||
raise
|
||||
|
|
@ -111,4 +111,4 @@ class Migration(Migration):
|
|||
})
|
||||
|
||||
print(f" Rolled back migration {self.version}: {self.description}")
|
||||
print(f" WARNING: Jobs with tts_failed or render_failed status will fail validation!")
|
||||
print(" WARNING: Jobs with tts_failed or render_failed status will fail validation!")
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ class Migration(Migration):
|
|||
"validationLevel": "moderate",
|
||||
"validationAction": "error"
|
||||
})
|
||||
print(f" Updated jobs collection validator")
|
||||
print(" Updated jobs collection validator")
|
||||
except Exception as e:
|
||||
print(f" Could not update validator: {e}")
|
||||
raise
|
||||
|
|
@ -114,4 +114,4 @@ class Migration(Migration):
|
|||
})
|
||||
|
||||
print(f" Rolled back migration {self.version}: {self.description}")
|
||||
print(f" WARNING: Jobs with rendering_qc status will fail validation!")
|
||||
print(" WARNING: Jobs with rendering_qc status will fail validation!")
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ class Migration(Migration):
|
|||
"validationLevel": "moderate",
|
||||
"validationAction": "error"
|
||||
})
|
||||
print(f"✅ Updated users collection validator")
|
||||
print("✅ Updated users collection validator")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Could not update validator: {e}")
|
||||
try:
|
||||
|
|
@ -74,7 +74,7 @@ class Migration(Migration):
|
|||
validationLevel="moderate",
|
||||
validationAction="error"
|
||||
)
|
||||
print(f"✅ Created users collection with validator")
|
||||
print("✅ Created users collection with validator")
|
||||
except Exception as e2:
|
||||
print(f"⚠️ Could not create collection: {e2}")
|
||||
|
||||
|
|
@ -134,4 +134,4 @@ class Migration(Migration):
|
|||
})
|
||||
|
||||
print(f"⚠️ Rolled back migration {self.version}: {self.description}")
|
||||
print(f"⚠️ WARNING: Linguist role users will fail validation!")
|
||||
print("⚠️ WARNING: Linguist role users will fail validation!")
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ class Migration(Migration):
|
|||
"validationLevel": "moderate",
|
||||
"validationAction": "error"
|
||||
})
|
||||
print(f"✅ Updated users collection validator")
|
||||
print("✅ Updated users collection validator")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Could not update validator: {e}")
|
||||
try:
|
||||
|
|
@ -79,7 +79,7 @@ class Migration(Migration):
|
|||
validationLevel="moderate",
|
||||
validationAction="error"
|
||||
)
|
||||
print(f"✅ Created users collection with validator")
|
||||
print("✅ Created users collection with validator")
|
||||
except Exception as e2:
|
||||
print(f"⚠️ Could not create collection: {e2}")
|
||||
|
||||
|
|
@ -139,4 +139,4 @@ class Migration(Migration):
|
|||
})
|
||||
|
||||
print(f"⚠️ Rolled back migration {self.version}: {self.description}")
|
||||
print(f"⚠️ WARNING: project_manager role users will fail validation!")
|
||||
print("⚠️ WARNING: project_manager role users will fail validation!")
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
"""Backfill memberships collection from existing pm_client_ids and team.member_user_ids."""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from app.migrations.migrator import Migration
|
||||
|
||||
|
|
@ -13,7 +13,7 @@ class Migration(Migration):
|
|||
self.description = "Backfill memberships from pm_client_ids and team member lists"
|
||||
|
||||
async def up(self) -> None:
|
||||
now = datetime.now(timezone.utc)
|
||||
now = datetime.now(UTC)
|
||||
upserted = 0
|
||||
|
||||
# 1. PROJECT_MANAGER users → MANAGER membership for each pm_client_id
|
||||
|
|
|
|||
|
|
@ -0,0 +1,53 @@
|
|||
"""Add PROCESSING_FAILED status to job schema validator and create failure indexes."""
|
||||
from app.migrations.migrator import Migration
|
||||
|
||||
|
||||
class Migration(Migration):
    """Allow the ``processing_failed`` job status and index failure queries.

    ``up`` appends ``processing_failed`` to the ``status`` enum of the ``jobs``
    ``$jsonSchema`` validator when such an enum exists, then creates two
    compound indexes on ``jobs``. ``down`` only drops the indexes; the
    validator change is left in place.
    """

    version = "2026-04-29-000000"
    description = "Add processing_failed status and failure/status compound indexes on jobs"

    async def up(self) -> None:
        """Extend the validator enum (best effort) and create the indexes."""
        db = self.db

        # Add processing_failed to the schema validator enum (if validator exists)
        try:
            listing = await db.command("listCollections", filter={"name": "jobs"})
            # The reply to a raw command is a plain document, not an async
            # iterable cursor: the matching collections sit in cursor.firstBatch.
            collections = listing.get("cursor", {}).get("firstBatch", [])
            existing_validator = (
                collections[0].get("options", {}).get("validator") if collections else None
            )
            if existing_validator:
                status_enum = (
                    existing_validator.get("$jsonSchema", {})
                    .get("properties", {})
                    .get("status", {})
                    .get("enum", [])
                )
                if status_enum and "processing_failed" not in status_enum:
                    # status_enum is the list held inside existing_validator, so
                    # appending here updates the document sent to collMod.
                    status_enum.append("processing_failed")
                    # NOTE(review): validationAction="warn" switches the collection
                    # to warn-only validation — confirm that is intended.
                    await db.command(
                        "collMod",
                        "jobs",
                        validator=existing_validator,
                        validationAction="warn",
                    )
        except Exception as exc:
            # Best effort: a missing or unsupported validator must not block the
            # index creation below, but the failure is reported rather than hidden.
            print(f"⚠️ Could not update jobs validator: {exc}")

        # Indexes for failure dashboard queries
        await db.jobs.create_index(
            [("failure.step", 1), ("status", 1)],
            name="idx_jobs_failure_step_status",
            background=True,
        )
        await db.jobs.create_index(
            [("status", 1), ("organization_id", 1), ("created_at", -1)],
            name="idx_jobs_status_org_created",
            background=True,
        )

    async def down(self) -> None:
        """Drop the two indexes created by ``up``."""
        db = self.db
        await db.jobs.drop_index("idx_jobs_failure_step_status")
        await db.jobs.drop_index("idx_jobs_status_org_created")
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
"""Create job_briefs collection with indexes."""
|
||||
from app.migrations.migrator import Migration
|
||||
|
||||
|
||||
class Migration(Migration):
    """Create the ``job_briefs`` collection together with its four indexes."""

    version = "2026-04-29-000001"
    description = "Create job_briefs collection and indexes"

    async def up(self) -> None:
        """Create the collection (ignoring failures) and build every index in order."""
        database = self.db

        # Explicitly create the collection; any error raised here is ignored.
        try:
            await database.create_collection("job_briefs")
        except Exception:
            pass  # already exists

        # (key specification, create_index options) in creation order.
        index_plan = (
            (
                [("organization_id", 1), ("status", 1), ("created_at", -1)],
                {"name": "idx_briefs_org_status_created", "background": True},
            ),
            (
                [("created_by", 1)],
                {"name": "idx_briefs_created_by", "background": True},
            ),
            (
                [("project_id", 1)],
                {"name": "idx_briefs_project_id", "background": True, "sparse": True},
            ),
            (
                [("job_id", 1)],
                {"name": "idx_briefs_job_id", "background": True, "sparse": True},
            ),
        )
        for key_spec, options in index_plan:
            await database.job_briefs.create_index(key_spec, **options)

    async def down(self) -> None:
        """Drop the four indexes by name; the collection itself is kept."""
        database = self.db
        for index_name in (
            "idx_briefs_org_status_created",
            "idx_briefs_created_by",
            "idx_briefs_project_id",
            "idx_briefs_job_id",
        ):
            await database.job_briefs.drop_index(index_name)
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
"""Backfill Membership.team_ids from Team.member_user_ids (MT-17)."""
|
||||
from app.migrations.migrator import Migration
|
||||
|
||||
|
||||
class Migration(Migration):
    """Copy team membership recorded on ``teams`` onto ``memberships.team_ids``."""

    version = "2026-04-30-000000"
    description = "Backfill team_ids on Membership records from Team.member_user_ids"

    async def up(self) -> None:
        """Add each team's id to its members' memberships, then index ``team_ids``."""
        database = self.db
        # Counts memberships that were actually modified; nothing is inserted.
        upserted = 0

        # Only teams with a non-empty member list, and only the fields read below.
        teams_with_members = {"member_user_ids": {"$exists": True, "$ne": []}}
        needed_fields = {"_id": 1, "client_id": 1, "member_user_ids": 1}

        async for team_doc in database.teams.find(teams_with_members, needed_fields):
            team_key = str(team_doc["_id"])
            organization_key = str(team_doc.get("client_id", ""))
            for member in team_doc.get("member_user_ids", []):
                # $addToSet keeps the array free of duplicates on repeated runs.
                outcome = await database.memberships.update_one(
                    {"user_id": str(member), "organization_id": organization_key},
                    {"$addToSet": {"team_ids": team_key}},
                )
                if outcome.modified_count:
                    upserted += 1

        # Ensure index for efficient team-based lookups
        await database.memberships.create_index(
            [("team_ids", 1)],
            name="idx_memberships_team_ids",
            background=True,
            sparse=True,
        )

        print(f"✅ Backfilled team_ids on {upserted} Membership records")

    async def down(self) -> None:
        """Remove ``team_ids`` from every membership and drop the index (best effort)."""
        database = self.db
        await database.memberships.update_many({}, {"$unset": {"team_ids": ""}})
        try:
            await database.memberships.drop_index("idx_memberships_team_ids")
        except Exception:
            # Any failure while dropping the index is deliberately ignored.
            pass
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
"""Add cancelled status to job schema validator."""
|
||||
from app.migrations.migrator import Migration
|
||||
|
||||
|
||||
class Migration(Migration):
    """Allow the ``cancelled`` job status in the ``jobs`` schema validator.

    ``up`` appends ``cancelled`` to the ``status`` enum of the existing
    ``$jsonSchema`` validator when such an enum exists. ``down`` is a no-op.
    """

    version = "2026-04-30-000001"
    description = "Add cancelled status to jobs collection schema validator"

    async def up(self) -> None:
        """Extend the validator's status enum with ``cancelled`` (best effort)."""
        db = self.db

        try:
            listing = await db.command("listCollections", filter={"name": "jobs"})
            # The reply to a raw command is a plain document, not an async
            # iterable cursor: the matching collections sit in cursor.firstBatch.
            collections = listing.get("cursor", {}).get("firstBatch", [])
            existing_validator = (
                collections[0].get("options", {}).get("validator") if collections else None
            )
            if existing_validator:
                status_enum = (
                    existing_validator.get("$jsonSchema", {})
                    .get("properties", {})
                    .get("status", {})
                    .get("enum", [])
                )
                if status_enum and "cancelled" not in status_enum:
                    # status_enum is the list held inside existing_validator, so
                    # appending here updates the document sent to collMod.
                    status_enum.append("cancelled")
                    # NOTE(review): validationAction="warn" switches the collection
                    # to warn-only validation — confirm that is intended.
                    await db.command(
                        "collMod",
                        "jobs",
                        validator=existing_validator,
                        validationAction="warn",
                    )
        except Exception as exc:
            # Best effort: a missing or unsupported validator must not fail the
            # migration, but the failure is reported rather than hidden.
            print(f"⚠️ Could not update jobs validator: {exc}")

    async def down(self) -> None:
        """No rollback: the extended enum is left in place."""
        pass
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
"""Replace status enum in $jsonSchema validator with the full current list."""
|
||||
from app.migrations.migrator import Migration
|
||||
|
||||
# Complete list of job status strings written into the validator's status enum.
ALL_STATUSES = [
    "created",
    "ingesting",
    "ai_processing",
    "pending_qc",
    "approved_english",
    "approved_source",
    "rejected",
    "qc_feedback",
    "translating",
    "tts_generating",
    "tts_failed",
    "rendering_video",
    "render_failed",
    "rendering_qc",
    "pending_final_review",
    "completed",
    "processing_failed",
    "cancelled",
]
|
||||
|
||||
|
||||
class Migration(Migration):
    """Overwrite the ``jobs`` validator's status enum with ``ALL_STATUSES``."""

    version = "2026-04-30-000002"
    description = "Fix status enum in jobs $jsonSchema validator (add processing_failed + cancelled)"

    async def up(self) -> None:
        """Replace the status enum and re-apply the validator.

        Returns without touching the database when the ``jobs`` collection,
        its validator, or the validator's ``status`` property is missing.
        """
        database = self.db

        reply = await database.command("listCollections", filter={"name": "jobs"})
        first_batch = reply.get("cursor", {}).get("firstBatch", [])

        # No collection entry, or an entry without a validator: nothing to fix.
        validator = first_batch[0].get("options", {}).get("validator") if first_batch else None
        if not validator:
            return

        status_schema = validator.get("$jsonSchema", {}).get("properties", {}).get("status")
        if not status_schema:
            return

        # status_schema is nested inside validator, so this edits the document
        # that is sent back with collMod.
        status_schema["enum"] = ALL_STATUSES

        await database.command(
            "collMod",
            "jobs",
            validator=validator,
            validationLevel="moderate",
            validationAction="error",
        )

    async def down(self) -> None:
        """No rollback is performed."""
        return None
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
"""Backfill source_has_ad=False on existing jobs and job_briefs."""
|
||||
from app.migrations.migrator import Migration
|
||||
|
||||
|
||||
class Migration(Migration):
    """Default ``source_has_ad`` to ``False`` on documents that lack the field."""

    version = "2026-05-08-000000"
    description = "Add source_has_ad field to jobs.source and job_briefs"

    async def up(self) -> None:
        """Set the flag to ``False`` wherever it is absent and report the counts."""
        database = self.db

        # Jobs keep the flag nested under "source"; briefs keep it at top level.
        jobs_missing_flag = {"source.source_has_ad": {"$exists": False}}
        briefs_missing_flag = {"source_has_ad": {"$exists": False}}

        jobs_result = await database.jobs.update_many(
            jobs_missing_flag,
            {"$set": {"source.source_has_ad": False}},
        )
        briefs_result = await database.job_briefs.update_many(
            briefs_missing_flag,
            {"$set": {"source_has_ad": False}},
        )

        print(f"✅ Backfilled source_has_ad on {jobs_result.modified_count} jobs, {briefs_result.modified_count} job_briefs")

    async def down(self) -> None:
        """Remove the flag from every job and job brief, whatever its value."""
        database = self.db
        await database.jobs.update_many({}, {"$unset": {"source.source_has_ad": ""}})
        await database.job_briefs.update_many({}, {"$unset": {"source_has_ad": ""}})
|
||||
|
|
@ -1,17 +1,18 @@
|
|||
"""Audit log model for tracking sensitive operations."""
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Optional
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from bson import ObjectId
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .user import PyObjectId
|
||||
|
||||
|
||||
class AuditAction(str, Enum):
|
||||
class AuditAction(StrEnum):
|
||||
"""Enumeration of auditable actions."""
|
||||
|
||||
|
||||
# Authentication actions
|
||||
LOGIN_SUCCESS = "auth.login.success"
|
||||
LOGIN_FAILURE = "auth.login.failure"
|
||||
|
|
@ -19,7 +20,7 @@ class AuditAction(str, Enum):
|
|||
TOKEN_REFRESH = "auth.token.refresh"
|
||||
PASSWORD_CHANGE = "auth.password.change"
|
||||
PASSWORD_RESET = "auth.password.reset"
|
||||
|
||||
|
||||
# User management actions
|
||||
USER_CREATE = "user.create"
|
||||
USER_UPDATE = "user.update"
|
||||
|
|
@ -27,7 +28,7 @@ class AuditAction(str, Enum):
|
|||
USER_ROLE_CHANGE = "user.role.change"
|
||||
USER_ACTIVATE = "user.activate"
|
||||
USER_DEACTIVATE = "user.deactivate"
|
||||
|
||||
|
||||
# Job management actions
|
||||
JOB_CREATE = "job.create"
|
||||
JOB_UPDATE = "job.update"
|
||||
|
|
@ -36,17 +37,21 @@ class AuditAction(str, Enum):
|
|||
JOB_REJECT = "job.reject"
|
||||
JOB_CANCEL = "job.cancel"
|
||||
JOB_STATUS_CHANGE = "job.status.change"
|
||||
|
||||
JOB_TASK_FAILED = "job.task.failed"
|
||||
JOB_RETRY = "job.retry"
|
||||
JOB_BULK_RETRY = "job.bulk_retry"
|
||||
|
||||
# File operations
|
||||
FILE_UPLOAD = "file.upload"
|
||||
FILE_DOWNLOAD = "file.download"
|
||||
FILE_DELETE = "file.delete"
|
||||
FILE_ACCESS = "file.access"
|
||||
|
||||
|
||||
# VTT editing actions
|
||||
VTT_EDIT = "vtt.edit"
|
||||
VTT_APPROVE = "vtt.approve"
|
||||
VTT_REJECT = "vtt.reject"
|
||||
VTT_RETRANSLATE = "vtt.retranslate"
|
||||
|
||||
# Per-language QC actions
|
||||
LANGUAGE_QC_ASSIGN = "language_qc.assign"
|
||||
|
|
@ -59,19 +64,62 @@ class AuditAction(str, Enum):
|
|||
LANGUAGE_QC_REJECT = "language_qc.reject"
|
||||
LANGUAGE_QC_REOPEN = "language_qc.reopen"
|
||||
LANGUAGE_QC_COMMENT = "language_qc.comment"
|
||||
|
||||
|
||||
# Admin actions
|
||||
ADMIN_CONFIG_CHANGE = "admin.config.change"
|
||||
ADMIN_SYSTEM_ACTION = "admin.system.action"
|
||||
ADMIN_DATA_EXPORT = "admin.data.export"
|
||||
ADMIN_AUDIT_ACCESS = "admin.audit.access"
|
||||
|
||||
|
||||
# Glossary management
|
||||
GLOSSARY_UPLOAD = "glossary.upload"
|
||||
GLOSSARY_VERSION_UPLOAD = "glossary.version.upload"
|
||||
GLOSSARY_ACTIVATE = "glossary.activate"
|
||||
GLOSSARY_ARCHIVE = "glossary.archive"
|
||||
|
||||
# Client management
|
||||
CLIENT_CREATE = "client.create"
|
||||
CLIENT_UPDATE = "client.update"
|
||||
CLIENT_DEACTIVATE = "client.deactivate"
|
||||
CLIENT_PM_ASSIGN = "client.pm_assign"
|
||||
CLIENT_PM_REMOVE = "client.pm_remove"
|
||||
CLIENT_TEAM_CREATE = "client.team_create"
|
||||
CLIENT_TEAM_UPDATE = "client.team_update"
|
||||
CLIENT_TEAM_DELETE = "client.team_delete"
|
||||
CLIENT_TEAM_MEMBER_ADD = "client.team_member_add"
|
||||
CLIENT_TEAM_MEMBER_REMOVE = "client.team_member_remove"
|
||||
CLIENT_PROJECT_CREATE = "client.project_create"
|
||||
CLIENT_PROJECT_UPDATE = "client.project_update"
|
||||
CLIENT_PROJECT_ARCHIVE = "client.project_archive"
|
||||
|
||||
# Organization management
|
||||
ORG_CREATE = "org.create"
|
||||
ORG_UPDATE = "org.update"
|
||||
ORG_MEMBER_ADD = "org.member_add"
|
||||
ORG_MEMBER_UPDATE = "org.member_update"
|
||||
ORG_MEMBER_REMOVE = "org.member_remove"
|
||||
|
||||
# Invitations
|
||||
INVITATION_CREATE = "invitation.create"
|
||||
INVITATION_REVOKE = "invitation.revoke"
|
||||
INVITATION_ACCEPT = "invitation.accept"
|
||||
|
||||
# Language QC (additional)
|
||||
LANGUAGE_QC_BULK_ASSIGN = "language_qc.bulk_assign"
|
||||
LANGUAGE_QC_START_WORK = "language_qc.start_work"
|
||||
LANGUAGE_QC_MARK_CUE_REVIEWED = "language_qc.mark_cue_reviewed"
|
||||
|
||||
# Brief management
|
||||
BRIEF_CREATE = "brief.create"
|
||||
BRIEF_UPDATE = "brief.update"
|
||||
BRIEF_SUBMIT = "brief.submit"
|
||||
BRIEF_APPROVE = "brief.approve"
|
||||
|
||||
# Share tokens
|
||||
SHARE_TOKEN_CREATE = "share.token_create"
|
||||
SHARE_TOKEN_REVOKE = "share.token_revoke"
|
||||
SHARE_CLIENT_DECISION = "share.client_decision"
|
||||
|
||||
# Security events
|
||||
RATE_LIMIT_EXCEEDED = "security.rate_limit.exceeded"
|
||||
VALIDATION_FAILURE = "security.validation.failure"
|
||||
|
|
@ -79,9 +127,9 @@ class AuditAction(str, Enum):
|
|||
SUSPICIOUS_ACTIVITY = "security.suspicious.activity"
|
||||
|
||||
|
||||
class AuditLogSeverity(str, Enum):
|
||||
class AuditLogSeverity(StrEnum):
|
||||
"""Severity levels for audit events."""
|
||||
|
||||
|
||||
INFO = "info" # Normal operations
|
||||
WARNING = "warning" # Suspicious but not critical
|
||||
ERROR = "error" # Failed operations
|
||||
|
|
@ -90,43 +138,43 @@ class AuditLogSeverity(str, Enum):
|
|||
|
||||
class AuditLog(BaseModel):
|
||||
"""Audit log entry model."""
|
||||
|
||||
id: Optional[PyObjectId] = Field(default_factory=lambda: str(ObjectId()), alias="_id")
|
||||
|
||||
|
||||
id: PyObjectId | None = Field(default_factory=lambda: str(ObjectId()), alias="_id")
|
||||
|
||||
# Core audit fields
|
||||
timestamp: datetime = Field(default_factory=datetime.utcnow)
|
||||
action: AuditAction
|
||||
severity: AuditLogSeverity = AuditLogSeverity.INFO
|
||||
|
||||
|
||||
# Actor information
|
||||
user_id: Optional[PyObjectId] = None
|
||||
user_email: Optional[str] = None
|
||||
user_role: Optional[str] = None
|
||||
|
||||
user_id: PyObjectId | None = None
|
||||
user_email: str | None = None
|
||||
user_role: str | None = None
|
||||
|
||||
# Request context
|
||||
ip_address: Optional[str] = None
|
||||
user_agent: Optional[str] = None
|
||||
request_id: Optional[str] = None
|
||||
session_id: Optional[str] = None
|
||||
|
||||
ip_address: str | None = None
|
||||
user_agent: str | None = None
|
||||
request_id: str | None = None
|
||||
session_id: str | None = None
|
||||
|
||||
# Resource information
|
||||
resource_type: Optional[str] = None # e.g., "job", "user", "file"
|
||||
resource_id: Optional[str] = None
|
||||
resource_name: Optional[str] = None
|
||||
|
||||
resource_type: str | None = None # e.g., "job", "user", "file"
|
||||
resource_id: str | None = None
|
||||
resource_name: str | None = None
|
||||
|
||||
# Action details
|
||||
description: str
|
||||
details: Dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
details: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
# Outcome
|
||||
success: bool = True
|
||||
error_message: Optional[str] = None
|
||||
|
||||
error_message: str | None = None
|
||||
|
||||
# Additional metadata
|
||||
environment: str = "prod"
|
||||
service_name: str = "accessible-video-api"
|
||||
api_version: str = "v1"
|
||||
|
||||
|
||||
class Config:
|
||||
populate_by_name = True
|
||||
arbitrary_types_allowed = True
|
||||
|
|
@ -135,49 +183,49 @@ class AuditLog(BaseModel):
|
|||
|
||||
class AuditLogCreate(BaseModel):
|
||||
"""Schema for creating audit log entries."""
|
||||
|
||||
|
||||
action: AuditAction
|
||||
severity: AuditLogSeverity = AuditLogSeverity.INFO
|
||||
description: str
|
||||
|
||||
|
||||
# Optional fields that can be provided
|
||||
user_id: Optional[PyObjectId] = None
|
||||
user_email: Optional[str] = None
|
||||
user_role: Optional[str] = None
|
||||
ip_address: Optional[str] = None
|
||||
user_agent: Optional[str] = None
|
||||
request_id: Optional[str] = None
|
||||
resource_type: Optional[str] = None
|
||||
resource_id: Optional[str] = None
|
||||
resource_name: Optional[str] = None
|
||||
details: Dict[str, Any] = Field(default_factory=dict)
|
||||
user_id: PyObjectId | None = None
|
||||
user_email: str | None = None
|
||||
user_role: str | None = None
|
||||
ip_address: str | None = None
|
||||
user_agent: str | None = None
|
||||
request_id: str | None = None
|
||||
resource_type: str | None = None
|
||||
resource_id: str | None = None
|
||||
resource_name: str | None = None
|
||||
details: dict[str, Any] = Field(default_factory=dict)
|
||||
success: bool = True
|
||||
error_message: Optional[str] = None
|
||||
error_message: str | None = None
|
||||
|
||||
|
||||
class AuditLogQuery(BaseModel):
|
||||
"""Schema for querying audit logs."""
|
||||
|
||||
|
||||
# Time range
|
||||
start_date: Optional[datetime] = None
|
||||
end_date: Optional[datetime] = None
|
||||
|
||||
start_date: datetime | None = None
|
||||
end_date: datetime | None = None
|
||||
|
||||
# Filters
|
||||
action: Optional[AuditAction] = None
|
||||
severity: Optional[AuditLogSeverity] = None
|
||||
user_id: Optional[PyObjectId] = None
|
||||
user_email: Optional[str] = None
|
||||
resource_type: Optional[str] = None
|
||||
resource_id: Optional[str] = None
|
||||
success: Optional[bool] = None
|
||||
|
||||
action: AuditAction | None = None
|
||||
severity: AuditLogSeverity | None = None
|
||||
user_id: PyObjectId | None = None
|
||||
user_email: str | None = None
|
||||
resource_type: str | None = None
|
||||
resource_id: str | None = None
|
||||
success: bool | None = None
|
||||
|
||||
# Search
|
||||
search: Optional[str] = None # Full-text search in description and details
|
||||
|
||||
search: str | None = None # Full-text search in description and details
|
||||
|
||||
# Pagination
|
||||
skip: int = 0
|
||||
limit: int = 100
|
||||
|
||||
|
||||
# Sorting
|
||||
sort_by: str = "timestamp"
|
||||
sort_order: int = -1 # -1 for descending, 1 for ascending
|
||||
|
|
@ -185,7 +233,7 @@ class AuditLogQuery(BaseModel):
|
|||
|
||||
class AuditLogResponse(BaseModel):
|
||||
"""Response schema for audit log queries."""
|
||||
|
||||
|
||||
logs: list[AuditLog]
|
||||
total_count: int
|
||||
page: int
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from datetime import datetime
|
||||
from typing import Optional, Annotated
|
||||
from typing import Annotated
|
||||
|
||||
from bson import ObjectId
|
||||
from pydantic import BaseModel, BeforeValidator
|
||||
|
|
@ -17,12 +17,12 @@ PyObjectId = Annotated[str, BeforeValidator(validate_object_id)]
|
|||
|
||||
|
||||
class Client(BaseModel):
|
||||
id: Optional[str] = None
|
||||
id: str | None = None
|
||||
name: str
|
||||
slug: str
|
||||
is_active: bool = True
|
||||
created_at: Optional[datetime] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
created_at: datetime | None = None
|
||||
updated_at: datetime | None = None
|
||||
|
||||
|
||||
class ClientCreate(BaseModel):
|
||||
|
|
@ -31,18 +31,18 @@ class ClientCreate(BaseModel):
|
|||
|
||||
|
||||
class ClientUpdate(BaseModel):
|
||||
name: Optional[str] = None
|
||||
slug: Optional[str] = None
|
||||
is_active: Optional[bool] = None
|
||||
name: str | None = None
|
||||
slug: str | None = None
|
||||
is_active: bool | None = None
|
||||
|
||||
|
||||
class Team(BaseModel):
|
||||
id: Optional[str] = None
|
||||
id: str | None = None
|
||||
name: str
|
||||
client_id: str
|
||||
member_user_ids: list[str] = []
|
||||
created_at: Optional[datetime] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
created_at: datetime | None = None
|
||||
updated_at: datetime | None = None
|
||||
|
||||
|
||||
class TeamCreate(BaseModel):
|
||||
|
|
@ -50,31 +50,31 @@ class TeamCreate(BaseModel):
|
|||
|
||||
|
||||
class TeamUpdate(BaseModel):
|
||||
name: Optional[str] = None
|
||||
name: str | None = None
|
||||
|
||||
|
||||
class Project(BaseModel):
|
||||
id: Optional[str] = None
|
||||
id: str | None = None
|
||||
name: str
|
||||
client_id: str
|
||||
is_active: bool = True
|
||||
default_languages: list[str] = []
|
||||
default_linguist_id: Optional[str] = None
|
||||
default_reviewer_id: Optional[str] = None
|
||||
created_at: Optional[datetime] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
default_linguist_id: str | None = None
|
||||
default_reviewer_id: str | None = None
|
||||
created_at: datetime | None = None
|
||||
updated_at: datetime | None = None
|
||||
|
||||
|
||||
class ProjectCreate(BaseModel):
|
||||
name: str
|
||||
default_languages: list[str] = []
|
||||
default_linguist_id: Optional[str] = None
|
||||
default_reviewer_id: Optional[str] = None
|
||||
default_linguist_id: str | None = None
|
||||
default_reviewer_id: str | None = None
|
||||
|
||||
|
||||
class ProjectUpdate(BaseModel):
|
||||
name: Optional[str] = None
|
||||
is_active: Optional[bool] = None
|
||||
default_languages: Optional[list[str]] = None
|
||||
default_linguist_id: Optional[str] = None
|
||||
default_reviewer_id: Optional[str] = None
|
||||
name: str | None = None
|
||||
is_active: bool | None = None
|
||||
default_languages: list[str] | None = None
|
||||
default_linguist_id: str | None = None
|
||||
default_reviewer_id: str | None = None
|
||||
|
|
|
|||
|
|
@ -91,6 +91,9 @@ class GlossaryResponse(BaseModel):
|
|||
source: GlossarySource
|
||||
status: GlossaryStatus
|
||||
current_version_id: str | None = None
|
||||
current_version_embedding_status: EmbeddingStatus | None = None
|
||||
current_version_embedded_count: int | None = None
|
||||
current_version_term_count: int | None = None
|
||||
created_at: datetime
|
||||
created_by: str
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, EmailStr
|
||||
|
||||
|
|
@ -7,7 +6,7 @@ from .organization import OrgRole
|
|||
|
||||
|
||||
class Invitation(BaseModel):
|
||||
id: Optional[str] = None
|
||||
id: str | None = None
|
||||
email: str
|
||||
organization_id: str
|
||||
role_in_org: OrgRole
|
||||
|
|
@ -15,9 +14,9 @@ class Invitation(BaseModel):
|
|||
token_hash: str
|
||||
invited_by_user_id: str
|
||||
expires_at: datetime
|
||||
accepted_at: Optional[datetime] = None
|
||||
revoked_at: Optional[datetime] = None
|
||||
created_at: Optional[datetime] = None
|
||||
accepted_at: datetime | None = None
|
||||
revoked_at: datetime | None = None
|
||||
created_at: datetime | None = None
|
||||
|
||||
|
||||
class InvitationCreate(BaseModel):
|
||||
|
|
@ -40,9 +39,9 @@ class InvitationPreviewResponse(BaseModel):
|
|||
|
||||
class InvitationAcceptRequest(BaseModel):
|
||||
token: str
|
||||
full_name: Optional[str] = None
|
||||
password: Optional[str] = None
|
||||
ms_id_token: Optional[str] = None
|
||||
full_name: str | None = None
|
||||
password: str | None = None
|
||||
ms_id_token: str | None = None
|
||||
|
||||
|
||||
class InvitationResponse(BaseModel):
|
||||
|
|
@ -52,9 +51,9 @@ class InvitationResponse(BaseModel):
|
|||
role_in_org: OrgRole
|
||||
invited_by_user_id: str
|
||||
expires_at: datetime
|
||||
accepted_at: Optional[datetime] = None
|
||||
revoked_at: Optional[datetime] = None
|
||||
created_at: Optional[datetime] = None
|
||||
accepted_at: datetime | None = None
|
||||
revoked_at: datetime | None = None
|
||||
created_at: datetime | None = None
|
||||
is_expired: bool = False
|
||||
is_accepted: bool = False
|
||||
is_revoked: bool = False
|
||||
|
|
|
|||
|
|
@ -1,11 +1,13 @@
|
|||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Literal, Optional
|
||||
from enum import StrEnum
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field, constr
|
||||
|
||||
FailureStep = Literal["ingestion", "ai_processing", "translation", "tts", "render"]
|
||||
|
||||
class JobStatus(str, Enum):
|
||||
|
||||
class JobStatus(StrEnum):
|
||||
CREATED = "created"
|
||||
INGESTING = "ingesting"
|
||||
AI_PROCESSING = "ai_processing"
|
||||
|
|
@ -16,12 +18,14 @@ class JobStatus(str, Enum):
|
|||
QC_FEEDBACK = "qc_feedback"
|
||||
TRANSLATING = "translating"
|
||||
TTS_GENERATING = "tts_generating"
|
||||
TTS_FAILED = "tts_failed" # TTS synthesis failed after retries, requires reprocessing
|
||||
TTS_FAILED = "tts_failed" # legacy: use PROCESSING_FAILED + failure.step="tts" for new failures
|
||||
RENDERING_VIDEO = "rendering_video" # Accessible video rendering in progress
|
||||
RENDER_FAILED = "render_failed" # Accessible video rendering failed, requires reprocessing
|
||||
RENDER_FAILED = "render_failed" # legacy: use PROCESSING_FAILED + failure.step="render" for new failures
|
||||
PROCESSING_FAILED = "processing_failed" # unified failure status; see Job.failure for step details
|
||||
RENDERING_QC = "rendering_qc" # Re-rendering accessible video during QC review
|
||||
PENDING_FINAL_REVIEW = "pending_final_review"
|
||||
COMPLETED = "completed"
|
||||
CANCELLED = "cancelled"
|
||||
|
||||
@classmethod
|
||||
def is_approved(cls, status: str) -> bool:
|
||||
|
|
@ -29,14 +33,24 @@ class JobStatus(str, Enum):
|
|||
return status in [cls.APPROVED_ENGLISH.value, cls.APPROVED_SOURCE.value]
|
||||
|
||||
|
||||
class JobFailure(BaseModel):
|
||||
step: FailureStep
|
||||
type: str
|
||||
message: str
|
||||
retriable: bool = True
|
||||
occurred_at: datetime
|
||||
retry_count: int = 0
|
||||
|
||||
|
||||
class Source(BaseModel):
|
||||
filename: str
|
||||
original_filename: Optional[str] = None
|
||||
original_filename: str | None = None
|
||||
gcs_uri: str
|
||||
duration_s: Optional[float] = None
|
||||
duration_s: float | None = None
|
||||
language: constr(min_length=2, max_length=10) = "en" # Final source language (from detection or explicit)
|
||||
language_hint: Optional[str] = None # User-provided hint for non-English videos
|
||||
detected_language: Optional[str] = None # AI-detected language from Gemini
|
||||
language_hint: str | None = None # User-provided hint for non-English videos
|
||||
detected_language: str | None = None # AI-detected language from Gemini
|
||||
source_has_ad: bool = False # Source video already contains professional audio descriptions
|
||||
|
||||
|
||||
class TTSPreferences(BaseModel):
|
||||
|
|
@ -50,10 +64,10 @@ class TTSPreferences(BaseModel):
|
|||
style_preset: Literal[
|
||||
"neutral", "calm", "energetic", "professional", "warm", "documentary", "custom"
|
||||
] = "neutral"
|
||||
custom_style_prompt: Optional[str] = None # Used when style_preset is "custom"
|
||||
custom_style_prompt: str | None = None # Used when style_preset is "custom"
|
||||
# ElevenLabs-specific settings
|
||||
stability: Optional[float] = None # 0.0-1.0, default 0.5 when used
|
||||
similarity_boost: Optional[float] = None # 0.0-1.0, default 0.5 when used
|
||||
stability: float | None = None # 0.0-1.0, default 0.5 when used
|
||||
similarity_boost: float | None = None # 0.0-1.0, default 0.5 when used
|
||||
|
||||
|
||||
class RequestedOutputs(BaseModel):
|
||||
|
|
@ -61,22 +75,24 @@ class RequestedOutputs(BaseModel):
|
|||
audio_description_vtt: bool = True
|
||||
audio_description_mp3: bool = True
|
||||
accessible_video_mp4: bool = False # Rendered video with embedded audio descriptions
|
||||
accessible_video_method: Optional[Literal["overlay", "pause_insert"]] = None # User-selected method
|
||||
accessible_video_method: Literal["overlay", "pause_insert"] | None = None # User-selected method
|
||||
sdh_vtt: bool = False # SDH (Subtitles for Deaf and Hard of Hearing) captions with speaker labels, sound effects, music notation
|
||||
descriptive_transcript: bool = False # WCAG-compliant combined speech+description transcript text file
|
||||
languages: list[str] = []
|
||||
transcreation: list[str] = []
|
||||
tts_preferences: Optional[TTSPreferences] = None
|
||||
translation_mode: Literal["traditional", "video_native"] = "video_native"
|
||||
tts_preferences: TTSPreferences | None = None
|
||||
translation_mode: Literal["traditional", "video_native"] = "traditional"
|
||||
|
||||
|
||||
class PausePointData(BaseModel):
    """Pause point timing data for accessible video editing during QC."""

    cue_index: int  # AD cue index this pause point belongs to
    original_ms: float  # Rendered timeline position (ms) - for UI display
    source_ms: float | None = None  # Source video cut point (ms) - for re-rendering (None = use original_ms)
    adjusted_ms: float | None = None  # User-adjusted timestamp (ms), None = use original
    min_bound_ms: float  # Minimum allowed value (end of previous AD segment)
    max_bound_ms: float  # Maximum allowed value (start of next AD segment)
    natural_gap_ms: float = 0.0  # Natural silence already present at pause point (ms); used to size silence buffers
|
||||
|
||||
|
||||
class VideoSegmentMetadata(BaseModel):
|
||||
|
|
@ -87,16 +103,16 @@ class VideoSegmentMetadata(BaseModel):
|
|||
gcs_uri: str # GCS path to segment MP4
|
||||
duration_ms: float # Actual segment duration (ms)
|
||||
is_freeze_frame: bool = False # True if this is a freeze frame segment with AD audio
|
||||
cue_index: Optional[int] = None # AD cue index (only for freeze frame segments)
|
||||
cue_index: int | None = None # AD cue index (only for freeze frame segments)
|
||||
|
||||
|
||||
class TTSRegenerationRequest(BaseModel):
    """Request to regenerate TTS for a specific cue during QC."""

    cue_index: int
    requested_at: datetime
    new_text: str | None = None  # If provided, use this text instead of current VTT
    status: Literal["pending", "processing", "completed", "failed"] = "pending"
    error_message: str | None = None
|
||||
|
||||
|
||||
class AccessibleVideoEditState(BaseModel):
|
||||
|
|
@ -104,45 +120,45 @@ class AccessibleVideoEditState(BaseModel):
|
|||
pause_points: list[PausePointData] = []
|
||||
video_segments: list[VideoSegmentMetadata] = []
|
||||
tts_regeneration_queue: list[TTSRegenerationRequest] = []
|
||||
last_render_at: Optional[datetime] = None
|
||||
last_render_at: datetime | None = None
|
||||
whisper_refine_enabled: bool = False # Default: off (user enables if cue positions changed)
|
||||
|
||||
|
||||
class LangOutput(BaseModel):
    """Output artefact locations for one language of a job.

    Every field is optional and defaults to ``None``.
    """

    captions_vtt_gcs: str | None = None
    sdh_captions_vtt_gcs: str | None = None  # SDH-format captions (speaker labels, sound effects, music)
    ad_vtt_gcs: str | None = None
    ad_mp3_gcs: str | None = None
    # Accessible video outputs
    accessible_video_gcs: str | None = None  # Rendered accessible MP4
    accessible_video_method: Literal["overlay", "pause_insert"] | None = None
    retimed_captions_vtt_gcs: str | None = None  # Re-timed captions for pause-insert method
    ad_cues_gcs_prefix: str | None = None  # GCS path prefix for per-cue MP3 segments
    ad_cue_manifest: list[dict] | None = None  # Per-cue manifest: [{cue_index, gcs_uri, text, duration_s}]
    # QC editing state for accessible video
    video_segments_gcs_prefix: str | None = None  # GCS prefix for persisted video segments
    accessible_video_edit_state: AccessibleVideoEditState | None = None
    origin: Literal["translate", "transcreate", "gemini_translate", "video_native"] | None = None
    qa_notes: str | None = None
    descriptive_transcript_gcs: str | None = None  # WCAG-compliant combined speech+description transcript
|
||||
|
||||
|
||||
class ReviewHistoryItem(BaseModel):
    """One entry in a review's history: a timestamp, a status and optional attribution/notes."""

    at: datetime
    status: str
    by: str | None = None
    notes: str | None = None
|
||||
|
||||
|
||||
class Review(BaseModel):
    """Review section of a job: free-text notes, the reviewer and the history entries."""

    notes: str | None = ""  # defaults to an empty string, but None is also accepted
    reviewer_id: str | None = None
    history: list[ReviewHistoryItem] = []
|
||||
|
||||
|
||||
# ── Per-language QC ───────────────────────────────────────────────────────────
|
||||
|
||||
class LanguageQCStatus(str, Enum):
|
||||
class LanguageQCStatus(StrEnum):
|
||||
PENDING = "pending"
|
||||
IN_PROGRESS = "in_progress" # linguist is working
|
||||
PENDING_REVIEW = "pending_review" # linguist submitted, awaiting reviewer
|
||||
|
|
@ -162,8 +178,8 @@ class LanguageQCEvent(BaseModel):
|
|||
"approve", "reject", "reopen",
|
||||
"comment_added",
|
||||
]
|
||||
notes: Optional[str] = None
|
||||
previous_assignee_id: Optional[str] = None
|
||||
notes: str | None = None
|
||||
previous_assignee_id: str | None = None
|
||||
|
||||
|
||||
class LanguageQCComment(BaseModel):
|
||||
|
|
@ -178,25 +194,29 @@ class LanguageQCComment(BaseModel):
|
|||
class LanguageQCState(BaseModel):
    """Per-language QC state: linguist slot, reviewer slot, reviewer progress and final outcome."""

    status: LanguageQCStatus = LanguageQCStatus.PENDING
    # Linguist slot
    assigned_linguist_id: str | None = None
    assigned_linguist_email: str | None = None
    assigned_linguist_name: str | None = None
    assigned_at: datetime | None = None
    assigned_by_user_id: str | None = None
    submitted_for_review_at: datetime | None = None
    linguist_deadline: datetime | None = None  # when linguist must submit
    # Reviewer slot
    assigned_reviewer_id: str | None = None
    assigned_reviewer_email: str | None = None
    assigned_reviewer_name: str | None = None
    assigned_reviewer_at: datetime | None = None
    review_started_at: datetime | None = None
    reviewer_deadline: datetime | None = None  # when reviewer must decide
    # Reviewer progress
    total_cues: int | None = None  # set when reviewer opens the job
    reviewed_cues: int = 0  # incremented as reviewer marks cues reviewed
    # Final outcome
    reviewed_at: datetime | None = None
    reviewed_by_user_id: str | None = None
    reviewed_by_email: str | None = None
    notes: str | None = None
    reject_category: str | None = None  # e.g. timing/mistranslation/terminology/profanity/length
    history: list[LanguageQCEvent] = []
    comments: list[LanguageQCComment] = []
|
||||
|
||||
|
|
@ -209,39 +229,47 @@ class QCAssignment(BaseModel):
|
|||
|
||||
|
||||
class AISection(BaseModel):
    """AI-related data stored on a job: the raw ingestion JSON and a confidence value."""

    ingestion_json: dict[str, Any] | None = None
    confidence: float | None = None
|
||||
|
||||
|
||||
class AccessibleVideoProgressItem(BaseModel):
    """Progress tracking for accessible video rendering per language."""

    status: Literal["pending", "rendering", "completed", "failed"] = "pending"
    method: Literal["overlay", "pause_insert"] | None = None
    error_message: str | None = None
    started_at: datetime | None = None
    completed_at: datetime | None = None
|
||||
|
||||
|
||||
class Job(BaseModel):
|
||||
id: Optional[str] = Field(None, alias="_id")
|
||||
id: str | None = Field(None, alias="_id")
|
||||
client_id: str
|
||||
title: str
|
||||
source: Source
|
||||
requested_outputs: RequestedOutputs
|
||||
status: JobStatus = JobStatus.CREATED
|
||||
review: Review = Review()
|
||||
outputs: Optional[dict[str, LangOutput]] = None
|
||||
accessible_video_progress: Optional[dict[str, AccessibleVideoProgressItem]] = None
|
||||
ai: Optional[AISection] = None
|
||||
error: Optional[dict[str, Any]] = None
|
||||
tts_rewrites: Optional[list[dict[str, Any]]] = None # Track auto-rewritten TTS cues
|
||||
project_id: Optional[str] = None # Platform project this job belongs to (Client → Project → Job)
|
||||
brand_context: Optional[str] = None # Brand names present in the video for accurate product identification
|
||||
cost_tracker_project_id: Optional[str] = None # External project ID for AI cost attribution
|
||||
outputs: dict[str, LangOutput] | None = None
|
||||
accessible_video_progress: dict[str, AccessibleVideoProgressItem] | None = None
|
||||
ai: AISection | None = None
|
||||
error: dict[str, Any] | None = None
|
||||
failure: JobFailure | None = None # structured failure info; see failure.step for pipeline stage
|
||||
retry_count: int = 0 # total number of manual retries attempted
|
||||
tts_rewrites: list[dict[str, Any]] | None = None # Track auto-rewritten TTS cues
|
||||
project_id: str | None = None # Platform project this job belongs to (Client → Project → Job)
|
||||
organization_id: str | None = None # org-tenant ID; backfilled by 2026-04-28-000003 migration
|
||||
brief_id: str | None = None # JobBrief that originated this job (W-12)
|
||||
gcs_prefix: str | None = None # GCS path prefix; None = legacy flat {job_id}/ layout
|
||||
initial_linguist_id: str | None = None
|
||||
initial_reviewer_id: str | None = None
|
||||
brand_context: str | None = None # Brand names present in the video for accurate product identification
|
||||
cost_tracker_project_id: str | None = None # External project ID for AI cost attribution
|
||||
deadline: datetime | None = None # job-level PM deadline (overdue if past and not completed)
|
||||
language_qc: dict[str, LanguageQCState] = {} # per-language QC state, keyed by lang code
|
||||
qc_assignments: list[QCAssignment] = [] # denormalized for linguist-queue queries
|
||||
created_at: Optional[datetime] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
created_at: datetime | None = None
|
||||
updated_at: datetime | None = None
|
||||
|
||||
class Config:
|
||||
populate_by_name = True
|
||||
|
|
@ -251,15 +279,17 @@ class Job(BaseModel):
|
|||
class JobCreate(BaseModel):
    """Payload for creating a job."""

    title: str
    source_is_english: bool = True  # True = English source, False = other language (auto-detect)
    language_hint: str | None = None  # Optional hint when source_is_english=False
    requested_outputs: RequestedOutputs
    brand_context: str | None = None  # Comma-separated brand names present in the video (e.g. "Sellotape, Coca-Cola")
    source_has_ad: bool = False  # Source video already contains professional audio descriptions
|
||||
|
||||
|
||||
class JobUpdate(BaseModel):
    """Partial update for a job; every field is optional and defaults to ``None``."""

    title: str | None = None
    status: JobStatus | None = None
    review: Review | None = None
    outputs: dict[str, LangOutput] | None = None
    ai: AISection | None = None
    error: dict[str, Any] | None = None
    deadline: datetime | None = None
|
||||
|
|
|
|||
75
backend/app/models/job_brief.py
Normal file
75
backend/app/models/job_brief.py
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
"""Job Brief model — pre-approved work order submitted before job creation."""
|
||||
from datetime import datetime
|
||||
from enum import StrEnum
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .job import RequestedOutputs
|
||||
|
||||
|
||||
class BriefStatus(StrEnum):
|
||||
DRAFT = "draft"
|
||||
SUBMITTED = "submitted"
|
||||
APPROVED = "approved"
|
||||
REJECTED = "rejected"
|
||||
FULFILLED = "fulfilled"
|
||||
|
||||
|
||||
class JobBrief(BaseModel):
    """Job brief — pre-approved work order submitted before job creation.

    ``id`` is populated from the ``_id`` alias; ``populate_by_name`` also
    allows it to be set through the field name.
    """

    id: str | None = Field(None, alias="_id")
    organization_id: str
    project_id: str | None = None
    title: str
    description: str | None = None
    requested_outputs: RequestedOutputs
    languages: list[str] = []
    deadline: datetime | None = None
    status: BriefStatus = BriefStatus.DRAFT
    created_by: str
    # Declared so that an assignee accepted by JobBriefCreate and returned by
    # JobBriefResponse is kept when a brief is loaded through this model
    # (undeclared keys are ignored under Pydantic's default settings).
    assignee_id: str | None = None
    job_id: str | None = None
    # NOTE: datetime.utcnow yields naive UTC timestamps and is deprecated since Python 3.12.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    updated_at: datetime = Field(default_factory=datetime.utcnow)
    submitted_at: datetime | None = None
    approved_by: str | None = None
    reject_reason: str | None = None

    class Config:
        populate_by_name = True
|
||||
|
||||
|
||||
class JobBriefCreate(BaseModel):
    """Payload for creating a job brief; only ``title`` and ``requested_outputs`` are required."""

    title: str
    description: str | None = None
    requested_outputs: RequestedOutputs
    languages: list[str] = []
    deadline: datetime | None = None
    project_id: str | None = None
    assignee_id: str | None = None
    source_has_ad: bool = False  # Source video already contains professional audio descriptions
|
||||
|
||||
|
||||
class JobBriefUpdate(BaseModel):
    """Partial update for a job brief; every field is optional and defaults to ``None``."""

    title: str | None = None
    description: str | None = None
    requested_outputs: RequestedOutputs | None = None
    languages: list[str] | None = None
    deadline: datetime | None = None
|
||||
|
||||
|
||||
class JobBriefResponse(BaseModel):
    """Response shape for a job brief.

    Unlike the creation payload, ``id`` is required and ``created_at``,
    ``updated_at`` and ``submitted_at`` are typed as strings, while
    ``deadline`` stays a datetime.
    """

    id: str
    organization_id: str
    project_id: str | None = None
    title: str
    description: str | None = None
    requested_outputs: RequestedOutputs
    languages: list[str]
    deadline: datetime | None = None
    status: BriefStatus
    created_by: str
    assignee_id: str | None = None
    job_id: str | None = None
    created_at: str
    updated_at: str
    submitted_at: str | None = None
    approved_by: str | None = None
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
|
@ -7,12 +6,13 @@ from .organization import OrgRole
|
|||
|
||||
|
||||
class Membership(BaseModel):
    """A user's membership in an organization, with their role in that org."""

    id: str | None = None
    user_id: str
    organization_id: str
    role_in_org: OrgRole
    team_ids: list[str] = []  # teams the user belongs to within this org (MT-17)
    created_at: datetime | None = None
    created_by: str | None = None
|
||||
|
||||
|
||||
class MembershipCreate(BaseModel):
|
||||
|
|
@ -31,4 +31,4 @@ class MemberDetail(BaseModel):
|
|||
email: str
|
||||
full_name: str
|
||||
role_in_org: OrgRole
|
||||
created_at: Optional[datetime] = None
|
||||
created_at: datetime | None = None
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
from enum import StrEnum
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class OrgRole(str, Enum):
|
||||
class OrgRole(StrEnum):
|
||||
OWNER = "owner"
|
||||
ADMIN = "admin"
|
||||
MANAGER = "manager"
|
||||
|
|
@ -30,13 +29,13 @@ class OrgRole(str, Enum):
|
|||
|
||||
|
||||
class Organization(BaseModel):
    """An organization record; active and on the "standard" plan by default."""

    id: str | None = None
    name: str
    slug: str
    is_active: bool = True
    plan: str = "standard"
    created_at: datetime | None = None
    updated_at: datetime | None = None
|
||||
|
||||
|
||||
class OrganizationCreate(BaseModel):
|
||||
|
|
@ -45,7 +44,7 @@ class OrganizationCreate(BaseModel):
|
|||
|
||||
|
||||
class OrganizationUpdate(BaseModel):
    """Partial update for an organization; every field is optional and defaults to ``None``."""

    name: str | None = None
    slug: str | None = None
    is_active: bool | None = None
    plan: str | None = None
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
"""Review Note model for timestamped video review notes."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
|
@ -9,7 +8,7 @@ from pydantic import BaseModel, Field
|
|||
class ReviewNote(BaseModel):
|
||||
"""A timestamped note attached to a video asset during review."""
|
||||
|
||||
id: Optional[str] = Field(None, alias="_id")
|
||||
id: str | None = Field(None, alias="_id")
|
||||
job_id: str
|
||||
asset_key: str # e.g., "en", "es", "en_accessible"
|
||||
timestamp_seconds: float # Video timestamp when note was created
|
||||
|
|
@ -17,7 +16,7 @@ class ReviewNote(BaseModel):
|
|||
user_id: str # Author's user ID
|
||||
user_name: str # Author's display name (denormalized for display)
|
||||
created_at: datetime
|
||||
updated_at: Optional[datetime] = None
|
||||
updated_at: datetime | None = None
|
||||
|
||||
class Config:
|
||||
populate_by_name = True
|
||||
|
|
|
|||
26
backend/app/models/share_token.py
Normal file
26
backend/app/models/share_token.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class ShareToken(BaseModel):
    """Share token for a job, recording who created it, when it expires and whether it is active."""

    id: str | None = None  # token itself (32 hex chars), used as _id
    job_id: str
    organization_id: str
    created_by_user_id: str
    created_by_email: str
    created_at: datetime | None = None
    expires_at: datetime | None = None
    is_active: bool = True
    label: str | None = None  # human-readable note e.g. "Sent to ACME 2026-05-01"
|
||||
|
||||
|
||||
class ShareTokenResponse(BaseModel):
    """Response shape for a share token, including the assembled public URL."""

    id: str
    job_id: str
    created_by_email: str
    created_at: datetime
    expires_at: datetime | None = None
    is_active: bool
    label: str | None = None
    share_url: str  # full public URL, assembled server-side
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional, Annotated
|
||||
from enum import StrEnum
|
||||
from typing import Annotated
|
||||
|
||||
from bson import ObjectId
|
||||
from pydantic import BaseModel, EmailStr, Field, BeforeValidator
|
||||
from pydantic import BaseModel, BeforeValidator, EmailStr, Field
|
||||
|
||||
|
||||
def validate_object_id(v) -> str:
|
||||
|
|
@ -18,7 +18,7 @@ def validate_object_id(v) -> str:
|
|||
PyObjectId = Annotated[str, BeforeValidator(validate_object_id)]
|
||||
|
||||
|
||||
class UserRole(str, Enum):
|
||||
class UserRole(StrEnum):
|
||||
CLIENT = "client"
|
||||
REVIEWER = "reviewer"
|
||||
LINGUIST = "linguist"
|
||||
|
|
@ -27,22 +27,23 @@ class UserRole(str, Enum):
|
|||
ADMIN = "admin"
|
||||
|
||||
|
||||
class AuthProvider(str, Enum):
|
||||
class AuthProvider(StrEnum):
|
||||
LOCAL = "local"
|
||||
MICROSOFT = "microsoft"
|
||||
|
||||
|
||||
class User(BaseModel):
|
||||
id: Optional[PyObjectId] = Field(None, alias="_id")
|
||||
id: PyObjectId | None = Field(None, alias="_id")
|
||||
email: EmailStr
|
||||
hashed_password: Optional[str] = None # Optional for Microsoft users
|
||||
hashed_password: str | None = None # Optional for Microsoft users
|
||||
full_name: str
|
||||
role: UserRole = UserRole.CLIENT
|
||||
auth_provider: AuthProvider = AuthProvider.LOCAL
|
||||
is_active: bool = True
|
||||
pm_client_ids: list[str] = [] # Client IDs where this user is Project Manager (admin-assigned)
|
||||
created_at: Optional[datetime] = None
|
||||
updated_at: Optional[datetime] = None
|
||||
languages: list[str] = [] # BCP-47 language codes the user is competent in (R-8)
|
||||
created_at: datetime | None = None
|
||||
updated_at: datetime | None = None
|
||||
|
||||
class Config:
|
||||
populate_by_name = True
|
||||
|
|
@ -61,8 +62,9 @@ class UserCreate(BaseModel):
|
|||
|
||||
|
||||
class UserUpdate(BaseModel):
    """Partial update for a user; every field is optional and defaults to ``None``."""

    email: EmailStr | None = None
    full_name: str | None = None
    role: UserRole | None = None
    is_active: bool | None = None
    pm_client_ids: list[str] | None = None
    languages: list[str] | None = None
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
from datetime import datetime
|
||||
from typing import Literal, Optional
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
VttKind = Literal["captions", "ad"]
|
||||
|
||||
|
||||
|
|
@ -13,7 +12,7 @@ class VttVersionActor(BaseModel):
|
|||
|
||||
|
||||
class VttVersion(BaseModel):
|
||||
id: Optional[str] = Field(None, alias="_id")
|
||||
id: str | None = Field(None, alias="_id")
|
||||
job_id: str
|
||||
lang: str
|
||||
kind: VttKind
|
||||
|
|
@ -22,8 +21,8 @@ class VttVersion(BaseModel):
|
|||
gcs_uri: str
|
||||
created_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
created_by: VttVersionActor
|
||||
note: Optional[str] = None
|
||||
parent_version: Optional[int] = None
|
||||
note: str | None = None
|
||||
parent_version: int | None = None
|
||||
cue_count: int = 0
|
||||
byte_size: int = 0
|
||||
|
||||
|
|
@ -33,7 +32,7 @@ class VttVersion(BaseModel):
|
|||
|
||||
class VttVersionSummary(BaseModel):
|
||||
"""Lightweight version entry for list responses (no content)."""
|
||||
id: Optional[str] = Field(None, alias="_id")
|
||||
id: str | None = Field(None, alias="_id")
|
||||
job_id: str
|
||||
lang: str
|
||||
kind: VttKind
|
||||
|
|
@ -41,8 +40,8 @@ class VttVersionSummary(BaseModel):
|
|||
gcs_uri: str
|
||||
created_at: datetime
|
||||
created_by: VttVersionActor
|
||||
note: Optional[str] = None
|
||||
parent_version: Optional[int] = None
|
||||
note: str | None = None
|
||||
parent_version: int | None = None
|
||||
cue_count: int = 0
|
||||
byte_size: int = 0
|
||||
|
||||
|
|
@ -58,8 +57,8 @@ class VttVersionListResponse(BaseModel):
|
|||
class DiffLine(BaseModel):
    """One line of a diff: its change type, its text and optional old/new line numbers."""

    type: Literal["unchanged", "added", "removed"]
    content: str
    line_no_old: int | None = None
    line_no_new: int | None = None
|
||||
|
||||
|
||||
class VttDiffResponse(BaseModel):
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ You are given a video. Return a JSON object with:
|
|||
- captions_vtt: a valid WebVTT file as a single string, with accurate timings and no styling (in the detected language)
|
||||
- audio_description_vtt: a valid WebVTT file as a single string, describing key visual elements (no spoilers), synchronized with the program (MUST be written in the detected language)
|
||||
{SDH_FIELD}
|
||||
{SOURCE_HAS_AD}
|
||||
|
||||
CRITICAL LANGUAGE REQUIREMENT:
|
||||
- First, detect the language spoken in the video
|
||||
|
|
@ -36,7 +37,7 @@ CRITICAL TIMING REQUIREMENTS:
|
|||
- Each caption cue should end exactly when the speaker finishes that phrase/sentence
|
||||
- Listen carefully to detect natural speech pauses and word boundaries
|
||||
- Avoid starting captions too early or ending them too late
|
||||
- Ensure captions align with lip movement and speech rhythm
|
||||
- Caption ALL audible speech — include off-screen narrators, voiceover, and any speaker not visible on screen. Do NOT omit speech because the speaker is not visible or because it plays over non-dialogue segments.
|
||||
- For audio descriptions, time them during natural speech gaps or over non-dialogue audio
|
||||
- Validate that all timestamps are monotonically increasing (each cue starts after the previous one ends)
|
||||
|
||||
|
|
@ -57,6 +58,14 @@ CAPTION FORMATTING (DCMP standard):
|
|||
- Minimum caption duration: approximately 1.3 seconds. Maximum: 6 seconds
|
||||
- Use mixed case. Use ALL CAPS only for screaming or shouting
|
||||
|
||||
DISFLUENCY REMOVAL (DCMP §6.01):
|
||||
- MANDATORY: Never include filler words, false starts, or hesitations in captions — remove them silently
|
||||
- English fillers to remove: "um", "uh", "ah", "er", "hmm", "you know", "I mean", "sort of", "kind of", "basically", "literally", "honestly"
|
||||
- Language-specific fillers: French "euh"/"beh"/"ben"/"genre", German "äh"/"ähm"/"halt"/"also", Spanish "eh"/"este"/"o sea"/"pues", Italian "ehm"/"allora"/"cioè"/"tipo", Dutch "eh"/"nou"/"zeg"/"eigenlijk", Portuguese "ahn"/"né"/"sabe"/"tipo"
|
||||
- Remove false starts when the speaker self-corrects immediately (e.g., "I was — I went to the store" → "I went to the store")
|
||||
- Do NOT remove meaningful repetition, emphasis, or intentional stylistic choices
|
||||
- When in doubt whether a word is a filler or content: omit it — clean captions are preferred over over-inclusive ones
|
||||
|
||||
SOUND AND MUSIC FORMATTING (DCMP standard):
|
||||
- Sound effects: lowercase in square brackets — e.g., [door slams], [footsteps approaching]
|
||||
- Use present participle for sustained sounds: [dog barking]; use third person for abrupt sounds: [dog barks]
|
||||
|
|
@ -69,7 +78,9 @@ SOUND AND MUSIC FORMATTING (DCMP standard):
|
|||
|
||||
CAPTION PLACEMENT:
|
||||
- Captions are normally positioned at the bottom of the screen
|
||||
- When visible text, graphics, logos, or on-screen information appear at the bottom of the frame during a caption cue, add the VTT cue setting "line:0%" to move that caption to the top — format: "00:00:01.000 --> 00:00:03.000 line:0%"
|
||||
- CRITICAL: When ANY of the following are visible at the BOTTOM of the frame during a caption cue — on-screen text, lower-thirds, name plates, location titles, graphics, logos, product labels, URLs, or any visual information — you MUST add the VTT cue setting "line:0%" to move that cue to the top of the screen. Format: "00:00:01.000 --> 00:00:03.000 line:0%"
|
||||
- When in doubt whether bottom content conflicts with captions, use "line:0%" — it is better to be at the top than to obstruct important on-screen information
|
||||
- Example: if a lower-third name plate is visible at seconds 0:05–0:08, all caption cues overlapping that range must have "line:0%"
|
||||
|
||||
ETHICAL GUIDELINES FOR DESCRIBING PEOPLE (DCMP standard):
|
||||
- Consistently identify people/characters by name. When a name is not yet known, identify by the most obvious visible attribute (e.g., "the person in the red jacket") until the name is established, then switch to the name and use it consistently
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ You are given a video. Return a JSON object with:
|
|||
- captions_vtt: a valid WebVTT file as a single string, with accurate timings and no styling (written in {TARGET_LANGUAGE})
|
||||
- audio_description_vtt: a valid WebVTT file as a single string, describing key visual elements (no spoilers), synchronized with the program (written in {TARGET_LANGUAGE})
|
||||
{SDH_FIELD}
|
||||
{SOURCE_HAS_AD}
|
||||
|
||||
TARGET LANGUAGE: {TARGET_LANGUAGE}
|
||||
|
||||
|
|
@ -40,7 +41,7 @@ CRITICAL TIMING REQUIREMENTS:
|
|||
- Each caption cue should end exactly when the speaker finishes that phrase/sentence
|
||||
- Listen carefully to detect natural speech pauses and word boundaries
|
||||
- Avoid starting captions too early or ending them too late
|
||||
- Ensure captions align with lip movement and speech rhythm
|
||||
- Caption ALL audible speech — include off-screen narrators, voiceover, and any speaker not visible on screen. Do NOT omit speech because the speaker is not visible or because it plays over non-dialogue segments.
|
||||
- For audio descriptions, time them during natural speech gaps or over non-dialogue audio
|
||||
- Validate that all timestamps are monotonically increasing (each cue starts after the previous one ends)
|
||||
|
||||
|
|
@ -61,6 +62,13 @@ CAPTION FORMATTING (DCMP standard):
|
|||
- Minimum caption duration: approximately 1.3 seconds. Maximum: 6 seconds
|
||||
- Use mixed case. Use ALL CAPS only for screaming or shouting
|
||||
|
||||
DISFLUENCY REMOVAL (DCMP §6.01):
|
||||
- Do NOT include filler words, false starts, or hesitations in captions
|
||||
- Remove: "um", "uh", "ah", "er", "hmm", "like" (as filler), "you know" (as filler), "I mean" (as filler)
|
||||
- Also remove language-specific fillers (e.g., "euh"/"beh" in French, "äh"/"ähm" in German, "eh"/"este" in Spanish, "ehm"/"allora" in Italian)
|
||||
- Remove false starts when the speaker self-corrects immediately (e.g., "I was — I went to the store" → "I went to the store")
|
||||
- Do NOT remove meaningful repetition, emphasis, or intentional stylistic choices
|
||||
|
||||
SOUND AND MUSIC FORMATTING (DCMP standard):
|
||||
- Sound effects: lowercase in square brackets — e.g., [door slams], [footsteps approaching]
|
||||
- Use present participle for sustained sounds: [dog barking]; use third person for abrupt sounds: [dog barks]
|
||||
|
|
|
|||
|
|
@ -1,12 +1,11 @@
|
|||
"""Schemas for accessible video generation with embedded audio descriptions."""
|
||||
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
from enum import StrEnum
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class AccessibleVideoMethod(str, Enum):
|
||||
class AccessibleVideoMethod(StrEnum):
|
||||
"""Method used for integrating audio descriptions into video."""
|
||||
OVERLAY = "overlay"
|
||||
PAUSE_INSERT = "pause_insert"
|
||||
|
|
@ -32,29 +31,29 @@ class ADPlacementCue(BaseModel):
|
|||
target_start_time: float = Field(..., description="Target time in output video (seconds)")
|
||||
ad_duration: float = Field(..., description="Duration of the AD TTS audio in seconds")
|
||||
# For pause-insert method
|
||||
pause_point: Optional[float] = Field(
|
||||
pause_point: float | None = Field(
|
||||
None,
|
||||
description="Where to pause the video - just before the next sentence starts (gap.end - buffer). Used for pause-insert method."
|
||||
)
|
||||
resume_from: Optional[float] = Field(
|
||||
resume_from: float | None = Field(
|
||||
None,
|
||||
description="Where to resume video after AD plays - just after the previous sentence ends (gap.start + buffer). Creates a small overlap for natural transitions."
|
||||
)
|
||||
pause_point_rationale: Optional[str] = Field(
|
||||
pause_point_rationale: str | None = Field(
|
||||
None,
|
||||
description="Explanation of why this pause point was chosen, referencing the sentence boundary."
|
||||
)
|
||||
# Whisper refinement tracking
|
||||
original_pause_point: Optional[float] = Field(
|
||||
original_pause_point: float | None = Field(
|
||||
None,
|
||||
description="Original pause point from Gemini before Whisper refinement (seconds)."
|
||||
)
|
||||
# For overlay method
|
||||
duck_start: Optional[float] = Field(
|
||||
duck_start: float | None = Field(
|
||||
None,
|
||||
description="When to start ducking original audio (seconds). Used for overlay method."
|
||||
)
|
||||
duck_end: Optional[float] = Field(
|
||||
duck_end: float | None = Field(
|
||||
None,
|
||||
description="When to end ducking original audio (seconds). Used for overlay method."
|
||||
)
|
||||
|
|
@ -118,10 +117,10 @@ class AccessibleVideoRenderRequest(BaseModel):
|
|||
class AccessibleVideoProgress(BaseModel):
|
||||
"""Progress status for accessible video rendering."""
|
||||
status: str = Field(..., description="pending | rendering | completed | failed")
|
||||
method: Optional[AccessibleVideoMethod] = None
|
||||
error_message: Optional[str] = None
|
||||
started_at: Optional[str] = None
|
||||
completed_at: Optional[str] = None
|
||||
method: AccessibleVideoMethod | None = None
|
||||
error_message: str | None = None
|
||||
started_at: str | None = None
|
||||
completed_at: str | None = None
|
||||
|
||||
|
||||
# === QC Review Accessible Video Editing Schemas ===
|
||||
|
|
@ -131,8 +130,8 @@ class PausePointResponse(BaseModel):
|
|||
"""Pause point timing data for QC editing."""
|
||||
cue_index: int = Field(..., description="AD cue index this pause point belongs to")
|
||||
original_ms: float = Field(..., description="Rendered timeline position (ms) - for display")
|
||||
source_ms: Optional[float] = Field(None, description="Source video cut point (ms) - for re-rendering (None = use original_ms)")
|
||||
adjusted_ms: Optional[float] = Field(None, description="User-adjusted timestamp (ms)")
|
||||
source_ms: float | None = Field(None, description="Source video cut point (ms) - for re-rendering (None = use original_ms)")
|
||||
adjusted_ms: float | None = Field(None, description="User-adjusted timestamp (ms)")
|
||||
min_bound_ms: float = Field(..., description="Minimum allowed value (ms)")
|
||||
max_bound_ms: float = Field(..., description="Maximum allowed value (ms)")
|
||||
|
||||
|
|
@ -145,16 +144,16 @@ class VideoSegmentResponse(BaseModel):
|
|||
gcs_uri: str = Field(..., description="GCS path to segment MP4")
|
||||
duration_ms: float = Field(..., description="Actual segment duration (ms)")
|
||||
is_freeze_frame: bool = Field(False, description="True if freeze frame with AD audio")
|
||||
cue_index: Optional[int] = Field(None, description="AD cue index (freeze frames only)")
|
||||
cue_index: int | None = Field(None, description="AD cue index (freeze frames only)")
|
||||
|
||||
|
||||
class TTSRegenerationItem(BaseModel):
|
||||
"""A queued TTS regeneration request."""
|
||||
cue_index: int = Field(..., description="AD cue index to regenerate")
|
||||
requested_at: str = Field(..., description="ISO timestamp when requested")
|
||||
new_text: Optional[str] = Field(None, description="Override text (if provided)")
|
||||
new_text: str | None = Field(None, description="Override text (if provided)")
|
||||
status: str = Field("pending", description="pending | processing | completed | failed")
|
||||
error_message: Optional[str] = None
|
||||
error_message: str | None = None
|
||||
|
||||
|
||||
class AccessibleVideoEditStateResponse(BaseModel):
|
||||
|
|
@ -171,12 +170,12 @@ class AccessibleVideoEditStateResponse(BaseModel):
|
|||
default_factory=list,
|
||||
description="Queued TTS regeneration requests"
|
||||
)
|
||||
last_render_at: Optional[str] = Field(
|
||||
last_render_at: str | None = Field(
|
||||
None,
|
||||
description="ISO timestamp of last accessible video render"
|
||||
)
|
||||
total_duration_ms: float = Field(..., description="Total accessible video duration (ms)")
|
||||
accessible_video_url: Optional[str] = Field(
|
||||
accessible_video_url: str | None = Field(
|
||||
None,
|
||||
description="Signed URL for accessible video preview"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, EmailStr
|
||||
from ..models.user import UserRole, AuthProvider
|
||||
|
||||
from ..models.user import AuthProvider, UserRole
|
||||
|
||||
|
||||
class LoginRequest(BaseModel):
|
||||
|
|
@ -52,8 +53,9 @@ class UserResponse(BaseModel):
|
|||
role: UserRole
|
||||
auth_provider: AuthProvider
|
||||
is_active: bool
|
||||
created_at: Optional[str] = None
|
||||
created_at: str | None = None
|
||||
pm_client_ids: list[str] = []
|
||||
languages: list[str] = [] # BCP-47 codes for R-8 linguist competence check
|
||||
|
||||
|
||||
class UserListResponse(BaseModel):
|
||||
|
|
@ -71,10 +73,10 @@ class CreateUserRequest(BaseModel):
|
|||
|
||||
|
||||
class UpdateUserRequest(BaseModel):
|
||||
email: Optional[EmailStr] = None
|
||||
full_name: Optional[str] = None
|
||||
role: Optional[UserRole] = None
|
||||
is_active: Optional[bool] = None
|
||||
email: EmailStr | None = None
|
||||
full_name: str | None = None
|
||||
role: UserRole | None = None
|
||||
is_active: bool | None = None
|
||||
|
||||
|
||||
class ChangePasswordRequest(BaseModel):
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
|
@ -6,10 +5,10 @@ from pydantic import BaseModel
|
|||
class SignedUploadRequest(BaseModel):
|
||||
filename: str
|
||||
content_type: str
|
||||
max_size: Optional[int] = None
|
||||
max_size: int | None = None
|
||||
|
||||
|
||||
class SignedUploadResponse(BaseModel):
|
||||
upload_url: str
|
||||
fields: dict[str, str]
|
||||
blob_path: str
|
||||
blob_path: str
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
from typing import Any, Literal, Optional, Union
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
from ..models.job import (
|
||||
AccessibleVideoProgressItem,
|
||||
JobFailure,
|
||||
JobStatus,
|
||||
LangOutput,
|
||||
RequestedOutputs,
|
||||
|
|
@ -15,18 +16,20 @@ from ..schemas.accessible_video import AccessibleVideoMethod
|
|||
|
||||
class JobResponse(BaseModel):
|
||||
id: str
|
||||
client_id: Optional[str] = None # ID of the user who created the job
|
||||
client_id: str | None = None # ID of the user who created the job
|
||||
title: str
|
||||
status: JobStatus
|
||||
source: dict[str, Any]
|
||||
requested_outputs: RequestedOutputs
|
||||
review: Review
|
||||
outputs: Optional[dict[str, LangOutput]] = None
|
||||
accessible_video_progress: Optional[dict[str, AccessibleVideoProgressItem]] = None
|
||||
created_at: Optional[str] = None
|
||||
updated_at: Optional[str] = None
|
||||
created_by_name: Optional[str] = None # User's full_name who created the job
|
||||
cost_tracker_project_id: Optional[str] = None
|
||||
outputs: dict[str, LangOutput] | None = None
|
||||
accessible_video_progress: dict[str, AccessibleVideoProgressItem] | None = None
|
||||
failure: JobFailure | None = None
|
||||
error: dict[str, Any] | None = None
|
||||
created_at: str | None = None
|
||||
updated_at: str | None = None
|
||||
created_by_name: str | None = None # User's full_name who created the job
|
||||
cost_tracker_project_id: str | None = None
|
||||
|
||||
|
||||
class JobListResponse(BaseModel):
|
||||
|
|
@ -42,20 +45,20 @@ class JobCreateRequest(BaseModel):
|
|||
|
||||
|
||||
class JobUpdateRequest(BaseModel):
|
||||
title: Optional[str] = None
|
||||
review_notes: Optional[str] = None
|
||||
cost_tracker_project_id: Optional[str] = None
|
||||
title: str | None = None
|
||||
review_notes: str | None = None
|
||||
cost_tracker_project_id: str | None = None
|
||||
|
||||
|
||||
class ApproveEnglishRequest(BaseModel):
|
||||
notes: Optional[str] = None
|
||||
notes: str | None = None
|
||||
|
||||
|
||||
class ApproveSourceRequest(BaseModel):
|
||||
"""Request to approve source language content (works for any language)"""
|
||||
notes: Optional[str] = None
|
||||
tts_preferences: Optional[TTSPreferences] = None # Override TTS voice settings
|
||||
accessible_video_method: Optional[AccessibleVideoMethod] = None # User-selected method for accessible video
|
||||
notes: str | None = None
|
||||
tts_preferences: TTSPreferences | None = None # Override TTS voice settings
|
||||
accessible_video_method: AccessibleVideoMethod | None = None # User-selected method for accessible video
|
||||
|
||||
|
||||
class UpdateTTSPreferencesRequest(BaseModel):
|
||||
|
|
@ -68,13 +71,21 @@ class RejectJobRequest(BaseModel):
|
|||
|
||||
|
||||
class CompleteJobRequest(BaseModel):
|
||||
notes: Optional[str] = None
|
||||
notes: str | None = None
|
||||
|
||||
|
||||
class VttUpdateRequest(BaseModel):
|
||||
captions_vtt: Optional[str] = None
|
||||
audio_description_vtt: Optional[str] = None
|
||||
language: Optional[str] = None # If None, defaults to source language
|
||||
captions_vtt: str | None = None
|
||||
audio_description_vtt: str | None = None
|
||||
language: str | None = None # If None, defaults to source language
|
||||
if_match: str | None = None # Optimistic locking — SHA1 of expected current content
|
||||
retranslate_languages: bool = False # Re-translate all target languages from updated source VTT
|
||||
note: str | None = None # Optional save message shown in version history
|
||||
|
||||
@field_validator('captions_vtt', 'audio_description_vtt', mode='before')
|
||||
@classmethod
|
||||
def empty_str_to_none(cls, v: Any) -> str | None:
|
||||
return None if v == '' else v
|
||||
|
||||
|
||||
class VttTimingAdjustRequest(BaseModel):
|
||||
|
|
@ -85,13 +96,14 @@ class VttTimingAdjustRequest(BaseModel):
|
|||
|
||||
|
||||
class JobDownloadsResponse(BaseModel):
|
||||
downloads: dict[str, Union[dict[str, str], str]] # language -> {file_type: signed_url} OR source_video -> signed_url
|
||||
downloads: dict[str, dict[str, str] | str] # language -> {file_type: signed_url} OR source_video -> signed_url
|
||||
|
||||
|
||||
class VttContentResponse(BaseModel):
|
||||
captions_vtt: Optional[str] = None
|
||||
audio_description_vtt: Optional[str] = None
|
||||
retimed_captions_vtt: Optional[str] = None # Re-timed captions for accessible videos
|
||||
captions_vtt: str | None = None
|
||||
audio_description_vtt: str | None = None
|
||||
retimed_captions_vtt: str | None = None # Re-timed captions for accessible videos
|
||||
etag: str | None = None # SHA1 hash for optimistic locking (If-Match on PATCH)
|
||||
|
||||
|
||||
class AssetValidationResponse(BaseModel):
|
||||
|
|
@ -117,9 +129,9 @@ class BulkDeleteResponse(BaseModel):
|
|||
class BulkApproveRequest(BaseModel):
|
||||
"""Request to bulk approve multiple jobs with optional settings"""
|
||||
job_ids: list[str]
|
||||
notes: Optional[str] = None
|
||||
accessible_video_method: Optional[AccessibleVideoMethod] = None # Method for accessible video
|
||||
tts_preferences: Optional[TTSPreferences] = None
|
||||
notes: str | None = None
|
||||
accessible_video_method: AccessibleVideoMethod | None = None # Method for accessible video
|
||||
tts_preferences: TTSPreferences | None = None
|
||||
|
||||
|
||||
class BulkApproveResponse(BaseModel):
|
||||
|
|
@ -147,3 +159,42 @@ class BulkReturnToQCResponse(BaseModel):
|
|||
class BulkDownloadRequest(BaseModel):
|
||||
"""Request to download multiple jobs as a single zip file"""
|
||||
job_ids: list[str]
|
||||
|
||||
|
||||
class BlockedOnSourceRequest(BaseModel):
|
||||
reason: str # brief description of what is wrong with the source video
|
||||
|
||||
|
||||
class PromoteToQCRequest(BaseModel):
|
||||
notes: str = "" # optional context for the QC team
|
||||
|
||||
|
||||
# ── PR-3: Resumable / chunked upload ──────────────────────────────────────────
|
||||
|
||||
class UploadInitRequest(BaseModel):
|
||||
filename: str
|
||||
content_type: str
|
||||
file_size: int # bytes — validated server-side against settings.upload_max_video_bytes
|
||||
|
||||
|
||||
class UploadInitResponse(BaseModel):
|
||||
job_id: str
|
||||
upload_url: str # GCS resumable session URI — browser uploads chunks directly here
|
||||
|
||||
|
||||
class UploadCompleteRequest(BaseModel):
|
||||
job_id: str
|
||||
title: str
|
||||
original_filename: str
|
||||
requested_outputs: dict
|
||||
brand_context: str | None = None
|
||||
project_id: str | None = None
|
||||
brief_id: str | None = None
|
||||
deadline: str | None = None
|
||||
initial_linguist_id: str | None = None
|
||||
initial_reviewer_id: str | None = None
|
||||
|
||||
|
||||
class RetranslateLanguageRequest(BaseModel):
|
||||
language: str
|
||||
reason: str | None = None
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
"""Pydantic schemas for Review Note API requests and responses."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
|
@ -31,7 +30,7 @@ class ReviewNoteResponse(BaseModel):
|
|||
user_id: str
|
||||
user_name: str
|
||||
created_at: str # ISO format
|
||||
updated_at: Optional[str] = None # ISO format
|
||||
updated_at: str | None = None # ISO format
|
||||
|
||||
@classmethod
|
||||
def from_model(cls, note: dict) -> "ReviewNoteResponse":
|
||||
|
|
|
|||
|
|
@ -2,19 +2,19 @@
|
|||
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any
|
||||
|
||||
from fastapi import Request
|
||||
from motor.motor_asyncio import AsyncIOMotorCollection
|
||||
|
||||
from app.core.database import get_database
|
||||
from app.core.config import get_settings
|
||||
from app.core.database import get_database
|
||||
from app.models.audit_log import (
|
||||
AuditLog,
|
||||
AuditLogCreate,
|
||||
AuditLogQuery,
|
||||
AuditAction,
|
||||
AuditLog,
|
||||
AuditLogQuery,
|
||||
AuditLogResponse,
|
||||
AuditAction,
|
||||
AuditLogSeverity
|
||||
AuditLogSeverity,
|
||||
)
|
||||
from app.models.user import User
|
||||
from app.telemetry.tracing import trace_async_operation
|
||||
|
|
@ -22,45 +22,45 @@ from app.telemetry.tracing import trace_async_operation
|
|||
|
||||
class AuditLogger:
|
||||
"""Service for managing audit logs."""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.settings = get_settings()
|
||||
self.collection: Optional[AsyncIOMotorCollection] = None
|
||||
|
||||
self.collection: AsyncIOMotorCollection | None = None
|
||||
|
||||
async def _get_collection(self) -> AsyncIOMotorCollection:
    """Return the ``audit_logs`` collection, resolving it on first use.

    The handle is stored on ``self.collection``; ``get_database()`` is only
    awaited while that attribute is still ``None``.
    """
    cached = self.collection
    if cached is not None:
        return cached

    database = await get_database()
    self.collection = database.audit_logs
    return self.collection
|
||||
|
||||
|
||||
@trace_async_operation("audit_logger.log_action")
|
||||
async def log_action(
|
||||
self,
|
||||
action: AuditAction,
|
||||
description: str,
|
||||
user: Optional[User] = None,
|
||||
request: Optional[Request] = None,
|
||||
resource_type: Optional[str] = None,
|
||||
resource_id: Optional[str] = None,
|
||||
resource_name: Optional[str] = None,
|
||||
details: Optional[Dict[str, Any]] = None,
|
||||
user: User | None = None,
|
||||
request: Request | None = None,
|
||||
resource_type: str | None = None,
|
||||
resource_id: str | None = None,
|
||||
resource_name: str | None = None,
|
||||
details: dict[str, Any] | None = None,
|
||||
severity: AuditLogSeverity = AuditLogSeverity.INFO,
|
||||
success: bool = True,
|
||||
error_message: Optional[str] = None
|
||||
error_message: str | None = None
|
||||
) -> str:
|
||||
"""
|
||||
Log an audit event.
|
||||
|
||||
|
||||
Returns:
|
||||
The ID of the created audit log entry.
|
||||
"""
|
||||
|
||||
|
||||
# Extract request context
|
||||
ip_address = None
|
||||
user_agent = None
|
||||
request_id = None
|
||||
|
||||
|
||||
if request:
|
||||
# Get IP address (handle forwarded headers)
|
||||
forwarded_for = request.headers.get("X-Forwarded-For")
|
||||
|
|
@ -68,10 +68,10 @@ class AuditLogger:
|
|||
ip_address = forwarded_for.split(',')[0].strip()
|
||||
elif request.client:
|
||||
ip_address = request.client.host
|
||||
|
||||
|
||||
user_agent = request.headers.get("User-Agent")
|
||||
request_id = request.headers.get("X-Request-ID", str(uuid.uuid4()))
|
||||
|
||||
|
||||
# Create audit log entry
|
||||
audit_log = AuditLog(
|
||||
action=action,
|
||||
|
|
@ -93,22 +93,26 @@ class AuditLogger:
|
|||
service_name="accessible-video-api",
|
||||
api_version="v1"
|
||||
)
|
||||
|
||||
# Save to database
|
||||
|
||||
# Save to database — non-raising so audit failure never aborts the primary operation
|
||||
collection = await self._get_collection()
|
||||
result = await collection.insert_one(audit_log.dict(by_alias=True))
|
||||
|
||||
return str(result.inserted_id)
|
||||
|
||||
try:
|
||||
result = await collection.insert_one(audit_log.dict(by_alias=True))
|
||||
return str(result.inserted_id)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
import logging
|
||||
logging.getLogger(__name__).error("audit log insert failed: %s", exc)
|
||||
return ""
|
||||
|
||||
@trace_async_operation("audit_logger.query_logs")
|
||||
async def query_logs(self, query: AuditLogQuery) -> AuditLogResponse:
|
||||
"""Query audit logs with filtering and pagination."""
|
||||
|
||||
|
||||
collection = await self._get_collection()
|
||||
|
||||
|
||||
# Build MongoDB query
|
||||
mongo_query = {}
|
||||
|
||||
|
||||
# Time range filter
|
||||
if query.start_date or query.end_date:
|
||||
timestamp_filter = {}
|
||||
|
|
@ -117,7 +121,7 @@ class AuditLogger:
|
|||
if query.end_date:
|
||||
timestamp_filter["$lte"] = query.end_date
|
||||
mongo_query["timestamp"] = timestamp_filter
|
||||
|
||||
|
||||
# Exact match filters
|
||||
if query.action:
|
||||
mongo_query["action"] = query.action
|
||||
|
|
@ -136,7 +140,7 @@ class AuditLogger:
|
|||
mongo_query["resource_id"] = query.resource_id
|
||||
if query.success is not None:
|
||||
mongo_query["success"] = query.success
|
||||
|
||||
|
||||
# Text search
|
||||
if query.search:
|
||||
mongo_query["$or"] = [
|
||||
|
|
@ -144,23 +148,23 @@ class AuditLogger:
|
|||
{"details": {"$regex": query.search, "$options": "i"}},
|
||||
{"error_message": {"$regex": query.search, "$options": "i"}}
|
||||
]
|
||||
|
||||
|
||||
# Get total count
|
||||
total_count = await collection.count_documents(mongo_query)
|
||||
|
||||
|
||||
# Execute query with pagination and sorting
|
||||
cursor = collection.find(mongo_query)
|
||||
|
||||
|
||||
# Apply sorting
|
||||
sort_direction = query.sort_order
|
||||
cursor = cursor.sort(query.sort_by, sort_direction)
|
||||
|
||||
|
||||
# Apply pagination
|
||||
cursor = cursor.skip(query.skip).limit(query.limit)
|
||||
|
||||
|
||||
# Execute query
|
||||
documents = await cursor.to_list(length=query.limit)
|
||||
|
||||
|
||||
# Convert to Pydantic models
|
||||
logs = []
|
||||
for doc in documents:
|
||||
|
|
@ -170,11 +174,11 @@ class AuditLogger:
|
|||
# Log conversion error but continue
|
||||
print(f"Error converting audit log document: {e}")
|
||||
continue
|
||||
|
||||
|
||||
# Calculate pagination info
|
||||
page = (query.skip // query.limit) + 1
|
||||
has_more = (query.skip + len(logs)) < total_count
|
||||
|
||||
|
||||
return AuditLogResponse(
|
||||
logs=logs,
|
||||
total_count=total_count,
|
||||
|
|
@ -182,14 +186,14 @@ class AuditLogger:
|
|||
page_size=len(logs),
|
||||
has_more=has_more
|
||||
)
|
||||
|
||||
async def get_user_activity(self, user_id: str, days: int = 30) -> List[AuditLog]:
|
||||
|
||||
async def get_user_activity(self, user_id: str, days: int = 30) -> list[AuditLog]:
|
||||
"""Get recent activity for a specific user."""
|
||||
|
||||
|
||||
from_date = datetime.utcnow().replace(
|
||||
hour=0, minute=0, second=0, microsecond=0
|
||||
) - timedelta(days=days)
|
||||
|
||||
|
||||
query = AuditLogQuery(
|
||||
user_id=user_id,
|
||||
start_date=from_date,
|
||||
|
|
@ -197,15 +201,15 @@ class AuditLogger:
|
|||
sort_by="timestamp",
|
||||
sort_order=-1
|
||||
)
|
||||
|
||||
|
||||
response = await self.query_logs(query)
|
||||
return response.logs
|
||||
|
||||
async def get_security_events(self, hours: int = 24) -> List[AuditLog]:
|
||||
|
||||
async def get_security_events(self, hours: int = 24) -> list[AuditLog]:
|
||||
"""Get recent security-related events."""
|
||||
|
||||
|
||||
from_date = datetime.utcnow() - timedelta(hours=hours)
|
||||
|
||||
|
||||
security_actions = [
|
||||
AuditAction.LOGIN_FAILURE,
|
||||
AuditAction.RATE_LIMIT_EXCEEDED,
|
||||
|
|
@ -213,38 +217,38 @@ class AuditLogger:
|
|||
AuditAction.UNAUTHORIZED_ACCESS,
|
||||
AuditAction.SUSPICIOUS_ACTIVITY
|
||||
]
|
||||
|
||||
|
||||
collection = await self._get_collection()
|
||||
|
||||
|
||||
query = {
|
||||
"timestamp": {"$gte": from_date},
|
||||
"action": {"$in": security_actions}
|
||||
}
|
||||
|
||||
|
||||
cursor = collection.find(query).sort("timestamp", -1).limit(1000)
|
||||
documents = await cursor.to_list(length=1000)
|
||||
|
||||
|
||||
logs = []
|
||||
for doc in documents:
|
||||
try:
|
||||
logs.append(AuditLog(**doc))
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
|
||||
return logs
|
||||
|
||||
|
||||
async def cleanup_old_logs(self, retention_days: int = 365) -> int:
    """Delete audit log entries older than the retention period.

    The cutoff is today's midnight (taken from the naive ``datetime.utcnow()``)
    minus ``retention_days`` days. Documents whose ``timestamp`` is strictly
    earlier than the cutoff are removed.

    Args:
        retention_days: Number of days of history to keep.

    Returns:
        The ``deleted_count`` reported by the delete operation.
    """
    midnight_today = datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    cutoff_date = midnight_today - timedelta(days=retention_days)

    audit_logs = await self._get_collection()
    stale_filter = {"timestamp": {"$lt": cutoff_date}}
    outcome = await audit_logs.delete_many(stale_filter)

    return outcome.deleted_count
|
||||
|
||||
|
||||
|
|
@ -277,16 +281,16 @@ async def log_auth_failure(email: str, request: Request, reason: str):
|
|||
)
|
||||
|
||||
|
||||
async def log_job_action(action: AuditAction, job_id: str, user: User, request: Request, details: Optional[Dict] = None):
|
||||
async def log_job_action(action: AuditAction, job_id: str, user: User, request: Request, details: dict | None = None):
|
||||
"""Log job-related actions."""
|
||||
action_descriptions = {
|
||||
AuditAction.JOB_CREATE: "Job created",
|
||||
AuditAction.JOB_APPROVE: "Job approved",
|
||||
AuditAction.JOB_APPROVE: "Job approved",
|
||||
AuditAction.JOB_REJECT: "Job rejected",
|
||||
AuditAction.JOB_CANCEL: "Job cancelled",
|
||||
AuditAction.JOB_UPDATE: "Job updated"
|
||||
}
|
||||
|
||||
|
||||
await audit_logger.log_action(
|
||||
action=action,
|
||||
description=f"{action_descriptions.get(action, str(action))} by {user.email}",
|
||||
|
|
@ -298,7 +302,7 @@ async def log_job_action(action: AuditAction, job_id: str, user: User, request:
|
|||
)
|
||||
|
||||
|
||||
async def log_user_management(action: AuditAction, target_user_id: str, admin_user: User, request: Request, details: Optional[Dict] = None):
|
||||
async def log_user_management(action: AuditAction, target_user_id: str, admin_user: User, request: Request, details: dict | None = None):
|
||||
"""Log user management actions."""
|
||||
action_descriptions = {
|
||||
AuditAction.USER_CREATE: "User created",
|
||||
|
|
@ -308,7 +312,7 @@ async def log_user_management(action: AuditAction, target_user_id: str, admin_us
|
|||
AuditAction.USER_ACTIVATE: "User activated",
|
||||
AuditAction.USER_DEACTIVATE: "User deactivated"
|
||||
}
|
||||
|
||||
|
||||
await audit_logger.log_action(
|
||||
action=action,
|
||||
description=f"{action_descriptions.get(action, str(action))} by admin {admin_user.email}",
|
||||
|
|
@ -321,7 +325,7 @@ async def log_user_management(action: AuditAction, target_user_id: str, admin_us
|
|||
)
|
||||
|
||||
|
||||
async def log_security_event(action: AuditAction, description: str, request: Request, user: Optional[User] = None, details: Optional[Dict] = None):
|
||||
async def log_security_event(action: AuditAction, description: str, request: Request, user: User | None = None, details: dict | None = None):
|
||||
"""Log security-related events."""
|
||||
await audit_logger.log_action(
|
||||
action=action,
|
||||
|
|
@ -331,4 +335,4 @@ async def log_security_event(action: AuditAction, description: str, request: Req
|
|||
severity=AuditLogSeverity.WARNING if action != AuditAction.SUSPICIOUS_ACTIVITY else AuditLogSeverity.CRITICAL,
|
||||
success=False,
|
||||
details=details
|
||||
)
|
||||
)
|
||||
|
|
|
|||
135
backend/app/services/caption_aligner.py
Normal file
135
backend/app/services/caption_aligner.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
"""Align Gemini caption VTT timings against Whisper word-level timestamps.
|
||||
|
||||
Algorithm:
|
||||
For each VTT cue, tokenise its text and search for the token sequence in the
|
||||
Whisper word stream starting from the cursor position (with a look-ahead window).
|
||||
When a match of sufficient confidence is found the cue's start/end timestamps
|
||||
are replaced with the matched Whisper words' start/end. Cues that cannot be
|
||||
matched (music notation, sound effects, empty cues) keep their original Gemini
|
||||
timestamps. The result has Whisper-accurate timings early in the video and
|
||||
graceful fallbacks where Whisper didn't capture the audio.
|
||||
"""
|
||||
|
||||
import bisect
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ..core.logging import get_logger
|
||||
from ..lib.vtt import VTTEditor, VTTParser
|
||||
from ..services.whisper_service import WordTimestamp
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# Characters to strip when comparing tokens
|
||||
_PUNCT = re.compile(r"[^\w']", re.UNICODE)
|
||||
# Tokens shorter than this are considered stop-words and excluded from matching
|
||||
_MIN_TOKEN_LEN = 2
|
||||
# Minimum fraction of cue tokens that must match Whisper words for alignment.
|
||||
# Lowered from 0.5 → 0.35 to handle Gemini paraphrasing and short cues.
|
||||
_MIN_MATCH_RATIO = 0.35
|
||||
# How many Whisper words ahead of the cursor to search for a cue's tokens.
|
||||
# Widened from 60 → 150 so the window stays valid even after several failed cues.
|
||||
_SEARCH_WINDOW = 150
|
||||
|
||||
|
||||
def _tokenise(text: str) -> list[str]:
    """Split ``text`` on whitespace and return its normalised tokens.

    Every character matched by ``_PUNCT`` is removed from each word and the
    result is lower-cased. Results shorter than ``_MIN_TOKEN_LEN`` characters
    are dropped.
    """
    tokens: list[str] = []
    for raw_word in text.split():
        cleaned = _PUNCT.sub("", raw_word).lower()
        if len(cleaned) < _MIN_TOKEN_LEN:
            continue
        tokens.append(cleaned)
    return tokens
|
||||
|
||||
|
||||
@dataclass
|
||||
class _Match:
|
||||
first_word_idx: int
|
||||
last_word_idx: int
|
||||
ratio: float # matched_tokens / cue_tokens
|
||||
|
||||
|
||||
def _find_match(
    cue_tokens: list[str],
    whisper_words: list[WordTimestamp],
    cursor: int,
) -> _Match | None:
    """Find the best in-order match for ``cue_tokens`` in the Whisper stream.

    Only the words at indices ``cursor`` up to (but excluding)
    ``cursor + _SEARCH_WINDOW`` are examined, capped at the end of the list;
    the search is forward-only. For each candidate start position the cue
    tokens are matched greedily in order, skipping Whisper words that do not
    equal the next expected token.

    Args:
        cue_tokens: Normalised tokens of one cue.
        whisper_words: Word-level timestamps; only ``.word`` is read here.
        cursor: Index of the first Whisper word to consider.

    Returns:
        A ``_Match`` whose ``first_word_idx`` / ``last_word_idx`` are the
        indices of the first and last *matched* Whisper words and whose
        ``ratio`` is ``matched / len(cue_tokens)``. ``None`` when
        ``cue_tokens`` is empty or no candidate reaches ``_MIN_MATCH_RATIO``.
    """
    if not cue_tokens:
        return None

    end = min(cursor + _SEARCH_WINDOW, len(whisper_words))
    # Normalise each Whisper word in the window once rather than on every scan.
    window = [
        _PUNCT.sub("", whisper_words[i].word).lower() for i in range(cursor, end)
    ]
    n_tokens = len(cue_tokens)
    best: _Match | None = None

    for offset in range(len(window)):
        matched = 0
        first_idx = last_idx = cursor + offset

        for pos in range(offset, len(window)):
            if matched == n_tokens:
                break
            if window[pos] == cue_tokens[matched]:
                if matched == 0:
                    # Anchor the match on the first word that actually matched,
                    # not on the scan start, so unrelated words sitting at the
                    # cursor do not drag the cue's start time earlier.
                    first_idx = cursor + pos
                last_idx = cursor + pos
                matched += 1

        ratio = matched / n_tokens
        if ratio >= _MIN_MATCH_RATIO and (best is None or ratio > best.ratio):
            best = _Match(first_idx, last_idx, ratio)
            if ratio == 1.0:
                break  # perfect match — no need to search further

    return best
|
||||
|
||||
|
||||
def _cursor_for_time(whisper_words: list[WordTimestamp], t: float, from_idx: int) -> int:
|
||||
"""Return the index of the first Whisper word at or after time t, starting from from_idx."""
|
||||
starts = [w.start for w in whisper_words]
|
||||
idx = bisect.bisect_left(starts, t, from_idx)
|
||||
return min(idx, len(whisper_words) - 1)
|
||||
|
||||
|
||||
def align(captions_vtt: str, whisper_words: list[WordTimestamp]) -> str:
    """Re-time caption cues from Whisper word timestamps where a match exists.

    Args:
        captions_vtt: Caption VTT text to re-time.
        whisper_words: Word-level timestamps; ``.word``, ``.start`` and
            ``.end`` are read.

    Returns:
        A VTT string. Cues that tokenise to nothing, or for which no match is
        found, keep the timings parsed from ``captions_vtt``. When
        ``whisper_words`` is empty, ``captions_vtt`` is returned unchanged.
    """
    if not whisper_words:
        logger.warning("caption_aligner: no Whisper words supplied — returning original VTT")
        return captions_vtt

    cues = VTTParser.parse(captions_vtt)
    position = 0  # index of the next unconsumed Whisper word
    aligned_count = 0

    for cue in cues:
        cue_tokens = _tokenise(cue.text)
        if not cue_tokens:
            continue

        found = _find_match(cue_tokens, whisper_words, position)
        if found is not None:
            start = whisper_words[found.first_word_idx].start
            finish = whisper_words[found.last_word_idx].end

            # Only adopt the Whisper timing when it has a positive duration.
            if finish > start:
                cue.start_time = start
                cue.end_time = finish
                aligned_count += 1

            position = found.last_word_idx + 1
        else:
            # No match: jump to the Whisper word nearest this cue's own start
            # time so later cues do not search from a stale position.
            position = _cursor_for_time(whisper_words, cue.start_time, position)

    logger.info(
        f"caption_aligner: aligned {aligned_count}/{len(cues)} cues "
        f"against {len(whisper_words)} Whisper words"
    )

    if aligned_count == 0:
        return VTTEditor.translate_preserving_timing(
            captions_vtt, [c.text for c in cues]
        )
    return VTTParser.build(cues)
|
||||
100
backend/app/services/cloud_run_dispatch.py
Normal file
100
backend/app/services/cloud_run_dispatch.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
"""
|
||||
Cloud Run Jobs dispatcher — replaces Celery .delay() for heavy pipeline tasks.
|
||||
|
||||
Heavy tasks (ingest, translate, render, rerender) are dispatched as Cloud Run Job
|
||||
executions. Each execution runs `python -m app.tasks.runner --task <name> --job-id <id>`.
|
||||
|
||||
Light tasks (notify, embed_glossary) stay on the local Celery worker.
|
||||
|
||||
Env vars:
|
||||
CLOUD_RUN_WORKER_JOB — Cloud Run Job name (default: va-worker)
|
||||
GCP_PROJECT_ID — GCP project (from settings)
|
||||
GCP_REGION — Cloud Run region (default: europe-west1)
|
||||
USE_CELERY_FALLBACK — set to "true" to use local Celery instead (local dev)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ..core.logging import get_logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
logger = get_logger(__name__)

# All three settings are read from the environment once, at import time.

# Name of the Cloud Run Job to execute (override with CLOUD_RUN_WORKER_JOB).
_JOB_NAME = os.environ.get("CLOUD_RUN_WORKER_JOB", "va-worker")
# Cloud Run region used in the job resource path (override with GCP_REGION).
_REGION = os.environ.get("GCP_REGION", "europe-west1")
# True only when USE_CELERY_FALLBACK equals "true" (case-insensitive).
_USE_CELERY = os.environ.get("USE_CELERY_FALLBACK", "false").lower() == "true"
|
||||
|
||||
|
||||
def _job_resource(project: str) -> str:
    """Build the Cloud Run Job resource name for ``project``.

    Combines the given project with the module-level region and job name.
    """
    location = f"projects/{project}/locations/{_REGION}"
    return f"{location}/jobs/{_JOB_NAME}"
|
||||
|
||||
|
||||
async def dispatch(task: str, job_id: str, **extra_args: str | list) -> str:
    """
    Dispatch a heavy task to Cloud Run Jobs.

    Args:
        task: Task name, passed to the runner as ``--task``.
        job_id: Job identifier, passed to the runner as ``--job-id``.
        **extra_args: Extra runner options. Each key becomes a ``--kebab-case``
            flag; list values are comma-joined, other values are stringified,
            and ``None`` values are omitted.

    Returns the Cloud Run Operation name (useful for tracking).
    Falls back to local Celery when USE_CELERY_FALLBACK=true (local dev).
    """
    if _USE_CELERY:
        return _celery_fallback(task, job_id, **extra_args)

    # Imported lazily so the Celery-only path does not need the Cloud Run SDK.
    from google.cloud import run_v2  # type: ignore[import]

    from ..core.config import settings

    args = ["--task", task, "--job-id", job_id]
    for key, val in extra_args.items():
        cli_key = f"--{key.replace('_', '-')}"
        if isinstance(val, list):
            args += [cli_key, ",".join(str(v) for v in val)]
        elif val is not None:
            args += [cli_key, str(val)]

    request = run_v2.RunJobRequest(
        name=_job_resource(settings.gcp_project_id),
        overrides=run_v2.RunJobRequest.Overrides(
            container_overrides=[
                run_v2.RunJobRequest.Overrides.ContainerOverride(args=args)
            ]
        ),
    )

    logger.info("Dispatching Cloud Run Job: task=%s job_id=%s args=%s", task, job_id, args)
    # A client is created per call; the context manager closes its transport
    # once the execution has been started so connections are not leaked.
    async with run_v2.JobsAsyncClient() as client:
        operation = await client.run_job(request=request)
        # Only the operation name is needed; the execution is not awaited.
        op_name = operation.operation.name
    logger.info("Cloud Run Job dispatched: %s", op_name)
    return op_name
|
||||
|
||||
|
||||
def _celery_fallback(task: str, job_id: str, **extra_args) -> str:
    """Queue ``task`` on the local Celery worker instead of Cloud Run (dev/test).

    Task modules are imported lazily inside each branch. Returns a synthetic
    identifier of the form ``celery:<task>:<job_id>``; raises ``ValueError``
    for a task name that has no Celery counterpart.
    """
    logger.warning("USE_CELERY_FALLBACK=true — dispatching via local Celery: task=%s", task)
    receipt = f"celery:{task}:{job_id}"

    if task == "ingest":
        from ..tasks.ingest_and_ai import ingest_and_ai_task

        ingest_and_ai_task.delay(job_id)
        return receipt

    if task == "translate":
        from ..tasks.translate_and_synthesize import translate_and_synthesize_task

        requested = extra_args.get("languages")
        if isinstance(requested, str):
            # Accept the comma-separated form as well as a list.
            requested = [code for code in requested.split(",") if code]
        translate_and_synthesize_task.delay(job_id, languages=requested or None)
        return receipt

    if task == "render":
        from ..tasks.render_accessible_video import render_accessible_video_task

        language = extra_args.get("language", "en")
        render_accessible_video_task.delay(job_id, language)
        return receipt

    if task == "rerender":
        from ..tasks.rerender_accessible_video import rerender_accessible_video_task

        rerender_accessible_video_task.delay(
            job_id,
            extra_args.get("language", "en"),
            extra_args.get("regenerate_cues", []),
            extra_args.get("whisper_refine", False),
        )
        return receipt

    raise ValueError(f"Unknown task: {task}")
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
"""Thin HTTP client for the centralized Oliver AI Cost Tracker."""
|
||||
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
|
|
@ -19,7 +18,7 @@ def preflight(
|
|||
*,
|
||||
model: str,
|
||||
user_external_id: str,
|
||||
project_id: Optional[str] = None,
|
||||
project_id: str | None = None,
|
||||
) -> None:
|
||||
if not settings.cost_tracker_base_url or not settings.cost_tracker_enabled:
|
||||
return
|
||||
|
|
@ -51,7 +50,7 @@ async def aio_preflight(
|
|||
*,
|
||||
model: str,
|
||||
user_external_id: str,
|
||||
project_id: Optional[str] = None,
|
||||
project_id: str | None = None,
|
||||
) -> None:
|
||||
await asyncio.to_thread(preflight, model=model, user_external_id=user_external_id, project_id=project_id)
|
||||
|
||||
|
|
@ -61,11 +60,11 @@ def record(
|
|||
model: str,
|
||||
provider: str,
|
||||
user_external_id: str,
|
||||
project_id: Optional[str] = None,
|
||||
project_id: str | None = None,
|
||||
job_external_id: str = "",
|
||||
input_tokens: int = 0,
|
||||
output_tokens: int = 0,
|
||||
chars: Optional[int] = None,
|
||||
chars: int | None = None,
|
||||
latency_ms: int = 0,
|
||||
status: str = "success",
|
||||
) -> None:
|
||||
|
|
@ -76,8 +75,10 @@ def record(
|
|||
if chars is not None:
|
||||
units["char"] = chars
|
||||
else:
|
||||
if input_tokens: units["token_input"] = input_tokens
|
||||
if output_tokens: units["token_output"] = output_tokens
|
||||
if input_tokens:
|
||||
units["token_input"] = input_tokens
|
||||
if output_tokens:
|
||||
units["token_output"] = output_tokens
|
||||
|
||||
payload: dict = {
|
||||
"source_app": settings.cost_tracker_source_app,
|
||||
|
|
@ -88,8 +89,10 @@ def record(
|
|||
"latency_ms": latency_ms,
|
||||
"status": status,
|
||||
}
|
||||
if project_id: payload["project_external_id"] = project_id
|
||||
if job_external_id: payload["job_external_id"] = job_external_id
|
||||
if project_id:
|
||||
payload["project_external_id"] = project_id
|
||||
if job_external_id:
|
||||
payload["job_external_id"] = job_external_id
|
||||
|
||||
httpx.post(
|
||||
f"{settings.cost_tracker_base_url}/usage/record",
|
||||
|
|
|
|||
|
|
@ -16,8 +16,8 @@ Format:
|
|||
|
||||
Reference: WCAG 2.1 Success Criterion 1.2.1
|
||||
"""
|
||||
from ..lib.vtt import VTTCue, VTTParser
|
||||
from ..core.logging import get_logger
|
||||
from ..lib.vtt import VTTCue, VTTParser
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ Fetches and caches available voices from the ElevenLabs API.
|
|||
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
import aiohttp
|
||||
|
||||
|
|
@ -90,7 +89,7 @@ class ElevenLabsVoiceService:
|
|||
|
||||
return voices
|
||||
|
||||
async def get_voice_by_id(self, voice_id: str) -> Optional[ElevenLabsVoice]:
|
||||
async def get_voice_by_id(self, voice_id: str) -> ElevenLabsVoice | None:
|
||||
"""Look up a specific voice by ID."""
|
||||
voices = await self.get_voices()
|
||||
for v in voices:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
|
||||
import html as _html
|
||||
from datetime import datetime
|
||||
|
||||
from jinja2 import Template
|
||||
|
||||
from ..core.config import settings
|
||||
|
|
@ -385,7 +387,7 @@ class EmailService:
|
|||
|
||||
template = Template(template_str)
|
||||
return template.render(
|
||||
job_title=job_title,
|
||||
job_title=_html.escape(job_title),
|
||||
download_links=download_links
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,15 @@
|
|||
"""
|
||||
Embedding service backed by Gemini text-embedding-004.
|
||||
Embedding service backed by Vertex AI text-multilingual-embedding-002.
|
||||
|
||||
Provides batch embedding with retry/backoff for use in glossary ingestion.
|
||||
Batch size: 100 texts per API call (API limit is 2048 but we keep it conservative
|
||||
for memory and retry ergonomics with large glossaries).
|
||||
Uses the google-genai SDK in Vertex AI mode (Application Default Credentials)
|
||||
instead of AI Studio so we get higher per-project quotas and no per-user limits.
|
||||
|
||||
Batch size: 100 texts per API call.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
from collections.abc import Sequence
|
||||
|
||||
from google import genai
|
||||
|
|
@ -18,15 +20,29 @@ from ..core.logging import get_logger
|
|||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
_EMBED_MODEL = "gemini-embedding-001"
|
||||
# Vertex AI multilingual model — 768-dim, 50+ languages, higher quota than AI Studio
|
||||
_EMBED_MODEL = "text-multilingual-embedding-002"
|
||||
_BATCH_SIZE = 100
|
||||
_MAX_RETRIES = 3
|
||||
_INITIAL_BACKOFF = 2.0
|
||||
_MAX_RETRIES = 5
|
||||
_INITIAL_BACKOFF = 4.0
|
||||
|
||||
# Matches the 'retryDelay': '7s' field in Gemini/Vertex 429 error bodies
|
||||
_RETRY_DELAY_RE = re.compile(r"'retryDelay':\s*'(\d+)s'")
|
||||
|
||||
|
||||
def _parse_retry_delay(exc: Exception) -> float | None:
|
||||
"""Extract the server-suggested retry delay from a 429 error."""
|
||||
m = _RETRY_DELAY_RE.search(str(exc))
|
||||
return float(m.group(1)) if m else None
|
||||
|
||||
|
||||
class EmbeddingService:
|
||||
def __init__(self) -> None:
|
||||
self._client = genai.Client(api_key=settings.gemini_api_key)
|
||||
self._client = genai.Client(
|
||||
vertexai=True,
|
||||
project=settings.gcp_project_id,
|
||||
location=settings.gcp_location,
|
||||
)
|
||||
|
||||
async def embed_texts(self, texts: Sequence[str]) -> list[list[float]]:
|
||||
"""
|
||||
|
|
@ -62,8 +78,12 @@ class EmbeddingService:
|
|||
if attempt == _MAX_RETRIES:
|
||||
logger.error(f"Embedding batch failed after {_MAX_RETRIES} attempts: {exc}")
|
||||
raise
|
||||
logger.warning(f"Embedding attempt {attempt} failed, retrying in {backoff}s: {exc}")
|
||||
await asyncio.sleep(backoff)
|
||||
# Honour the server-suggested retryDelay when present (e.g. 429 RESOURCE_EXHAUSTED).
|
||||
# Fall back to our own exponential backoff otherwise.
|
||||
server_delay = _parse_retry_delay(exc)
|
||||
delay = max(server_delay + 1.0, backoff) if server_delay else backoff
|
||||
logger.warning(f"Embedding attempt {attempt} failed, retrying in {delay}s: {exc}")
|
||||
await asyncio.sleep(delay)
|
||||
backoff *= 2
|
||||
|
||||
raise RuntimeError("unreachable") # makes type-checker happy
|
||||
|
|
|
|||
|
|
@ -13,8 +13,6 @@ import logging
|
|||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import uuid
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from google.cloud import storage
|
||||
|
|
@ -275,7 +273,7 @@ async def run_ffmpeg(request: RunFFmpegRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"FFmpeg operation failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
|
||||
|
||||
@app.post("/probe", response_model=ProbeResponse)
|
||||
|
|
@ -330,7 +328,7 @@ async def probe_video(request: ProbeRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Probe failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
|
||||
|
||||
@app.post("/encode-segment", response_model=RunFFmpegResponse)
|
||||
|
|
@ -382,7 +380,7 @@ async def encode_segment(request: EncodeSegmentRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Encode segment failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
|
||||
|
||||
@app.post("/extract-frame", response_model=RunFFmpegResponse)
|
||||
|
|
@ -427,7 +425,7 @@ async def extract_frame(request: ExtractFrameRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Extract frame failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
|
||||
|
||||
@app.post("/create-freeze-segment", response_model=RunFFmpegResponse)
|
||||
|
|
@ -482,7 +480,7 @@ async def create_freeze_segment(request: CreateFreezeSegmentRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Create freeze segment failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
|
||||
|
||||
@app.post("/concatenate", response_model=RunFFmpegResponse)
|
||||
|
|
@ -536,4 +534,4 @@ async def concatenate_segments(request: ConcatenateRequest):
|
|||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Concatenate failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
raise HTTPException(status_code=500, detail=str(e)) from None
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import HTTPException, UploadFile
|
||||
from google.cloud import storage
|
||||
|
|
@ -13,16 +12,27 @@ from ..core.logging import get_logger
|
|||
logger = get_logger(__name__)
|
||||
|
||||
class GCSService:
|
||||
def __init__(self):
|
||||
self.client = storage.Client(project=settings.gcp_project_id)
|
||||
self.bucket = self.client.bucket(settings.gcs_bucket)
|
||||
def __init__(self) -> None:
|
||||
self._client: storage.Client | None = None
|
||||
self._bucket = None
|
||||
self.executor = ThreadPoolExecutor(max_workers=4)
|
||||
|
||||
@property
def bucket(self):
    """Return the GCS bucket handle, creating the client and bucket on first use."""
    if self._bucket is not None:
        return self._bucket
    # First access: build the storage client, then the bucket handle from it.
    self._client = storage.Client(project=settings.gcp_project_id)
    self._bucket = self._client.bucket(settings.gcs_bucket)
    return self._bucket

@bucket.setter
def bucket(self, value) -> None:
    """Replace the cached bucket handle with ``value``."""
    self._bucket = value
|
||||
|
||||
async def upload_file_to_gcs(
|
||||
self,
|
||||
file: UploadFile,
|
||||
destination_path: str,
|
||||
content_type: Optional[str] = None
|
||||
content_type: str | None = None
|
||||
) -> str:
|
||||
"""Upload file to GCS and return the GCS URI"""
|
||||
def _upload():
|
||||
|
|
@ -45,7 +55,7 @@ class GCSService:
|
|||
return await loop.run_in_executor(self.executor, _upload)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to upload file to GCS: {e}")
|
||||
raise HTTPException(status_code=500, detail="File upload failed")
|
||||
raise HTTPException(status_code=500, detail="File upload failed") from None
|
||||
|
||||
async def upload_text_to_gcs(
|
||||
self,
|
||||
|
|
@ -66,7 +76,7 @@ class GCSService:
|
|||
return await loop.run_in_executor(self.executor, _upload)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to upload text to GCS: {e}")
|
||||
raise HTTPException(status_code=500, detail="Text upload failed")
|
||||
raise HTTPException(status_code=500, detail="Text upload failed") from None
|
||||
|
||||
async def get_signed_url(
|
||||
self,
|
||||
|
|
@ -94,10 +104,26 @@ class GCSService:
|
|||
try:
|
||||
return await loop.run_in_executor(self.executor, _get_signed_url)
|
||||
except NotFound:
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
raise HTTPException(status_code=404, detail="File not found") from None
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to generate signed URL: {e}")
|
||||
raise HTTPException(status_code=500, detail="Failed to generate download URL")
|
||||
raise HTTPException(status_code=500, detail="Failed to generate download URL") from None
|
||||
|
||||
async def create_resumable_upload_session(self, blob_path: str, content_type: str) -> str:
    """Start a resumable upload for ``blob_path`` and return the session URI.

    The blocking GCS call runs on ``self.executor``. Any failure is logged and
    surfaced as an HTTP 500 with the original exception context suppressed.
    """
    def _open_session():
        target = self.bucket.blob(blob_path)
        return target.create_resumable_upload_session(
            content_type=content_type,
            timeout=60,
        )

    running_loop = asyncio.get_running_loop()
    try:
        session_uri = await running_loop.run_in_executor(self.executor, _open_session)
    except Exception as err:
        logger.error(f"Failed to create resumable upload session: {err}")
        raise HTTPException(status_code=500, detail="Failed to initiate upload session") from None
    return session_uri
|
||||
|
||||
async def delete_file(self, blob_path: str) -> bool:
|
||||
"""Delete a file from GCS"""
|
||||
|
|
@ -113,7 +139,7 @@ class GCSService:
|
|||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete file from GCS: {e}")
|
||||
raise HTTPException(status_code=500, detail="File deletion failed")
|
||||
raise HTTPException(status_code=500, detail="File deletion failed") from None
|
||||
|
||||
async def file_exists(self, blob_path: str) -> bool:
|
||||
"""Check if a file exists in GCS"""
|
||||
|
|
@ -125,6 +151,22 @@ class GCSService:
|
|||
return await loop.run_in_executor(self.executor, _exists)
|
||||
|
||||
|
||||
def gcs_path(job: "dict | object", *parts: str) -> str:
    """Return a GCS object path rooted at the job's prefix.

    Jobs created before MT-14 have no gcs_prefix and use bare job_id/ as the
    prefix. New jobs get prefix=orgs/{org_id}/jobs/{job_id}/.

    Args:
        job: Either a job dict (``gcs_prefix`` / ``_id`` keys) or an object
            exposing ``gcs_prefix`` / ``id`` attributes. An object with neither
            falls back to ``str(job)``.
        *parts: Path segments appended after the prefix, joined with "/".

    Returns:
        The prefix with trailing slashes removed, followed by ``parts`` if any.

    Raises:
        KeyError: If ``job`` is a dict with no truthy ``gcs_prefix`` and no ``_id``.
        ValueError: If the resolved prefix is ``None``.
    """
    if isinstance(job, dict):
        prefix = job.get("gcs_prefix") or job["_id"]
    else:
        prefix = getattr(job, "gcs_prefix", None) or getattr(job, "id", str(job))
    if prefix is None:
        raise ValueError("job has neither a gcs_prefix nor an id")
    # The id fallback may not be a str (e.g. a database id object), so coerce
    # before applying string operations.
    prefix = str(prefix).rstrip("/")
    return "/".join([prefix, *parts]) if parts else prefix
|
||||
|
||||
|
||||
# Global GCS service instance
|
||||
gcs_service = GCSService()
|
||||
|
||||
|
|
@ -141,6 +183,9 @@ async def upload_json_to_gcs(content: str, destination_path: str) -> str:
|
|||
async def get_signed_download_url(blob_path: str, expiration_hours: int = 24) -> str:
|
||||
return await gcs_service.get_signed_url(blob_path, expiration_hours)
|
||||
|
||||
async def create_resumable_upload_session(blob_path: str, content_type: str) -> str:
    """Module-level shortcut that delegates to the shared ``gcs_service`` instance."""
    session_uri = await gcs_service.create_resumable_upload_session(blob_path, content_type)
    return session_uri
|
||||
|
||||
async def generate_signed_upload_url(
|
||||
blob_path: str,
|
||||
content_type: str,
|
||||
|
|
@ -149,7 +194,7 @@ async def generate_signed_upload_url(
|
|||
"""Generate a signed URL for direct browser-to-GCS upload"""
|
||||
def _generate():
|
||||
blob = gcs_service.bucket.blob(blob_path)
|
||||
|
||||
|
||||
# Generate signed POST URL
|
||||
url, fields = blob.generate_signed_post_policy_v4(
|
||||
expiration=timedelta(hours=1),
|
||||
|
|
@ -161,8 +206,8 @@ async def generate_signed_upload_url(
|
|||
"Content-Type": content_type
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
return {"url": url, "fields": fields}
|
||||
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(gcs_service.executor, _generate)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
import json
|
||||
import asyncio
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
from typing import Any
|
||||
|
||||
import google.genai as genai
|
||||
|
||||
|
|
@ -21,7 +21,7 @@ async def _record_gemini_usage(
|
|||
model: str,
|
||||
user_id: str,
|
||||
job_id: str,
|
||||
project_id: Optional[str],
|
||||
project_id: str | None,
|
||||
elapsed_ms: int,
|
||||
) -> None:
|
||||
try:
|
||||
|
|
@ -44,10 +44,39 @@ async def _record_gemini_usage(
|
|||
|
||||
|
||||
class GeminiService:
|
||||
_fallback_models: list[str] = [
|
||||
"gemini-3-flash-preview",
|
||||
"gemini-2.5-pro",
|
||||
]
|
||||
|
||||
def __init__(self):
|
||||
self.model_name = 'gemini-3-pro-preview' # Gemini 3 Pro preview model
|
||||
self.model_name = 'gemini-3.1-pro-preview'
|
||||
self.prompts_dir = Path(__file__).parent.parent / "prompts"
|
||||
|
||||
async def _generate(self, contents: Any, config: Any = None) -> tuple[Any, str]:
    """Run generate_content on the primary model, then on each fallback model in turn.

    A model is skipped when its response has no text, or when it raises an
    error whose message contains 429 / RESOURCE_EXHAUSTED / 503 / UNAVAILABLE.
    Any other exception propagates immediately. If every model is skipped, the
    most recently recorded failure is raised.

    Returns:
        ``(response, model_used)`` for the first model that produced text.
    """
    transient_markers = ("429", "RESOURCE_EXHAUSTED", "503", "UNAVAILABLE")
    last_exc: Exception
    for candidate in (self.model_name, *self._fallback_models):
        try:
            call_kwargs: dict[str, Any] = {"model": candidate, "contents": contents}
            if config is not None:
                call_kwargs["config"] = config
            # generate_content is blocking; run it off the event loop.
            response = await asyncio.to_thread(client.models.generate_content, **call_kwargs)
            if response.text is None:
                logger.warning(f"Model {candidate!r} returned empty response (safety block or overload), trying next fallback")
                last_exc = RuntimeError(f"Model {candidate!r} returned empty response")
                continue
            if candidate != self.model_name:
                logger.warning(f"Used fallback model {candidate!r} (primary unavailable)")
            return response, candidate
        except Exception as exc:
            message = str(exc)
            if not any(marker in message for marker in transient_markers):
                raise
            logger.warning(f"Model {candidate!r} unavailable, trying next fallback")
            last_exc = exc
    raise last_exc  # noqa: F821 — set in loop above when all models exhausted
|
||||
|
||||
def _load_prompt(self, prompt_file: str) -> str:
|
||||
"""Load prompt template from prompts directory"""
|
||||
prompt_path = self.prompts_dir / prompt_file
|
||||
|
|
@ -61,31 +90,31 @@ class GeminiService:
|
|||
"""Wait for uploaded file to become ACTIVE state"""
|
||||
wait_time = 1 # Start with 1 second
|
||||
total_waited = 0
|
||||
|
||||
|
||||
while total_waited < max_wait_seconds:
|
||||
try:
|
||||
# Get file status - use asyncio.to_thread to avoid blocking event loop
|
||||
file_info = await asyncio.to_thread(client.files.get, name=file_name)
|
||||
logger.info(f"File {file_name} status: {file_info.state} (waited {total_waited}s)")
|
||||
|
||||
|
||||
if file_info.state == "ACTIVE":
|
||||
logger.info(f"File {file_name} is now ACTIVE!")
|
||||
return True
|
||||
elif file_info.state == "FAILED":
|
||||
logger.error(f"File {file_name} processing FAILED")
|
||||
return False
|
||||
|
||||
|
||||
# Wait with exponential backoff (max 30s)
|
||||
logger.info(f"File not ready, waiting {wait_time}s...")
|
||||
await asyncio.sleep(wait_time)
|
||||
total_waited += wait_time
|
||||
wait_time = min(wait_time * 1.5, 30) # Exponential backoff, max 30s
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking file status: {e}")
|
||||
await asyncio.sleep(5) # Wait 5s on error
|
||||
total_waited += 5
|
||||
|
||||
|
||||
logger.error(f"File {file_name} did not become ACTIVE within {max_wait_seconds}s")
|
||||
return False
|
||||
|
||||
|
|
@ -107,13 +136,25 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
- Maintain the same timestamp format as captions_vtt (HH:MM:SS.mmm --> HH:MM:SS.mmm)
|
||||
- Only add sound effect cues where they add meaningful context; do not annotate every minor sound"""
|
||||
|
||||
def _build_glossary_block(self, glossary_block: Optional[str]) -> str:
|
||||
def _build_glossary_block(self, glossary_block: str | None) -> str:
|
||||
"""Return the pre-built glossary block (from glossary_service.build_glossary_prompt_block), or empty string."""
|
||||
if glossary_block and glossary_block.strip():
|
||||
return glossary_block.strip()
|
||||
return ""
|
||||
|
||||
def _build_brand_context_block(self, brand_context: Optional[str]) -> str:
|
||||
def _build_source_has_ad_block(self, source_has_ad: bool) -> str:
|
||||
if source_has_ad:
|
||||
return (
|
||||
"SOURCE AUDIO DESCRIPTION NOTICE: This video already has professional audio descriptions "
|
||||
"embedded in its audio track. "
|
||||
"1) Return an empty audio_description_vtt containing only the WEBVTT header (\"WEBVTT\\n\") — do NOT generate new audio descriptions. "
|
||||
"2) For captions_vtt: transcribe ONLY the original program dialogue and relevant sound effects. "
|
||||
"Do NOT caption the audio description narration — AD narration is spoken during natural pauses "
|
||||
"and describes visual scenes rather than being part of the original dialogue."
|
||||
)
|
||||
return ""
|
||||
|
||||
def _build_brand_context_block(self, brand_context: str | None) -> str:
|
||||
"""Build the brand context instruction block for injection into prompts."""
|
||||
if brand_context and brand_context.strip():
|
||||
brands = [b.strip() for b in brand_context.split(",") if b.strip()]
|
||||
|
|
@ -125,7 +166,7 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
)
|
||||
return "No specific brand names have been provided for this video."
|
||||
|
||||
async def extract_accessibility(self, video_file_path: str, brand_context: Optional[str] = None, sdh_requested: bool = False, glossary_block: Optional[str] = None, _cost_ctx: Optional[dict] = None) -> dict[str, Any]:
|
||||
async def extract_accessibility(self, video_file_path: str, brand_context: str | None = None, sdh_requested: bool = False, glossary_block: str | None = None, source_has_ad: bool = False, _cost_ctx: dict | None = None) -> dict[str, Any]:
|
||||
"""
|
||||
Extract captions and audio descriptions from video using Gemini 2.0
|
||||
Returns structured JSON with transcript, captions VTT, and audio description VTT
|
||||
|
|
@ -137,12 +178,13 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
.replace("{GLOSSARY}", self._build_glossary_block(glossary_block))
|
||||
.replace("{SDH_FIELD}", self._build_sdh_field(sdh_requested))
|
||||
.replace("{SDH_GUIDELINES}", self._build_sdh_guidelines(sdh_requested))
|
||||
.replace("{SOURCE_HAS_AD}", self._build_source_has_ad_block(source_has_ad))
|
||||
)
|
||||
uploaded_file = None
|
||||
|
||||
try:
|
||||
logger.info(f"Starting Gemini processing for video: {video_file_path}")
|
||||
|
||||
|
||||
# Upload video file to Gemini using new API - use asyncio.to_thread to avoid blocking
|
||||
logger.info("Uploading video file to Gemini API...")
|
||||
uploaded_file = await asyncio.to_thread(
|
||||
|
|
@ -154,19 +196,17 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
}
|
||||
)
|
||||
logger.info(f"Successfully uploaded file: {uploaded_file.name} (URI: {uploaded_file.uri})")
|
||||
|
||||
|
||||
# Wait for file to become ACTIVE before using it
|
||||
logger.info("Waiting for file to become ACTIVE...")
|
||||
file_ready = await self._wait_for_file_active(uploaded_file.name)
|
||||
if not file_ready:
|
||||
raise Exception("File failed to become ACTIVE within timeout")
|
||||
|
||||
|
||||
# Generate content using new API - use asyncio.to_thread to avoid blocking
|
||||
logger.info("Generating content with Gemini model...")
|
||||
_t0 = time.monotonic()
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
response, _model_used = await self._generate(
|
||||
contents=[
|
||||
genai.types.Part.from_text(text=prompt),
|
||||
genai.types.Part.from_uri(
|
||||
|
|
@ -175,13 +215,13 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
)
|
||||
],
|
||||
config=genai.types.GenerateContentConfig(
|
||||
temperature=0.2, # Lower temperature for consistent, deterministic AD output
|
||||
temperature=0.2,
|
||||
top_p=0.8,
|
||||
top_k=40,
|
||||
),
|
||||
)
|
||||
if _cost_ctx:
|
||||
asyncio.create_task(_record_gemini_usage(response, self.model_name, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
asyncio.create_task(_record_gemini_usage(response, _model_used, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
|
||||
# Parse JSON response
|
||||
response_text = response.text.strip()
|
||||
|
|
@ -191,10 +231,10 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
if response_text.startswith("```json"):
|
||||
response_text = response_text.replace("```json", "").replace("```", "").strip()
|
||||
logger.info("Cleaned markdown formatting from response")
|
||||
|
||||
|
||||
# Additional cleanup for common JSON issues
|
||||
response_text = response_text.strip()
|
||||
|
||||
|
||||
logger.info("Parsing JSON response...")
|
||||
try:
|
||||
result = json.loads(response_text)
|
||||
|
|
@ -253,7 +293,7 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
async def _self_heal_response(self, video_file_path: str, invalid_response: str) -> dict[str, Any]:
|
||||
"""Attempt to self-heal invalid JSON response from Gemini"""
|
||||
logger.info("Attempting to self-heal JSON response without re-uploading video")
|
||||
|
||||
|
||||
# Try to fix common JSON issues first
|
||||
try:
|
||||
fixed_response = self._attempt_json_fix(invalid_response)
|
||||
|
|
@ -262,7 +302,7 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w
|
|||
return fixed_response
|
||||
except Exception as e:
|
||||
logger.warning(f"JSON fix attempt failed: {e}")
|
||||
|
||||
|
||||
# If simple fixes don't work, try a text-only self-heal prompt with more context
|
||||
self_heal_prompt = f"""
|
||||
SYSTEM: You are a JSON repair service. Fix the malformed JSON below and return ONLY the corrected JSON.
|
||||
|
|
@ -282,26 +322,24 @@ Fix the JSON and return it:
|
|||
"""
|
||||
|
||||
try:
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
response, _ = await self._generate(
|
||||
contents=[genai.types.Part.from_text(text=self_heal_prompt)]
|
||||
)
|
||||
|
||||
response_text = response.text.strip()
|
||||
|
||||
|
||||
# Handle potential markdown formatting
|
||||
if response_text.startswith("```json"):
|
||||
response_text = response_text.replace("```json", "").replace("```", "").strip()
|
||||
|
||||
result = json.loads(response_text)
|
||||
|
||||
|
||||
# Validate that all required fields are present after healing
|
||||
required_fields = [
|
||||
"language", "confidence", "summary",
|
||||
"transcript_plaintext", "captions_vtt", "audio_description_vtt"
|
||||
]
|
||||
|
||||
|
||||
missing_fields = [field for field in required_fields if field not in result]
|
||||
if missing_fields:
|
||||
logger.error(f"Self-heal lost required fields: {missing_fields}")
|
||||
|
|
@ -309,27 +347,27 @@ Fix the JSON and return it:
|
|||
if "audio_description_vtt" in missing_fields:
|
||||
logger.info("Creating fallback audio_description_vtt")
|
||||
result["audio_description_vtt"] = "WEBVTT\n\n00:00:00.000 --> 00:00:05.000\nVideo content with visual elements described."
|
||||
|
||||
|
||||
# If other critical fields are missing, raise an error
|
||||
remaining_missing = [f for f in missing_fields if f != "audio_description_vtt"]
|
||||
if remaining_missing:
|
||||
raise ValueError(f"Self-heal failed to preserve required fields: {remaining_missing}")
|
||||
|
||||
|
||||
logger.info("Successfully self-healed Gemini response with all required fields")
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Self-heal attempt failed: {e}")
|
||||
raise ValueError("Failed to get valid JSON from Gemini after self-heal attempt")
|
||||
raise ValueError("Failed to get valid JSON from Gemini after self-heal attempt") from e
|
||||
|
||||
async def extract_accessibility_targeted(
|
||||
self,
|
||||
video_file_path: str,
|
||||
target_language: str,
|
||||
brand_context: Optional[str] = None,
|
||||
brand_context: str | None = None,
|
||||
sdh_requested: bool = False,
|
||||
glossary_block: Optional[str] = None,
|
||||
_cost_ctx: Optional[dict] = None,
|
||||
glossary_block: str | None = None,
|
||||
_cost_ctx: dict | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Extract captions and audio descriptions from video using Gemini,
|
||||
|
|
@ -384,9 +422,7 @@ Fix the JSON and return it:
|
|||
# Generate content using new API
|
||||
logger.info(f"Generating content with Gemini model for {target_language}...")
|
||||
_t0 = time.monotonic()
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
response, _model_used = await self._generate(
|
||||
contents=[
|
||||
genai.types.Part.from_text(text=prompt),
|
||||
genai.types.Part.from_uri(
|
||||
|
|
@ -396,7 +432,7 @@ Fix the JSON and return it:
|
|||
]
|
||||
)
|
||||
if _cost_ctx:
|
||||
asyncio.create_task(_record_gemini_usage(response, self.model_name, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
asyncio.create_task(_record_gemini_usage(response, _model_used, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
|
||||
# Parse JSON response
|
||||
response_text = response.text.strip()
|
||||
|
|
@ -499,9 +535,7 @@ Fix the JSON and return it:
|
|||
"""
|
||||
|
||||
try:
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
response, _ = await self._generate(
|
||||
contents=[genai.types.Part.from_text(text=self_heal_prompt)]
|
||||
)
|
||||
|
||||
|
|
@ -533,7 +567,7 @@ Fix the JSON and return it:
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"Self-heal attempt failed for {target_language}: {e}")
|
||||
raise ValueError(f"Failed to get valid JSON from Gemini targeted extraction for {target_language}")
|
||||
raise ValueError(f"Failed to get valid JSON from Gemini targeted extraction for {target_language}") from e
|
||||
|
||||
def _attempt_json_fix(self, json_text: str) -> dict[str, Any] | None:
|
||||
"""Attempt to fix common JSON syntax issues"""
|
||||
|
|
@ -541,17 +575,17 @@ Fix the JSON and return it:
|
|||
fixes_tried = []
|
||||
fixed_text = json_text
|
||||
import re
|
||||
|
||||
|
||||
# Fix 1: Remove trailing commas
|
||||
fixed_text = re.sub(r',(\s*[}\]])', r'\1', fixed_text)
|
||||
fixes_tried.append("removed trailing commas")
|
||||
|
||||
|
||||
# Fix 2: Try to fix unterminated strings by adding closing quote and brace
|
||||
if fixed_text.count('"') % 2 != 0: # Odd number of quotes suggests unterminated string
|
||||
# Find the last quote and see if we need to close the JSON
|
||||
last_quote_pos = fixed_text.rfind('"')
|
||||
remainder = fixed_text[last_quote_pos + 1:].strip()
|
||||
|
||||
|
||||
# If there's no closing brace after the last quote, try to fix it
|
||||
if remainder and not remainder.endswith('}'):
|
||||
# Try to intelligently close the JSON
|
||||
|
|
@ -562,27 +596,27 @@ Fix the JSON and return it:
|
|||
else:
|
||||
fixed_text += '"'
|
||||
fixes_tried.append("closed unterminated string")
|
||||
|
||||
|
||||
# Fix 3: Ensure JSON ends with closing brace
|
||||
if not fixed_text.rstrip().endswith('}'):
|
||||
fixed_text = fixed_text.rstrip() + '\n}'
|
||||
fixes_tried.append("added closing brace")
|
||||
|
||||
|
||||
try:
|
||||
result = json.loads(fixed_text)
|
||||
logger.info(f"JSON fixed with: {', '.join(fixes_tried)}")
|
||||
|
||||
|
||||
# Validate that we have the required fields
|
||||
required_fields = [
|
||||
"language", "confidence", "summary",
|
||||
"transcript_plaintext", "captions_vtt", "audio_description_vtt"
|
||||
]
|
||||
|
||||
|
||||
missing_fields = [field for field in required_fields if field not in result]
|
||||
if missing_fields:
|
||||
logger.warning(f"Fixed JSON is missing required fields: {missing_fields}")
|
||||
return None # Let the more advanced self-healing handle this
|
||||
|
||||
|
||||
return result
|
||||
except json.JSONDecodeError as e:
|
||||
logger.debug(f"JSON fix attempt failed: {e}")
|
||||
|
|
@ -658,9 +692,7 @@ Fix the JSON and return it:
|
|||
|
||||
# Generate content with video and prompt
|
||||
logger.info("Analyzing video with Gemini for accessible video placement...")
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
response, _ = await self._generate(
|
||||
contents=[
|
||||
genai.types.Part.from_text(text=prompt),
|
||||
genai.types.Part.from_uri(
|
||||
|
|
@ -742,9 +774,7 @@ Fix the JSON and return it:
|
|||
"""
|
||||
|
||||
try:
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
response, _ = await self._generate(
|
||||
contents=[genai.types.Part.from_text(text=self_heal_prompt)]
|
||||
)
|
||||
|
||||
|
|
@ -758,16 +788,16 @@ Fix the JSON and return it:
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"Self-heal attempt for accessible video analysis failed: {e}")
|
||||
raise ValueError("Failed to get valid JSON from accessible video analysis after self-heal")
|
||||
raise ValueError("Failed to get valid JSON from accessible video analysis after self-heal") from e
|
||||
|
||||
async def transcreate_content(
|
||||
self,
|
||||
captions_vtt: str,
|
||||
ad_vtt: str,
|
||||
target_language: str,
|
||||
brief: Optional[str] = None,
|
||||
glossary_block: Optional[str] = None,
|
||||
_cost_ctx: Optional[dict] = None,
|
||||
brief: str | None = None,
|
||||
glossary_block: str | None = None,
|
||||
_cost_ctx: dict | None = None,
|
||||
) -> dict[str, str]:
|
||||
"""
|
||||
Transcreate English VTT content to target language with cultural adaptation
|
||||
|
|
@ -792,15 +822,11 @@ JSON:
|
|||
|
||||
try:
|
||||
_t0 = time.monotonic()
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[
|
||||
genai.types.Part.from_text(text=prompt + "\n\n" + user_prompt)
|
||||
]
|
||||
response, _model_used = await self._generate(
|
||||
contents=[genai.types.Part.from_text(text=prompt + "\n\n" + user_prompt)]
|
||||
)
|
||||
if _cost_ctx:
|
||||
asyncio.create_task(_record_gemini_usage(response, self.model_name, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
asyncio.create_task(_record_gemini_usage(response, _model_used, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
|
||||
response_text = response.text.strip()
|
||||
|
||||
|
|
@ -819,7 +845,7 @@ JSON:
|
|||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse transcreation JSON response: {e}")
|
||||
raise ValueError("Invalid JSON response from transcreation")
|
||||
raise ValueError("Invalid JSON response from transcreation") from e
|
||||
except Exception as e:
|
||||
logger.error(f"Transcreation failed: {e}")
|
||||
raise
|
||||
|
|
@ -829,8 +855,9 @@ JSON:
|
|||
vtt_content: str,
|
||||
target_language: str,
|
||||
source_language: str = "en",
|
||||
glossary_block: Optional[str] = None,
|
||||
_cost_ctx: Optional[dict] = None,
|
||||
glossary_block: str | None = None,
|
||||
style: str = "literal",
|
||||
_cost_ctx: dict | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Translate VTT content using Gemini, preserving timing programmatically.
|
||||
|
|
@ -839,9 +866,10 @@ JSON:
|
|||
1. Send only the text cues (no timestamps) to Gemini as a numbered list
|
||||
2. Apply translated texts back onto the original VTT using translate_preserving_timing()
|
||||
|
||||
This avoids any possibility of Gemini drifting or altering timestamps.
|
||||
style="literal" — direct translation preserving meaning exactly
|
||||
style="transcreate" — culturally adapted but still returns EXACTLY N cues 1:1
|
||||
"""
|
||||
from ..lib.vtt import VTTParser, VTTEditor
|
||||
from ..lib.vtt import VTTEditor, VTTParser
|
||||
|
||||
source_cues = VTTParser.parse(vtt_content)
|
||||
if not source_cues:
|
||||
|
|
@ -850,6 +878,13 @@ JSON:
|
|||
|
||||
cue_count = len(source_cues)
|
||||
|
||||
_style_instruction = (
|
||||
"- Culturally adapt the text for {tgt} audiences (brand voice, natural phrasing), "
|
||||
"while keeping accessibility intent and line length (~32–40 chars)\n"
|
||||
if style == "transcreate"
|
||||
else ""
|
||||
)
|
||||
|
||||
async def _attempt_translation(extra_instruction: str = "") -> list[str]:
|
||||
numbered_texts = "\n".join(
|
||||
f"{i + 1}. {cue.text.replace(chr(10), ' ')}"
|
||||
|
|
@ -859,26 +894,29 @@ JSON:
|
|||
_tgt_label = locale_lib.get_gemini_label(target_language)
|
||||
_glossary_section = self._build_glossary_block(glossary_block)
|
||||
_glossary_line = f"\n\n{_glossary_section}" if _glossary_section else ""
|
||||
_glossary_req = (
|
||||
"\n- MUST use the exact approved terms from the glossary below — these override natural translation choices, even for English terms"
|
||||
if _glossary_section else ""
|
||||
)
|
||||
_adapt_line = _style_instruction.format(tgt=_tgt_label) if style == "transcreate" else ""
|
||||
prompt = f"""Translate the following {cue_count} numbered text segments from {_src_label} to {_tgt_label}.
|
||||
|
||||
REQUIREMENTS:
|
||||
- Return EXACTLY {cue_count} numbered lines, one translation per line
|
||||
- Format: "1. translated text", "2. translated text", etc.
|
||||
- Preserve speaker labels like [Speaker 1]: unchanged
|
||||
- Use natural, idiomatic {_tgt_label}
|
||||
- Do NOT add any explanation, preamble, or extra lines{extra_instruction}{_glossary_line}
|
||||
- {_adapt_line}Use natural, idiomatic {_tgt_label}
|
||||
- Do NOT add any explanation, preamble, or extra lines{extra_instruction}{_glossary_req}{_glossary_line}
|
||||
|
||||
Segments to translate:
|
||||
{numbered_texts}"""
|
||||
|
||||
_t0 = time.monotonic()
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
response, _model_used = await self._generate(
|
||||
contents=[genai.types.Part.from_text(text=prompt)]
|
||||
)
|
||||
if _cost_ctx:
|
||||
asyncio.create_task(_record_gemini_usage(response, self.model_name, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
asyncio.create_task(_record_gemini_usage(response, _model_used, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
return self._parse_numbered_translation(response.text.strip(), cue_count)
|
||||
|
||||
try:
|
||||
|
|
@ -939,7 +977,7 @@ Segments to translate:
|
|||
self,
|
||||
original_text: str,
|
||||
language: str = "en",
|
||||
_cost_ctx: Optional[dict] = None,
|
||||
_cost_ctx: dict | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Rewrite an audio description cue to be TTS-friendly.
|
||||
|
|
@ -965,13 +1003,11 @@ Segments to translate:
|
|||
logger.info(f"Rewriting TTS cue for safety: '{original_text[:50]}...'")
|
||||
|
||||
_t0 = time.monotonic()
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
response, _model_used = await self._generate(
|
||||
contents=[genai.types.Part.from_text(text=prompt)]
|
||||
)
|
||||
if _cost_ctx:
|
||||
asyncio.create_task(_record_gemini_usage(response, self.model_name, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
asyncio.create_task(_record_gemini_usage(response, _model_used, _cost_ctx.get("user_id", "system"), _cost_ctx.get("job_id", ""), _cost_ctx.get("project_id"), int((time.monotonic() - _t0) * 1000)))
|
||||
|
||||
result = response.text.strip()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
import io
|
||||
import wave
|
||||
import re
|
||||
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
from google.cloud import texttospeech
|
||||
from pydub import AudioSegment
|
||||
|
||||
from ..core.config import settings
|
||||
|
|
@ -23,14 +22,26 @@ class TTSSynthesisError(Exception):
|
|||
|
||||
|
||||
class GeminiTTSService:
|
||||
"""Text-to-Speech service using Gemini TTS API"""
|
||||
"""Text-to-Speech service using Google Cloud Text-to-Speech API with Gemini models."""
|
||||
|
||||
def __init__(self):
|
||||
self.client = genai.Client(api_key=settings.gemini_api_key)
|
||||
self.client = texttospeech.TextToSpeechClient()
|
||||
self.model = settings.gemini_tts_model
|
||||
self.default_voice = settings.gemini_tts_default_voice
|
||||
logger.info(f"Gemini TTS service initialized with model: {self.model}")
|
||||
|
||||
@staticmethod
|
||||
def _extract_retry_after(error: Exception) -> float | None:
|
||||
"""Return seconds to wait from a Google API 429 retryDelay, or None."""
|
||||
msg = str(error)
|
||||
m = re.search(r"retry in ([0-9.]+)s", msg, re.IGNORECASE)
|
||||
if m:
|
||||
return float(m.group(1)) + 5
|
||||
m = re.search(r"'retryDelay':\s*'([0-9.]+)s'", msg)
|
||||
if m:
|
||||
return float(m.group(1)) + 5
|
||||
return None
|
||||
|
||||
async def synthesize_text(
|
||||
self,
|
||||
text: str,
|
||||
|
|
@ -41,117 +52,56 @@ class GeminiTTSService:
|
|||
style_prompt: str = ""
|
||||
) -> bytes:
|
||||
"""
|
||||
Synthesize text to audio using Gemini TTS.
|
||||
Returns MP3 audio bytes.
|
||||
Synthesize text to MP3 using Google Cloud TTS with Gemini model.
|
||||
|
||||
Args:
|
||||
text: The text to synthesize
|
||||
voice_name: Name of the voice to use
|
||||
language: Language code (e.g., "en", "es")
|
||||
model: Model variant - "flash" (fast) or "pro" (quality)
|
||||
speed: Speech rate multiplier (0.5 to 2.0)
|
||||
style_prompt: Style instructions to prepend (e.g., "Speak calmly...")
|
||||
voice_name: Gemini voice name (e.g. "Kore", "Puck")
|
||||
language: Language code (e.g. "en", "en-US", "fr")
|
||||
model: Model variant key — "flash" or "pro"
|
||||
speed: Speech rate multiplier (0.25–4.0)
|
||||
style_prompt: Natural-language style instruction sent as prompt
|
||||
"""
|
||||
if not text.strip():
|
||||
raise ValueError("Text cannot be empty")
|
||||
|
||||
# Validate voice
|
||||
if voice_name not in settings.gemini_tts_voices:
|
||||
logger.warning(f"Unknown voice '{voice_name}', using default '{self.default_voice}'")
|
||||
voice_name = self.default_voice
|
||||
|
||||
# Select model from config
|
||||
model_id = settings.gemini_tts_models.get(model, settings.gemini_tts_model)
|
||||
|
||||
# Build the full prompt with style and speed instructions
|
||||
prompt_parts = []
|
||||
|
||||
# Add style prompt if provided
|
||||
if style_prompt:
|
||||
prompt_parts.append(style_prompt)
|
||||
|
||||
# Add speed instruction if not default
|
||||
if speed != 1.0:
|
||||
speed_pct = int(speed * 100)
|
||||
if speed < 1.0:
|
||||
prompt_parts.append(f"Speak slowly at approximately {speed_pct}% of normal speed. ")
|
||||
else:
|
||||
prompt_parts.append(f"Speak quickly at approximately {speed_pct}% of normal speed. ")
|
||||
|
||||
# Combine prompts with actual text
|
||||
full_text = "".join(prompt_parts) + text
|
||||
language_code = locale_lib.get_tts_lang(language)
|
||||
|
||||
try:
|
||||
# Generate audio using Gemini TTS
|
||||
response = self.client.models.generate_content(
|
||||
model=model_id,
|
||||
contents=full_text,
|
||||
config=types.GenerateContentConfig(
|
||||
response_modalities=["AUDIO"],
|
||||
speech_config=types.SpeechConfig(
|
||||
voice_config=types.VoiceConfig(
|
||||
prebuilt_voice_config=types.PrebuiltVoiceConfig(
|
||||
voice_name=voice_name,
|
||||
)
|
||||
)
|
||||
),
|
||||
synthesis_input = texttospeech.SynthesisInput(text=text)
|
||||
if style_prompt:
|
||||
synthesis_input = texttospeech.SynthesisInput(
|
||||
text=text,
|
||||
prompt=style_prompt,
|
||||
)
|
||||
|
||||
response = self.client.synthesize_speech(
|
||||
input=synthesis_input,
|
||||
voice=texttospeech.VoiceSelectionParams(
|
||||
language_code=language_code,
|
||||
name=voice_name,
|
||||
model_name=model_id,
|
||||
),
|
||||
audio_config=texttospeech.AudioConfig(
|
||||
audio_encoding=texttospeech.AudioEncoding.MP3,
|
||||
speaking_rate=speed,
|
||||
),
|
||||
)
|
||||
|
||||
# Extract PCM audio data from response with proper null-safe checks
|
||||
if not response.candidates:
|
||||
logger.error(
|
||||
f"Gemini TTS response missing candidates. "
|
||||
f"Response type: {type(response)}, Response: {response}"
|
||||
)
|
||||
raise ValueError("No candidates in Gemini TTS response")
|
||||
if not response.audio_content:
|
||||
raise ValueError("Empty audio content in Cloud TTS response")
|
||||
|
||||
candidate = response.candidates[0]
|
||||
|
||||
if candidate.content is None:
|
||||
logger.error(
|
||||
f"Gemini TTS candidate has no content. "
|
||||
f"Finish reason: {getattr(candidate, 'finish_reason', 'unknown')}, "
|
||||
f"Safety ratings: {getattr(candidate, 'safety_ratings', 'unknown')}"
|
||||
)
|
||||
raise ValueError(
|
||||
f"Candidate content is None in Gemini TTS response. "
|
||||
f"Finish reason: {getattr(candidate, 'finish_reason', 'unknown')}"
|
||||
)
|
||||
|
||||
if not candidate.content.parts:
|
||||
logger.error(
|
||||
f"Gemini TTS content has no parts. "
|
||||
f"Content role: {getattr(candidate.content, 'role', 'unknown')}"
|
||||
)
|
||||
raise ValueError("No parts in Gemini TTS response content")
|
||||
|
||||
part = candidate.content.parts[0]
|
||||
if not hasattr(part, 'inline_data') or part.inline_data is None:
|
||||
logger.error(
|
||||
f"Gemini TTS part missing inline_data. "
|
||||
f"Part type: {type(part)}, Part: {part}"
|
||||
)
|
||||
raise ValueError("No inline_data in Gemini TTS response part")
|
||||
|
||||
pcm_data = part.inline_data.data
|
||||
|
||||
# Convert PCM to MP3
|
||||
mp3_data = self._pcm_to_mp3(pcm_data)
|
||||
|
||||
return mp3_data
|
||||
return response.audio_content
|
||||
|
||||
except Exception as e:
|
||||
# Log comprehensive error information for debugging
|
||||
error_context = {
|
||||
"text_length": len(text),
|
||||
"text_preview": text[:100] + "..." if len(text) > 100 else text,
|
||||
"voice_name": voice_name,
|
||||
"language": language,
|
||||
"model_id": model_id,
|
||||
}
|
||||
logger.error(
|
||||
f"Gemini TTS synthesis failed: {e}. Context: {error_context}"
|
||||
f"Gemini TTS synthesis failed: {e}. "
|
||||
f"text_len={len(text)}, voice={voice_name}, model={model_id}, lang={language_code}"
|
||||
)
|
||||
raise
|
||||
|
||||
|
|
@ -163,23 +113,18 @@ class GeminiTTSService:
|
|||
speed: float = 1.0,
|
||||
style_prompt: str = ""
|
||||
) -> bytes:
|
||||
"""
|
||||
Generate a preview audio sample for voice selection.
|
||||
Uses language-specific sample text and applies all TTS settings.
|
||||
"""
|
||||
# Get preview sample text — try settings override, then locale registry, then fallback
|
||||
"""Generate a preview audio sample for voice selection."""
|
||||
sample_text = (
|
||||
settings.gemini_tts_preview_samples.get(language)
|
||||
or locale_lib.get_preview_sample(language)
|
||||
)
|
||||
|
||||
return await self.synthesize_text(
|
||||
sample_text,
|
||||
voice_name,
|
||||
language,
|
||||
model=model,
|
||||
speed=speed,
|
||||
style_prompt=style_prompt
|
||||
style_prompt=style_prompt,
|
||||
)
|
||||
|
||||
async def _synthesize_cue_with_retry(
|
||||
|
|
@ -194,26 +139,7 @@ class GeminiTTSService:
|
|||
max_attempts: int = 3,
|
||||
base_delay: float = 1.0
|
||||
) -> bytes:
|
||||
"""
|
||||
Synthesize a single cue with exponential backoff retry.
|
||||
|
||||
Args:
|
||||
cue_index: Index of the cue (for error reporting)
|
||||
text: Text to synthesize
|
||||
voice_name: TTS voice name
|
||||
language: Language code
|
||||
model: Model variant
|
||||
speed: Speech rate
|
||||
style_prompt: Style instructions
|
||||
max_attempts: Total attempts (1 initial + retries)
|
||||
base_delay: Base delay in seconds for backoff
|
||||
|
||||
Returns:
|
||||
MP3 audio bytes
|
||||
|
||||
Raises:
|
||||
TTSSynthesisError: If all attempts fail
|
||||
"""
|
||||
"""Synthesize a single cue with retry, honouring API-provided retryDelay on 429."""
|
||||
import asyncio
|
||||
import random
|
||||
|
||||
|
|
@ -228,32 +154,31 @@ class GeminiTTSService:
|
|||
language,
|
||||
model=model,
|
||||
speed=speed,
|
||||
style_prompt=style_prompt
|
||||
style_prompt=style_prompt,
|
||||
)
|
||||
except Exception as e:
|
||||
last_exception = e
|
||||
api_response_info = str(e)
|
||||
|
||||
if attempt < max_attempts - 1:
|
||||
# Exponential backoff with jitter
|
||||
delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
|
||||
api_delay = self._extract_retry_after(e)
|
||||
delay = api_delay if api_delay else base_delay * (2 ** attempt) + random.uniform(0, 1)
|
||||
logger.warning(
|
||||
f"TTS synthesis attempt {attempt + 1}/{max_attempts} failed for cue {cue_index}. "
|
||||
f"TTS attempt {attempt + 1}/{max_attempts} failed for cue {cue_index}. "
|
||||
f"Retrying in {delay:.2f}s. Error: {e}"
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
else:
|
||||
logger.error(
|
||||
f"TTS synthesis FAILED after {max_attempts} attempts for cue {cue_index}. "
|
||||
f"Text: {text[:50]}{'...' if len(text) > 50 else ''}. Error: {e}"
|
||||
f"TTS FAILED after {max_attempts} attempts for cue {cue_index}. "
|
||||
f"text='{text[:50]}{'...' if len(text) > 50 else ''}'. Error: {e}"
|
||||
)
|
||||
|
||||
# All retries exhausted - raise hard failure
|
||||
raise TTSSynthesisError(
|
||||
message=f"TTS synthesis failed after {max_attempts} attempts: {last_exception}",
|
||||
cue_index=cue_index,
|
||||
cue_text=text,
|
||||
api_response_info=api_response_info
|
||||
api_response_info=api_response_info,
|
||||
)
|
||||
|
||||
async def synthesize_audio_description(
|
||||
|
|
@ -268,56 +193,38 @@ class GeminiTTSService:
|
|||
"""
|
||||
Synthesize full audio description from VTT content.
|
||||
Maintains timing alignment with original VTT cues.
|
||||
|
||||
Args:
|
||||
ad_vtt_content: VTT content with audio description cues
|
||||
language: Language code (e.g., "en", "es")
|
||||
voice_name: Name of the voice to use (defaults to service default)
|
||||
model: Model variant - "flash" (fast) or "pro" (quality)
|
||||
speed: Speech rate multiplier (0.5 to 2.0)
|
||||
style_prompt: Style instructions to prepend to each cue
|
||||
"""
|
||||
if voice_name is None:
|
||||
voice_name = self.default_voice
|
||||
|
||||
# Validate voice
|
||||
if voice_name not in settings.gemini_tts_voices:
|
||||
logger.warning(f"Unknown voice '{voice_name}', using default '{self.default_voice}'")
|
||||
voice_name = self.default_voice
|
||||
|
||||
# Parse VTT cues
|
||||
cues = self._parse_ad_cues(ad_vtt_content)
|
||||
|
||||
if not cues:
|
||||
raise ValueError("No audio description cues found in VTT content")
|
||||
|
||||
logger.info(
|
||||
f"Synthesizing {len(cues)} audio description cues with voice '{voice_name}', "
|
||||
f"model '{model}', speed {speed}x"
|
||||
f"Synthesizing {len(cues)} AD cues: voice='{voice_name}', model='{model}', speed={speed}x"
|
||||
)
|
||||
|
||||
# Synthesize each cue with precise timing anchoring
|
||||
audio_segments = []
|
||||
current_audio_position = 0.0
|
||||
|
||||
for i, cue in enumerate(cues):
|
||||
target_start_time = cue["start_time"]
|
||||
|
||||
# Add silence to reach the exact VTT start time
|
||||
if target_start_time > current_audio_position:
|
||||
silence_duration = target_start_time - current_audio_position
|
||||
silence = AudioSegment.silent(duration=int(silence_duration * 1000))
|
||||
audio_segments.append(silence)
|
||||
audio_segments.append(AudioSegment.silent(duration=int(silence_duration * 1000)))
|
||||
current_audio_position = target_start_time
|
||||
|
||||
# Synthesize this cue's text
|
||||
text = cue["text"].strip()
|
||||
if text:
|
||||
# Ensure proper punctuation for natural TTS flow
|
||||
if not text.endswith(('.', '!', '?')):
|
||||
text += "."
|
||||
|
||||
# Use retry helper - will raise TTSSynthesisError on failure after retries
|
||||
audio_data = await self._synthesize_cue_with_retry(
|
||||
cue_index=i,
|
||||
text=text,
|
||||
|
|
@ -327,115 +234,62 @@ class GeminiTTSService:
|
|||
speed=speed,
|
||||
style_prompt=style_prompt,
|
||||
max_attempts=3,
|
||||
base_delay=1.0
|
||||
base_delay=1.0,
|
||||
)
|
||||
|
||||
# Convert to AudioSegment and get actual duration
|
||||
audio_segment = AudioSegment.from_file(io.BytesIO(audio_data), format="mp3")
|
||||
audio_segments.append(audio_segment)
|
||||
current_audio_position += len(audio_segment) / 1000.0
|
||||
|
||||
# Update position based on actual audio duration
|
||||
actual_audio_duration = len(audio_segment) / 1000.0
|
||||
current_audio_position += actual_audio_duration
|
||||
|
||||
# Combine all segments
|
||||
if audio_segments:
|
||||
final_audio = sum(audio_segments, AudioSegment.empty())
|
||||
else:
|
||||
final_audio = AudioSegment.silent(duration=1000)
|
||||
|
||||
# Export to MP3
|
||||
final_audio = sum(audio_segments, AudioSegment.empty()) if audio_segments else AudioSegment.silent(duration=1000)
|
||||
output_buffer = io.BytesIO()
|
||||
final_audio.export(output_buffer, format="mp3", bitrate="128k")
|
||||
|
||||
logger.info(f"Audio description synthesized: {len(output_buffer.getvalue())} bytes")
|
||||
return output_buffer.getvalue()
|
||||
|
||||
def _pcm_to_mp3(self, pcm_data: bytes) -> bytes:
|
||||
"""
|
||||
Convert raw PCM audio (24kHz, 16-bit, mono) to MP3.
|
||||
Gemini TTS outputs PCM at 24000 Hz sample rate.
|
||||
"""
|
||||
# Create WAV from PCM data
|
||||
wav_buffer = io.BytesIO()
|
||||
with wave.open(wav_buffer, "wb") as wf:
|
||||
wf.setnchannels(1) # Mono
|
||||
wf.setsampwidth(2) # 16-bit (2 bytes)
|
||||
wf.setframerate(24000) # 24kHz
|
||||
wf.writeframes(pcm_data)
|
||||
|
||||
# Convert WAV to MP3 using pydub
|
||||
wav_buffer.seek(0)
|
||||
audio_segment = AudioSegment.from_wav(wav_buffer)
|
||||
|
||||
# Export as MP3
|
||||
mp3_buffer = io.BytesIO()
|
||||
audio_segment.export(mp3_buffer, format="mp3", bitrate="128k")
|
||||
|
||||
return mp3_buffer.getvalue()
|
||||
|
||||
def _parse_ad_cues(self, vtt_content: str) -> list[dict]:
|
||||
"""Parse audio description VTT and extract timing + text"""
|
||||
"""Parse audio description VTT and extract timing + text."""
|
||||
lines = vtt_content.strip().split('\n')
|
||||
cues = []
|
||||
|
||||
i = 0
|
||||
while i < len(lines):
|
||||
line = lines[i].strip()
|
||||
|
||||
# Skip header and empty lines
|
||||
if line == "WEBVTT" or line == "" or line.startswith("NOTE"):
|
||||
if line in ("WEBVTT", "") or line.startswith("NOTE"):
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Check for timing line
|
||||
if " --> " in line:
|
||||
timing_parts = line.split(" --> ")
|
||||
start_time = self._parse_timestamp(timing_parts[0].strip())
|
||||
end_time = self._parse_timestamp(timing_parts[1].strip())
|
||||
|
||||
# Get text from next line(s)
|
||||
i += 1
|
||||
text_lines = []
|
||||
while i < len(lines) and lines[i].strip() != "":
|
||||
while i < len(lines) and lines[i].strip():
|
||||
text_lines.append(lines[i].strip())
|
||||
i += 1
|
||||
|
||||
if text_lines:
|
||||
cues.append({
|
||||
"start_time": start_time,
|
||||
"end_time": end_time,
|
||||
"text": " ".join(text_lines)
|
||||
})
|
||||
cues.append({"start_time": start_time, "end_time": end_time, "text": " ".join(text_lines)})
|
||||
else:
|
||||
i += 1
|
||||
|
||||
return cues
|
||||
|
||||
def _parse_timestamp(self, timestamp: str) -> float:
|
||||
"""Convert VTT timestamp to seconds"""
|
||||
"""Convert VTT timestamp to seconds."""
|
||||
parts = timestamp.split(":")
|
||||
|
||||
if len(parts) == 3: # HH:MM:SS.mmm
|
||||
if len(parts) == 3:
|
||||
hours, minutes, seconds = parts
|
||||
elif len(parts) == 2: # MM:SS.mmm
|
||||
elif len(parts) == 2:
|
||||
hours, minutes, seconds = "0", parts[0], parts[1]
|
||||
else:
|
||||
raise ValueError(f"Invalid timestamp format: {timestamp}")
|
||||
|
||||
sec_parts = seconds.split(".")
|
||||
seconds_val = int(sec_parts[0])
|
||||
milliseconds = int(sec_parts[1]) if len(sec_parts) > 1 else 0
|
||||
|
||||
total_seconds = (
|
||||
int(hours) * 3600 +
|
||||
int(minutes) * 60 +
|
||||
seconds_val +
|
||||
milliseconds / 1000.0
|
||||
return (
|
||||
int(hours) * 3600
|
||||
+ int(minutes) * 60
|
||||
+ int(sec_parts[0])
|
||||
+ (int(sec_parts[1]) / 1000.0 if len(sec_parts) > 1 else 0)
|
||||
)
|
||||
|
||||
return total_seconds
|
||||
|
||||
|
||||
# Global service instance
|
||||
gemini_tts_service = GeminiTTSService()
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ from ..models.glossary import (
|
|||
EmbeddingStatus,
|
||||
Glossary,
|
||||
GlossaryStatus,
|
||||
GlossaryTerm,
|
||||
GlossaryVersion,
|
||||
MatchedTerm,
|
||||
glossary_from_doc,
|
||||
|
|
@ -335,12 +334,24 @@ async def activate_version(glossary_id: str, version_id: str) -> None:
|
|||
|
||||
|
||||
async def archive_glossary(glossary_id: str) -> None:
|
||||
"""Hard-delete the glossary and all its versions and terms."""
|
||||
db = await get_database()
|
||||
await db[_COLL_GLOSSARIES].update_one(
|
||||
{"_id": ObjectId(glossary_id)},
|
||||
{"$set": {"status": GlossaryStatus.ARCHIVED.value}},
|
||||
)
|
||||
|
||||
versions = await db[_COLL_VERSIONS].find(
|
||||
{"glossary_id": glossary_id}, {"_id": 1}
|
||||
).to_list(length=None)
|
||||
version_ids = [str(v["_id"]) for v in versions]
|
||||
|
||||
if version_ids:
|
||||
terms_result = await db[_COLL_TERMS].delete_many({"version_id": {"$in": version_ids}})
|
||||
logger.info(f"Deleted {terms_result.deleted_count} terms for glossary {glossary_id}")
|
||||
|
||||
await db[_COLL_VERSIONS].delete_many({"glossary_id": glossary_id})
|
||||
logger.info(f"Deleted {len(version_ids)} versions for glossary {glossary_id}")
|
||||
|
||||
await db[_COLL_GLOSSARIES].delete_one({"_id": ObjectId(glossary_id)})
|
||||
await _invalidate_cache(glossary_id)
|
||||
logger.info(f"Deleted glossary {glossary_id}")
|
||||
|
||||
|
||||
# ── Retrieval ─────────────────────────────────────────────────────────────────
|
||||
|
|
@ -454,8 +465,11 @@ async def _exact_match(
|
|||
# Build automaton
|
||||
automaton = ahocorasick.Automaton()
|
||||
for doc in terms:
|
||||
stl = doc["source_term_lower"]
|
||||
automaton.add_word(stl, (doc["source_term"], doc["translations"]))
|
||||
stl = doc.get("source_term_lower") or doc.get("source_term", "")
|
||||
if stl:
|
||||
automaton.add_word(stl.lower(), (doc["source_term"], doc.get("translations", {})))
|
||||
if not automaton:
|
||||
return []
|
||||
automaton.make_automaton()
|
||||
|
||||
text_lower = text.lower()
|
||||
|
|
@ -545,18 +559,26 @@ async def _vector_match(
|
|||
|
||||
|
||||
def _get_translation(translations: dict[str, str], target_locale: str) -> str | None:
|
||||
"""Look up a translation with locale-fallback: fr-CA → fr-FR → fr → None."""
|
||||
if not translations:
|
||||
"""Look up a translation with locale-fallback.
|
||||
|
||||
Specific → bare: fr-CA → fr-FR siblings → fr
|
||||
Bare → specific: fr → fr-FR, fr-CA (first match)
|
||||
"""
|
||||
if not translations or not target_locale:
|
||||
return None
|
||||
if target_locale in translations:
|
||||
return translations[target_locale]
|
||||
# Try parent language
|
||||
parent = target_locale.split("-")[0] if "-" in target_locale else None
|
||||
if parent:
|
||||
# Try sibling locales, e.g. fr-CA not found → try fr-FR
|
||||
if "-" in target_locale:
|
||||
# Specific locale: try sibling regions and bare parent (fr-CA → fr-FR → fr)
|
||||
parent = target_locale.split("-")[0]
|
||||
for code, text in translations.items():
|
||||
if code.startswith(parent + "-") or code == parent:
|
||||
return text
|
||||
else:
|
||||
# Bare code (fr): try any fr-* region variant stored in the glossary
|
||||
for code, text in translations.items():
|
||||
if code == target_locale or code.startswith(target_locale + "-"):
|
||||
return text
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -630,28 +652,35 @@ async def get_glossary_block_for_job(
|
|||
Non-fatal: any failure returns "" so the pipeline continues without a glossary.
|
||||
"""
|
||||
try:
|
||||
job_id_for_log = job_doc.get("_id", "unknown")
|
||||
project_id = job_doc.get("project_id")
|
||||
if not project_id:
|
||||
logger.debug(f"Glossary skip job={job_id_for_log}: no project_id")
|
||||
return ""
|
||||
|
||||
project = await db.projects.find_one({"_id": project_id})
|
||||
if not project:
|
||||
logger.warning(f"Glossary skip job={job_id_for_log}: project {project_id!r} not found")
|
||||
return ""
|
||||
|
||||
client_id = project.get("client_id")
|
||||
if not client_id:
|
||||
logger.debug(f"Glossary skip job={job_id_for_log}: project has no client_id")
|
||||
return ""
|
||||
|
||||
# Get active version id via our cache-backed helper (reuses Redis if available)
|
||||
active_version_id = await _get_active_version_id(client_id)
|
||||
if not active_version_id:
|
||||
logger.debug(f"Glossary skip job={job_id_for_log}: no active glossary for client {client_id!r}")
|
||||
return ""
|
||||
|
||||
# Combine source VTT texts for matching
|
||||
source_text = job_doc.get("_glossary_source_text", "")
|
||||
if not source_text:
|
||||
logger.debug(f"Glossary skip job={job_id_for_log}: no source text provided for matching")
|
||||
return ""
|
||||
|
||||
logger.info(f"Glossary lookup job={job_id_for_log} client={client_id!r} version={active_version_id!r} locale={target_locale!r}")
|
||||
norm_target = locale_lib.normalize_code(target_locale)
|
||||
exact_matches = await _exact_match(db, active_version_id, source_text, norm_target)
|
||||
|
||||
|
|
@ -676,7 +705,8 @@ async def get_glossary_block_for_job(
|
|||
return build_glossary_prompt_block(combined, target_locale)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Glossary lookup failed for job {job_doc.get('_id')} (non-fatal): {e}")
|
||||
import traceback
|
||||
logger.warning(f"Glossary lookup failed for job {job_doc.get('_id')} (non-fatal): {e}\n{traceback.format_exc()}")
|
||||
return ""
|
||||
|
||||
|
||||
|
|
@ -698,6 +728,17 @@ async def get_glossary(glossary_id: str) -> Glossary | None:
|
|||
return glossary_from_doc(doc) if doc else None
|
||||
|
||||
|
||||
async def get_versions_by_ids(version_ids: list[str]) -> dict[str, GlossaryVersion]:
    """Batch-fetch glossary versions by ID.

    Args:
        version_ids: Version ids as strings. Duplicates are collapsed, and
            values that are not valid ObjectId strings are skipped (with a
            warning) instead of failing the whole batch.

    Returns:
        ``{version_id: GlossaryVersion}`` for every document found. Ids with
        no matching document are simply absent from the result.
    """
    object_ids = []
    # dict.fromkeys de-duplicates while keeping first-seen order.
    for vid in dict.fromkeys(version_ids or ()):
        if ObjectId.is_valid(vid):
            object_ids.append(ObjectId(vid))
        else:
            logger.warning(f"get_versions_by_ids: skipping invalid version id {vid!r}")
    if not object_ids:
        return {}

    db = await get_database()
    docs = await db[_COLL_VERSIONS].find(
        {"_id": {"$in": object_ids}}
    ).to_list(length=len(object_ids))
    return {str(d["_id"]): glossary_version_from_doc(d) for d in docs}
|
||||
|
||||
|
||||
async def get_versions(glossary_id: str) -> list[GlossaryVersion]:
|
||||
db = await get_database()
|
||||
cursor = db[_COLL_VERSIONS].find(
|
||||
|
|
|
|||
|
|
@ -2,10 +2,10 @@
|
|||
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from fastapi import HTTPException
|
||||
from fastapi import HTTPException, status
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
||||
from ..core.logging import get_logger
|
||||
|
|
@ -26,6 +26,32 @@ logger = get_logger(__name__)
|
|||
_JOBS = "jobs"
|
||||
|
||||
|
||||
async def _assert_user_in_job_org(
    db: AsyncIOMotorDatabase,
    user_id: str,
    job_doc: dict,
) -> None:
    """Ensure ``user_id`` has a membership in the organization that owns the job.

    The organization is taken from ``job_doc["organization_id"]``; when that is
    missing, the job's project is loaded and its ``client_id`` is used instead.

    Raises:
        HTTPException: 422 when no organization can be resolved for the job,
            403 when no membership document exists for the user in that
            organization.
    """
    organization = job_doc.get("organization_id")

    # Fallback: resolve the organization through the job's project.
    if not organization and (linked_project := job_doc.get("project_id")):
        project_doc = await db.projects.find_one({"_id": linked_project}, {"client_id": 1})
        if project_doc:
            organization = project_doc.get("client_id")

    if not organization:
        raise HTTPException(
            status_code=422,
            detail="Job is not bound to an organization; cannot validate cross-org assignment",
        )

    membership = await db.memberships.find_one(
        {"user_id": user_id, "organization_id": organization}
    )
    if membership:
        return

    raise HTTPException(
        status_code=403,
        detail="Assignee is not a member of this job's organization",
    )
|
||||
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def _job_languages(job_doc: dict) -> list[str]:
|
||||
|
|
@ -72,13 +98,13 @@ def _rebuild_qc_assignments(language_qc: dict) -> list[dict]:
|
|||
def _qc_recipients(
|
||||
job_doc: dict,
|
||||
lang_state: dict,
|
||||
exclude_user_id: Optional[str],
|
||||
exclude_user_id: str | None,
|
||||
) -> list[tuple[str, str]]:
|
||||
"""Return [(email, full_name)] for linguist + reviewer assigned to a language, minus the actor."""
|
||||
seen: set[str] = set()
|
||||
result: list[tuple[str, str]] = []
|
||||
|
||||
def _add(email: Optional[str], name: Optional[str]) -> None:
|
||||
def _add(email: str | None, name: str | None) -> None:
|
||||
if email and email not in seen and email != exclude_user_id:
|
||||
seen.add(email)
|
||||
result.append((email, name or email.split("@")[0]))
|
||||
|
|
@ -90,13 +116,73 @@ def _qc_recipients(
|
|||
|
||||
def _deep_link(job_id: str, lang: str) -> str:
    """Build the admin QC URL for one language of a job.

    The result has the form ``<app_url>/admin/qc/<job_id>#lang-<lang>``.
    A trailing slash on the configured ``app_url`` is dropped so the link
    never contains a double slash before ``admin``.
    """
    # Imported here, as in the original block, rather than at module level.
    from ..core.config import settings

    # str() keeps this working if app_url is a URL object rather than a plain str.
    base = str(settings.app_url).rstrip("/")
    return f"{base}/admin/qc/{job_id}#lang-{lang}"
|
||||
|
||||
|
||||
# ── Auto-assignment ───────────────────────────────────────────────────────────
|
||||
|
||||
async def auto_assign_defaults(db: AsyncIOMotorDatabase, job_id: str) -> int:
    """Apply job.initial_linguist_id / initial_reviewer_id to languages lacking an assignee.

    Called lazily when the language-QC map is first fetched in PENDING_QC state,
    so PM assignments made at job-creation time take effect without touching Celery tasks.

    Linguist and reviewer defaults are applied independently: a role is only
    filled for a language when that role is currently empty, so existing
    (possibly manual) assignments are never overwritten and repeated calls
    are idempotent. All changes are written in a single ``$set`` update.

    Args:
        db: Database handle; the ``jobs`` and ``users`` collections are read,
            and ``jobs`` is updated.
        job_id: ``_id`` of the job to process.

    Returns:
        The number of languages that received at least one new assignment.
    """
    job_doc = await db[_JOBS].find_one({"_id": job_id})
    if not job_doc:
        return 0

    linguist_id: str | None = job_doc.get("initial_linguist_id")
    reviewer_id: str | None = job_doc.get("initial_reviewer_id")
    if not linguist_id and not reviewer_id:
        return 0

    languages: list[str] = (job_doc.get("requested_outputs") or {}).get("languages") or []
    if not languages:
        return 0

    linguist_doc = await db.users.find_one({"_id": linguist_id}) if linguist_id else None
    reviewer_doc = await db.users.find_one({"_id": reviewer_id}) if reviewer_id else None
    if not linguist_doc and not reviewer_doc:
        # Default users no longer exist: nothing to assign.
        return 0

    now = datetime.utcnow()
    current_qc: dict = job_doc.get("language_qc") or {}
    patch: dict = {}
    updated = 0

    for lang in languages:
        raw_state = current_qc.get(lang)
        # Tolerate malformed (non-dict) per-language state.
        lang_state: dict = raw_state if isinstance(raw_state, dict) else {}
        prefix = f"language_qc.{lang}"
        lang_patch: dict = {}

        if linguist_doc and not lang_state.get("assigned_linguist_id"):
            lang_patch.update({
                f"{prefix}.assigned_linguist_id": linguist_id,
                f"{prefix}.assigned_linguist_email": linguist_doc["email"],
                f"{prefix}.assigned_linguist_name": linguist_doc.get("full_name", ""),
                f"{prefix}.assigned_at": now,
                f"{prefix}.assigned_by_user_id": "system",
                f"{prefix}.status": lang_state.get("status", LanguageQCStatus.PENDING.value),
            })

        # Checked separately from the linguist so an existing reviewer is
        # never overwritten and a missing one is still filled in.
        if reviewer_doc and not lang_state.get("assigned_reviewer_id"):
            lang_patch.update({
                f"{prefix}.assigned_reviewer_id": reviewer_id,
                f"{prefix}.assigned_reviewer_email": reviewer_doc["email"],
                f"{prefix}.assigned_reviewer_name": reviewer_doc.get("full_name", ""),
            })

        if lang_patch:
            patch.update(lang_patch)
            updated += 1

    if patch:
        # One round-trip for every language instead of one update per language.
        await db[_JOBS].update_one({"_id": job_id}, {"$set": patch})
        logger.info("auto_assign_defaults: assigned %d languages on job %s", updated, job_id)
    return updated
|
||||
|
||||
|
||||
# ── Core mutations ────────────────────────────────────────────────────────────
|
||||
|
||||
async def get_state(db: AsyncIOMotorDatabase, job_id: str, lang: str) -> Optional[LanguageQCState]:
|
||||
async def get_state(db: AsyncIOMotorDatabase, job_id: str, lang: str) -> LanguageQCState | None:
|
||||
job_doc = await db[_JOBS].find_one({"_id": job_id}, {f"language_qc.{lang}": 1})
|
||||
if not job_doc:
|
||||
return None
|
||||
|
|
@ -126,8 +212,8 @@ async def assign_linguist(
|
|||
actor: User,
|
||||
*,
|
||||
http_request=None,
|
||||
notes: Optional[str] = None,
|
||||
deadline: Optional[datetime] = None,
|
||||
notes: str | None = None,
|
||||
deadline: datetime | None = None,
|
||||
) -> LanguageQCState:
|
||||
"""PM/PROD/ADMIN assigns a linguist to a language. Creates per-lang state if missing."""
|
||||
job_doc = await db[_JOBS].find_one({"_id": job_id})
|
||||
|
|
@ -138,6 +224,8 @@ async def assign_linguist(
|
|||
if not linguist_doc:
|
||||
raise HTTPException(status_code=404, detail="Linguist not found")
|
||||
|
||||
await _assert_user_in_job_org(db, linguist_user_id, job_doc)
|
||||
|
||||
now = datetime.utcnow()
|
||||
current_state_raw = (job_doc.get("language_qc") or {}).get(lang, {})
|
||||
prev_assignee = current_state_raw.get("assigned_linguist_id") if isinstance(current_state_raw, dict) else None
|
||||
|
|
@ -221,8 +309,8 @@ async def reassign_linguist(
|
|||
actor: User,
|
||||
*,
|
||||
http_request=None,
|
||||
notes: Optional[str] = None,
|
||||
deadline: Optional[datetime] = None,
|
||||
notes: str | None = None,
|
||||
deadline: datetime | None = None,
|
||||
) -> LanguageQCState:
|
||||
"""Currently-assigned linguist OR PM/PROD/ADMIN hands off to a colleague."""
|
||||
job_doc = await db[_JOBS].find_one({"_id": job_id})
|
||||
|
|
@ -251,8 +339,8 @@ async def assign_reviewer(
|
|||
actor: User,
|
||||
*,
|
||||
http_request=None,
|
||||
notes: Optional[str] = None,
|
||||
deadline: Optional[datetime] = None,
|
||||
notes: str | None = None,
|
||||
deadline: datetime | None = None,
|
||||
) -> LanguageQCState:
|
||||
"""PM/PROD/ADMIN assigns a reviewer to a language."""
|
||||
job_doc = await db[_JOBS].find_one({"_id": job_id})
|
||||
|
|
@ -263,6 +351,8 @@ async def assign_reviewer(
|
|||
if not reviewer_doc:
|
||||
raise HTTPException(status_code=404, detail="Reviewer not found")
|
||||
|
||||
await _assert_user_in_job_org(db, reviewer_user_id, job_doc)
|
||||
|
||||
now = datetime.utcnow()
|
||||
current_state_raw = (job_doc.get("language_qc") or {}).get(lang, {})
|
||||
prev_reviewer = current_state_raw.get("assigned_reviewer_id") if isinstance(current_state_raw, dict) else None
|
||||
|
|
@ -335,8 +425,8 @@ async def reassign_reviewer(
|
|||
actor: User,
|
||||
*,
|
||||
http_request=None,
|
||||
notes: Optional[str] = None,
|
||||
deadline: Optional[datetime] = None,
|
||||
notes: str | None = None,
|
||||
deadline: datetime | None = None,
|
||||
) -> LanguageQCState:
|
||||
if actor.role not in (UserRole.PRODUCTION, UserRole.ADMIN, UserRole.PROJECT_MANAGER):
|
||||
raise HTTPException(status_code=403, detail="Only PM/PROD/ADMIN can reassign reviewer")
|
||||
|
|
@ -424,6 +514,7 @@ async def submit_for_review(
|
|||
**(current_state_raw if isinstance(current_state_raw, dict) else {}),
|
||||
"status": LanguageQCStatus.PENDING_REVIEW.value,
|
||||
"submitted_for_review_at": now,
|
||||
"reviewed_cues": 0, # R-12: reviewer must re-acknowledge cues after each resubmit
|
||||
"history": history,
|
||||
}
|
||||
|
||||
|
|
@ -535,7 +626,7 @@ async def approve_language(
|
|||
actor: User,
|
||||
*,
|
||||
http_request=None,
|
||||
notes: Optional[str] = None,
|
||||
notes: str | None = None,
|
||||
) -> LanguageQCState:
|
||||
job_doc = await db[_JOBS].find_one({"_id": job_id})
|
||||
if not job_doc:
|
||||
|
|
@ -601,11 +692,76 @@ async def approve_language(
|
|||
logger.exception("Failed to send approval emails")
|
||||
|
||||
refreshed = await db[_JOBS].find_one({"_id": job_id})
|
||||
|
||||
# When the source language is approved, dispatch translation for any target
|
||||
# languages that don't have VTTs yet (EN-first gate).
|
||||
source_lang = (refreshed.get("source") or {}).get("language", "en")
|
||||
if lang == source_lang:
|
||||
target_langs = [lg for lg in _job_languages(refreshed) if lg != source_lang]
|
||||
if target_langs:
|
||||
outputs = refreshed.get("outputs") or {}
|
||||
untranslated = [lg for lg in target_langs if not (outputs.get(lg) or {}).get("captions_vtt_gcs")]
|
||||
if untranslated:
|
||||
try:
|
||||
from ..services.cloud_run_dispatch import dispatch as _cr_dispatch
|
||||
await db[_JOBS].update_one(
|
||||
{"_id": job_id},
|
||||
{
|
||||
"$set": {
|
||||
"status": JobStatus.TRANSLATING.value,
|
||||
"updated_at": datetime.utcnow(),
|
||||
},
|
||||
"$push": {
|
||||
"review.history": {
|
||||
"at": datetime.utcnow(),
|
||||
"status": JobStatus.TRANSLATING.value,
|
||||
"by": "system",
|
||||
"notes": f"EN approved — dispatching translation for {untranslated}",
|
||||
}
|
||||
},
|
||||
},
|
||||
)
|
||||
await _cr_dispatch("translate", job_id, languages=untranslated)
|
||||
logger.info(f"Job {job_id}: EN approved, dispatched translation for {untranslated}")
|
||||
return LanguageQCState(**updated_state)
|
||||
except Exception as exc:
|
||||
logger.error(f"Job {job_id}: failed to dispatch translation after EN approval: {exc}")
|
||||
elif (refreshed.get("requested_outputs") or {}).get("accessible_video_mp4"):
|
||||
# Source-only job requesting accessible video: no translation needed,
|
||||
# but TTS+render pipeline must run to produce the accessible MP4.
|
||||
try:
|
||||
from ..services.cloud_run_dispatch import dispatch as _cr_dispatch
|
||||
await db[_JOBS].update_one(
|
||||
{"_id": job_id},
|
||||
{
|
||||
"$set": {
|
||||
"status": JobStatus.TRANSLATING.value,
|
||||
"updated_at": datetime.utcnow(),
|
||||
},
|
||||
"$push": {
|
||||
"review.history": {
|
||||
"at": datetime.utcnow(),
|
||||
"status": JobStatus.TRANSLATING.value,
|
||||
"by": "system",
|
||||
"notes": "EN approved — dispatching TTS and accessible video render (source-only)",
|
||||
}
|
||||
},
|
||||
},
|
||||
)
|
||||
await _cr_dispatch("translate", job_id)
|
||||
logger.info(f"Job {job_id}: EN approved (source-only), dispatched TTS+render pipeline")
|
||||
return LanguageQCState(**updated_state)
|
||||
except Exception as exc:
|
||||
logger.error(f"Job {job_id}: failed to dispatch TTS+render after EN approval: {exc}")
|
||||
|
||||
await _maybe_advance_job(db, refreshed)
|
||||
|
||||
return LanguageQCState(**updated_state)
|
||||
|
||||
|
||||
REJECT_CATEGORIES = frozenset(["timing", "mistranslation", "terminology", "profanity", "length", "other"])
|
||||
|
||||
|
||||
async def reject_language(
|
||||
db: AsyncIOMotorDatabase,
|
||||
job_id: str,
|
||||
|
|
@ -613,10 +769,13 @@ async def reject_language(
|
|||
actor: User,
|
||||
notes: str,
|
||||
*,
|
||||
category: str | None = None,
|
||||
http_request=None,
|
||||
) -> LanguageQCState:
|
||||
if not notes or not notes.strip():
|
||||
raise HTTPException(status_code=422, detail="Rejection notes are required")
|
||||
if category and category not in REJECT_CATEGORIES:
|
||||
raise HTTPException(status_code=422, detail=f"Invalid reject category. Must be one of: {', '.join(sorted(REJECT_CATEGORIES))}")
|
||||
|
||||
job_doc = await db[_JOBS].find_one({"_id": job_id})
|
||||
if not job_doc:
|
||||
|
|
@ -639,6 +798,8 @@ async def reject_language(
|
|||
"reviewed_by_user_id": str(actor.id),
|
||||
"reviewed_by_email": actor.email,
|
||||
"notes": notes,
|
||||
"reject_category": category,
|
||||
"reviewed_cues": 0,
|
||||
"submitted_for_review_at": None,
|
||||
"history": history,
|
||||
}
|
||||
|
|
@ -701,7 +862,7 @@ async def reopen_language(
|
|||
actor: User,
|
||||
*,
|
||||
http_request=None,
|
||||
notes: Optional[str] = None,
|
||||
notes: str | None = None,
|
||||
) -> LanguageQCState:
|
||||
"""PROD/ADMIN only — resets an approved language back to pending for re-review."""
|
||||
if actor.role not in (UserRole.PRODUCTION, UserRole.ADMIN):
|
||||
|
|
@ -818,7 +979,22 @@ async def add_comment(
|
|||
details={"lang": lang},
|
||||
)
|
||||
|
||||
# Fan-out to all other assignees
|
||||
# WS broadcast — live comment indicator for everyone on this job
|
||||
try:
|
||||
await connection_manager.broadcast_to_job(job_id, {
|
||||
"type": "language_qc_comment",
|
||||
"job_id": job_id,
|
||||
"lang": lang,
|
||||
"data": {
|
||||
"author_name": actor.full_name or actor.email,
|
||||
"lang": lang,
|
||||
"comment_id": comment.id,
|
||||
},
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fan-out email to all other assignees
|
||||
recipients = _qc_recipients(job_doc, current_state_raw if isinstance(current_state_raw, dict) else {}, exclude_user_id=actor.email)
|
||||
if recipients:
|
||||
try:
|
||||
|
|
@ -845,12 +1021,15 @@ async def list_for_linguist(
|
|||
db: AsyncIOMotorDatabase,
|
||||
linguist_id: str,
|
||||
*,
|
||||
status_filter: Optional[str] = None,
|
||||
accessible_org_ids: list[str] | None = None,
|
||||
status_filter: str | None = None,
|
||||
skip: int = 0,
|
||||
limit: int = 50,
|
||||
) -> list[dict]:
|
||||
"""Return jobs where the linguist has an assignment, along with which languages."""
|
||||
query: dict = {"qc_assignments.linguist_id": linguist_id}
|
||||
if accessible_org_ids is not None:
|
||||
query["organization_id"] = {"$in": accessible_org_ids}
|
||||
if status_filter:
|
||||
query["qc_assignments"] = {"$elemMatch": {"linguist_id": linguist_id, "status": status_filter}}
|
||||
|
||||
|
|
@ -868,14 +1047,18 @@ async def list_for_reviewer(
|
|||
db: AsyncIOMotorDatabase,
|
||||
reviewer_id: str,
|
||||
*,
|
||||
status_filter: Optional[str] = None,
|
||||
accessible_org_ids: list[str] | None = None,
|
||||
status_filter: str | None = None,
|
||||
skip: int = 0,
|
||||
limit: int = 50,
|
||||
) -> list[dict]:
|
||||
"""Return jobs where the reviewer is assigned to at least one language."""
|
||||
# language_qc is an embedded dict keyed by lang code; scan in Python
|
||||
# language_qc is a dict keyed by lang; pre-filter by org then scan in Python for assigned reviewer
|
||||
base_query: dict = {}
|
||||
if accessible_org_ids is not None:
|
||||
base_query["organization_id"] = {"$in": accessible_org_ids}
|
||||
all_jobs_cursor = db[_JOBS].find(
|
||||
{},
|
||||
base_query,
|
||||
{"title": 1, "status": 1, "language_qc": 1, "qc_assignments": 1, "created_at": 1, "updated_at": 1}
|
||||
).sort("updated_at", -1).skip(skip).limit(limit * 5) # over-fetch, filter in Python
|
||||
|
||||
|
|
@ -966,8 +1149,20 @@ def _assert_can_approve(job_doc: dict, lang: str, actor: User) -> None:
|
|||
"""Raise 403 if actor cannot approve this language.
|
||||
|
||||
Two-stage QC is enforced: linguist must submit before reviewer can approve.
|
||||
PRODUCTION and ADMIN may override (explicit admin action, logged separately).
|
||||
English-first is enforced: source language must be approved before any target.
|
||||
PRODUCTION and ADMIN may override both gates.
|
||||
"""
|
||||
source_lang = (job_doc.get("source") or {}).get("language", "en")
|
||||
if lang != source_lang and actor.role not in (UserRole.PRODUCTION, UserRole.ADMIN):
|
||||
source_state = (job_doc.get("language_qc") or {}).get(source_lang, {})
|
||||
if not isinstance(source_state, dict):
|
||||
source_state = {}
|
||||
if source_state.get("status") != LanguageQCStatus.APPROVED.value:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_409_CONFLICT,
|
||||
detail=f"Source language '{source_lang}' must be approved before approving '{lang}'",
|
||||
)
|
||||
|
||||
if actor.role in (UserRole.PRODUCTION, UserRole.ADMIN):
|
||||
return
|
||||
|
||||
|
|
|
|||
|
|
@ -1,24 +1,28 @@
|
|||
"""Membership service — queries the memberships collection."""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from motor.motor_asyncio import AsyncIOMotorDatabase
|
||||
|
||||
from ..models.membership import Membership, MemberDetail
|
||||
from ..models.membership import MemberDetail, Membership
|
||||
from ..models.organization import OrgRole
|
||||
|
||||
|
||||
def _now() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
def _membership_from_doc(doc: dict) -> Membership:
    """Convert a raw memberships document into a ``Membership`` model.

    Missing ``user_id`` / ``organization_id`` default to an empty string, and a
    missing or unrecognised ``role_in_org`` falls back to ``OrgRole.MEMBER``
    rather than raising.
    """
    try:
        org_role = OrgRole(doc.get("role_in_org", "member"))
    except ValueError:
        # Unknown role value stored in the document: degrade to plain member.
        org_role = OrgRole.MEMBER

    fields = {
        "id": str(doc["_id"]),
        "user_id": doc.get("user_id", ""),
        "organization_id": doc.get("organization_id", ""),
        "role_in_org": org_role,
        "created_at": doc.get("created_at"),
        "created_by": doc.get("created_by"),
    }
    return Membership(**fields)
|
||||
|
|
@ -36,7 +40,7 @@ async def get_membership(
|
|||
user_id: str,
|
||||
organization_id: str,
|
||||
db: AsyncIOMotorDatabase,
|
||||
) -> Optional[Membership]:
|
||||
) -> Membership | None:
|
||||
doc = await db.memberships.find_one(
|
||||
{"user_id": user_id, "organization_id": organization_id}
|
||||
)
|
||||
|
|
@ -59,7 +63,7 @@ async def upsert_membership(
|
|||
user_id: str,
|
||||
organization_id: str,
|
||||
role_in_org: OrgRole,
|
||||
created_by: Optional[str],
|
||||
created_by: str | None,
|
||||
db: AsyncIOMotorDatabase,
|
||||
) -> Membership:
|
||||
now = _now()
|
||||
|
|
@ -105,19 +109,24 @@ async def list_org_members(
|
|||
"as": "user_doc",
|
||||
}
|
||||
},
|
||||
{"$unwind": {"path": "$user_doc", "preserveNullAndEmpty": False}},
|
||||
{"$unwind": {"path": "$user_doc", "preserveNullAndEmptyArrays": False}},
|
||||
{"$sort": {"created_at": 1}},
|
||||
]
|
||||
details = []
|
||||
async for doc in db.memberships.aggregate(pipeline):
|
||||
u = doc["user_doc"]
|
||||
raw_role = doc.get("role_in_org", "member")
|
||||
try:
|
||||
role = OrgRole(raw_role)
|
||||
except ValueError:
|
||||
role = OrgRole.MEMBER
|
||||
details.append(
|
||||
MemberDetail(
|
||||
membership_id=str(doc["_id"]),
|
||||
user_id=doc["user_id"],
|
||||
email=u.get("email", ""),
|
||||
full_name=u.get("full_name", ""),
|
||||
role_in_org=OrgRole(doc["role_in_org"]),
|
||||
user_id=doc.get("user_id", ""),
|
||||
email=u.get("email") or "",
|
||||
full_name=u.get("full_name") or "",
|
||||
role_in_org=role,
|
||||
created_at=doc.get("created_at"),
|
||||
)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
Validates Microsoft ID tokens and extracts user information.
|
||||
"""
|
||||
import time
|
||||
from typing import Dict, Optional
|
||||
|
||||
import httpx
|
||||
from jose import JWTError, jwt
|
||||
|
|
@ -50,11 +49,11 @@ class MicrosoftAuthService:
|
|||
self.openid_config_url = f"{self.authority}/v2.0/.well-known/openid-configuration"
|
||||
|
||||
# Cache for JWKS (public keys)
|
||||
self._jwks_cache: Optional[Dict] = None
|
||||
self._jwks_cache: dict | None = None
|
||||
self._jwks_cache_time: float = 0
|
||||
self._jwks_cache_ttl: int = 3600 # Cache for 1 hour
|
||||
|
||||
async def _get_openid_config(self) -> Dict:
|
||||
async def _get_openid_config(self) -> dict:
|
||||
"""Fetch OpenID Connect configuration from Microsoft."""
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10) as client:
|
||||
|
|
@ -63,9 +62,9 @@ class MicrosoftAuthService:
|
|||
return response.json()
|
||||
except httpx.HTTPError as e:
|
||||
logger.error(f"Failed to fetch OpenID configuration: {e}")
|
||||
raise MicrosoftAuthError("Failed to fetch Microsoft authentication configuration")
|
||||
raise MicrosoftAuthError("Failed to fetch Microsoft authentication configuration") from e
|
||||
|
||||
async def _get_jwks(self, force_refresh: bool = False) -> Dict:
|
||||
async def _get_jwks(self, force_refresh: bool = False) -> dict:
|
||||
"""Fetch JSON Web Key Set (JWKS) from Microsoft.
|
||||
|
||||
Args:
|
||||
|
|
@ -98,7 +97,7 @@ class MicrosoftAuthService:
|
|||
|
||||
except httpx.HTTPError as e:
|
||||
logger.error(f"Failed to fetch JWKS: {e}")
|
||||
raise MicrosoftAuthError("Failed to fetch Microsoft public keys")
|
||||
raise MicrosoftAuthError("Failed to fetch Microsoft public keys") from e
|
||||
|
||||
async def validate_token(self, id_token: str) -> MicrosoftUserInfo:
|
||||
"""Validate Microsoft ID token and extract user information.
|
||||
|
|
@ -121,7 +120,7 @@ class MicrosoftAuthService:
|
|||
if not kid:
|
||||
raise MicrosoftTokenValidationError("Token header missing 'kid' claim")
|
||||
|
||||
def _find_key(keys: list) -> Optional[Dict]:
|
||||
def _find_key(keys: list) -> dict | None:
|
||||
for key in keys:
|
||||
if key.get('kid') == kid:
|
||||
return {'kty': key['kty'], 'kid': key['kid'], 'use': key.get('use'),
|
||||
|
|
@ -146,7 +145,7 @@ class MicrosoftAuthService:
|
|||
issuer=f"https://login.microsoftonline.com/{self.tenant_id}/v2.0"
|
||||
)
|
||||
except JWTError as e:
|
||||
raise MicrosoftTokenValidationError(f"Token validation failed: {str(e)}")
|
||||
raise MicrosoftTokenValidationError(f"Token validation failed: {str(e)}") from e
|
||||
|
||||
email = payload.get('email') or payload.get('preferred_username')
|
||||
if not email:
|
||||
|
|
@ -177,12 +176,12 @@ class MicrosoftAuthService:
|
|||
|
||||
except JWKError as e:
|
||||
logger.error(f"JWK error during token validation: {e}")
|
||||
raise MicrosoftTokenValidationError(f"Key processing error: {str(e)}")
|
||||
raise MicrosoftTokenValidationError(f"Key processing error: {str(e)}") from e
|
||||
except Exception as e:
|
||||
if isinstance(e, (MicrosoftAuthError, MicrosoftTokenValidationError)):
|
||||
raise
|
||||
logger.error(f"Unexpected error during token validation: {e}")
|
||||
raise MicrosoftTokenValidationError(f"Token validation failed: {str(e)}")
|
||||
raise MicrosoftTokenValidationError(f"Token validation failed: {str(e)}") from e
|
||||
|
||||
|
||||
# Singleton instance
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
"""Google Cloud Secret Manager integration service."""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
from typing import Dict, List, Optional, Any
|
||||
from functools import lru_cache
|
||||
from google.cloud import secretmanager
|
||||
import os
|
||||
|
||||
from google.api_core import exceptions as gcp_exceptions
|
||||
from google.cloud import secretmanager
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.core.logging import get_logger
|
||||
|
|
@ -21,14 +20,14 @@ class SecretManagerError(Exception):
|
|||
|
||||
class SecretsManager:
|
||||
"""Service for managing secrets via Google Cloud Secret Manager."""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.settings = get_settings()
|
||||
self.client: Optional[secretmanager.SecretManagerServiceClient] = None
|
||||
self.client: secretmanager.SecretManagerServiceClient | None = None
|
||||
self.project_id = self.settings.google_cloud_project
|
||||
self._cache: Dict[str, str] = {}
|
||||
self._cache: dict[str, str] = {}
|
||||
self._cache_ttl = 300 # 5 minutes cache
|
||||
|
||||
|
||||
def _get_client(self) -> secretmanager.SecretManagerServiceClient:
|
||||
"""Get or create Secret Manager client."""
|
||||
if not self.client:
|
||||
|
|
@ -37,93 +36,93 @@ class SecretsManager:
|
|||
logger.info("Secret Manager client initialized")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize Secret Manager client: {e}")
|
||||
raise SecretManagerError(f"Failed to initialize Secret Manager: {e}")
|
||||
|
||||
raise SecretManagerError(f"Failed to initialize Secret Manager: {e}") from e
|
||||
|
||||
return self.client
|
||||
|
||||
|
||||
@trace_async_operation("secrets_manager.get_secret")
async def get_secret(self, secret_name: str, version: str = "latest") -> str:
    """
    Retrieve a secret from Google Cloud Secret Manager.

    Values are memoised in ``self._cache`` under ``"<secret_name>:<version>"``;
    a cached value is returned without contacting Secret Manager.

    Args:
        secret_name: Name of the secret
        version: Version of the secret (default: "latest")

    Returns:
        The secret value as a string

    Raises:
        SecretManagerError: If the secret does not exist, access is denied,
            or any other error occurs while retrieving it
    """
    cache_key = f"{secret_name}:{version}"

    # Check cache first
    if cache_key in self._cache:
        logger.debug(f"Secret {secret_name} retrieved from cache")
        return self._cache[cache_key]

    try:
        # Build the fully-qualified secret version resource name
        name = f"projects/{self.project_id}/secrets/{secret_name}/versions/{version}"

        client = self._get_client()

        # The Secret Manager client is synchronous, so run the call in the
        # default executor. get_running_loop() is the supported way to obtain
        # the loop from inside a coroutine.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            client.access_secret_version,
            {"name": name}
        )

        secret_value = response.payload.data.decode("UTF-8")

        # Cache the secret; entries are not expired here (the cache only
        # empties via clear_cache() or an application restart).
        self._cache[cache_key] = secret_value

        logger.info(f"Successfully retrieved secret: {secret_name}")
        return secret_value

    except gcp_exceptions.NotFound:
        error_msg = f"Secret not found: {secret_name}"
        logger.error(error_msg)
        raise SecretManagerError(error_msg) from None

    except gcp_exceptions.PermissionDenied:
        error_msg = f"Permission denied accessing secret: {secret_name}"
        logger.error(error_msg)
        raise SecretManagerError(error_msg) from None

    except Exception as e:
        error_msg = f"Failed to retrieve secret {secret_name}: {e}"
        logger.error(error_msg)
        raise SecretManagerError(error_msg) from e
|
||||
|
||||
@trace_async_operation("secrets_manager.get_secrets_batch")
|
||||
async def get_secrets_batch(self, secret_names: List[str]) -> Dict[str, str]:
|
||||
async def get_secrets_batch(self, secret_names: list[str]) -> dict[str, str]:
|
||||
"""
|
||||
Retrieve multiple secrets efficiently.
|
||||
|
||||
|
||||
Args:
|
||||
secret_names: List of secret names to retrieve
|
||||
|
||||
|
||||
Returns:
|
||||
Dictionary mapping secret names to their values
|
||||
"""
|
||||
|
||||
|
||||
secrets = {}
|
||||
tasks = []
|
||||
|
||||
|
||||
for secret_name in secret_names:
|
||||
task = asyncio.create_task(
|
||||
self.get_secret(secret_name),
|
||||
name=f"get_secret_{secret_name}"
|
||||
)
|
||||
tasks.append((secret_name, task))
|
||||
|
||||
|
||||
# Wait for all tasks to complete
|
||||
for secret_name, task in tasks:
|
||||
try:
|
||||
|
|
@ -132,34 +131,34 @@ class SecretsManager:
|
|||
logger.warning(f"Failed to retrieve secret {secret_name}: {e}")
|
||||
# Continue with other secrets
|
||||
continue
|
||||
|
||||
|
||||
return secrets
|
||||
|
||||
async def create_secret(self, secret_name: str, secret_value: str, labels: Optional[Dict[str, str]] = None) -> str:
|
||||
|
||||
async def create_secret(self, secret_name: str, secret_value: str, labels: dict[str, str] | None = None) -> str:
|
||||
"""
|
||||
Create a new secret in Secret Manager.
|
||||
|
||||
|
||||
Args:
|
||||
secret_name: Name of the secret
|
||||
secret_value: Value to store
|
||||
labels: Optional labels for the secret
|
||||
|
||||
|
||||
Returns:
|
||||
The full secret resource name
|
||||
"""
|
||||
|
||||
|
||||
try:
|
||||
client = self._get_client()
|
||||
parent = f"projects/{self.project_id}"
|
||||
|
||||
|
||||
# Create the secret
|
||||
secret = {
|
||||
"labels": labels or {},
|
||||
"replication": {"automatic": {}}
|
||||
}
|
||||
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
|
||||
# Create secret resource
|
||||
create_response = await loop.run_in_executor(
|
||||
None,
|
||||
|
|
@ -170,7 +169,7 @@ class SecretsManager:
|
|||
"secret": secret
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
# Add secret version with the actual value
|
||||
version_response = await loop.run_in_executor(
|
||||
None,
|
||||
|
|
@ -180,20 +179,20 @@ class SecretsManager:
|
|||
"payload": {"data": secret_value.encode("UTF-8")}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
logger.info(f"Successfully created secret: {secret_name}")
|
||||
return version_response.name
|
||||
|
||||
|
||||
except gcp_exceptions.AlreadyExists:
|
||||
error_msg = f"Secret already exists: {secret_name}"
|
||||
logger.error(error_msg)
|
||||
raise SecretManagerError(error_msg)
|
||||
|
||||
raise SecretManagerError(error_msg) from None
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to create secret {secret_name}: {e}"
|
||||
logger.error(error_msg)
|
||||
raise SecretManagerError(error_msg)
|
||||
|
||||
raise SecretManagerError(error_msg) from e
|
||||
|
||||
def clear_cache(self) -> None:
|
||||
"""Clear the secrets cache."""
|
||||
self._cache.clear()
|
||||
|
|
@ -218,7 +217,7 @@ async def get_database_url() -> str:
|
|||
# Fallback to environment variable
|
||||
url = os.getenv("MONGODB_URL")
|
||||
if not url:
|
||||
raise SecretManagerError("MongoDB URL not available in secrets or environment")
|
||||
raise SecretManagerError("MongoDB URL not available in secrets or environment") from None
|
||||
return url
|
||||
|
||||
|
||||
|
|
@ -230,11 +229,11 @@ async def get_redis_url() -> str:
|
|||
# Fallback to environment variable
|
||||
url = os.getenv("REDIS_URL")
|
||||
if not url:
|
||||
raise SecretManagerError("Redis URL not available in secrets or environment")
|
||||
raise SecretManagerError("Redis URL not available in secrets or environment") from None
|
||||
return url
|
||||
|
||||
|
||||
async def get_jwt_secrets() -> Dict[str, str]:
|
||||
async def get_jwt_secrets() -> dict[str, str]:
|
||||
"""Get JWT secrets from Secret Manager."""
|
||||
try:
|
||||
return await secrets_manager.get_secrets_batch([
|
||||
|
|
@ -249,22 +248,22 @@ async def get_jwt_secrets() -> Dict[str, str]:
|
|||
}
|
||||
|
||||
|
||||
async def get_api_keys() -> Dict[str, str]:
|
||||
async def get_api_keys() -> dict[str, str]:
|
||||
"""Get all API keys from Secret Manager."""
|
||||
api_keys = {}
|
||||
|
||||
|
||||
secret_names = [
|
||||
"gemini-api-key",
|
||||
"sendgrid-api-key",
|
||||
"sendgrid-api-key",
|
||||
"elevenlabs-api-key",
|
||||
"sentry-dsn"
|
||||
]
|
||||
|
||||
|
||||
try:
|
||||
api_keys = await secrets_manager.get_secrets_batch(secret_names)
|
||||
except SecretManagerError:
|
||||
logger.warning("Failed to retrieve some API keys from Secret Manager, using environment fallback")
|
||||
|
||||
|
||||
# Fallback to environment variables for missing keys
|
||||
env_mapping = {
|
||||
"gemini-api-key": "GEMINI_API_KEY",
|
||||
|
|
@ -272,7 +271,7 @@ async def get_api_keys() -> Dict[str, str]:
|
|||
"elevenlabs-api-key": "ELEVENLABS_API_KEY",
|
||||
"sentry-dsn": "SENTRY_DSN"
|
||||
}
|
||||
|
||||
|
||||
for secret_name, env_var in env_mapping.items():
|
||||
if secret_name not in api_keys:
|
||||
env_value = os.getenv(env_var)
|
||||
|
|
@ -280,5 +279,5 @@ async def get_api_keys() -> Dict[str, str]:
|
|||
api_keys[secret_name] = env_value
|
||||
else:
|
||||
logger.warning(f"API key {secret_name} not available in secrets or environment")
|
||||
|
||||
return api_keys
|
||||
|
||||
return api_keys
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
import io
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
import aiohttp
|
||||
from google.cloud import texttospeech
|
||||
|
|
@ -47,8 +46,8 @@ class TTSService:
|
|||
self,
|
||||
ad_vtt_content: str,
|
||||
language_code: str = "en-US",
|
||||
voice_name: Optional[str] = None,
|
||||
provider: Optional[str] = None,
|
||||
voice_name: str | None = None,
|
||||
provider: str | None = None,
|
||||
model: str = "flash",
|
||||
speed: float = 1.0,
|
||||
style_prompt: str = "",
|
||||
|
|
@ -114,8 +113,8 @@ class TTSService:
|
|||
self,
|
||||
ad_vtt_content: str,
|
||||
language_code: str = "en-US",
|
||||
voice_name: Optional[str] = None,
|
||||
provider: Optional[str] = None,
|
||||
voice_name: str | None = None,
|
||||
provider: str | None = None,
|
||||
model: str = "flash",
|
||||
speed: float = 1.0,
|
||||
style_prompt: str = "",
|
||||
|
|
@ -219,7 +218,7 @@ class TTSService:
|
|||
self,
|
||||
ad_vtt_content: str,
|
||||
language_code: str = "en-US",
|
||||
voice_name: Optional[str] = None
|
||||
voice_name: str | None = None
|
||||
) -> bytes:
|
||||
"""Generate MP3 using Google TTS with 2-second pauses between passages"""
|
||||
|
||||
|
|
@ -233,7 +232,7 @@ class TTSService:
|
|||
audio_segments = []
|
||||
current_audio_position = 0.0 # Track actual audio timeline position
|
||||
|
||||
for i, cue in enumerate(cues):
|
||||
for _i, cue in enumerate(cues):
|
||||
# Calculate where this cue should start (anchored to VTT timing)
|
||||
target_start_time = cue["start_time"]
|
||||
|
||||
|
|
@ -281,7 +280,7 @@ class TTSService:
|
|||
self,
|
||||
ad_vtt_content: str,
|
||||
language_code: str = "en-US",
|
||||
voice_name: Optional[str] = None,
|
||||
voice_name: str | None = None,
|
||||
stability: float = 0.5,
|
||||
similarity_boost: float = 0.5,
|
||||
) -> bytes:
|
||||
|
|
@ -299,7 +298,7 @@ class TTSService:
|
|||
audio_segments = []
|
||||
current_audio_position = 0.0 # Track actual audio timeline position
|
||||
|
||||
for i, cue in enumerate(cues):
|
||||
for _i, cue in enumerate(cues):
|
||||
# Calculate where this cue should start (anchored to VTT timing)
|
||||
target_start_time = cue["start_time"]
|
||||
|
||||
|
|
@ -339,7 +338,7 @@ class TTSService:
|
|||
self,
|
||||
text: str,
|
||||
language_code: str,
|
||||
voice_name: Optional[str] = None
|
||||
voice_name: str | None = None
|
||||
) -> bytes:
|
||||
"""Synthesize a single text string to audio using Google TTS"""
|
||||
# Configure voice
|
||||
|
|
@ -404,7 +403,7 @@ class TTSService:
|
|||
error_text = await response.text()
|
||||
raise ValueError(f"ElevenLabs TTS failed: {response.status} - {error_text}")
|
||||
|
||||
def _get_elevenlabs_voice(self, language_code: str, voice_name: Optional[str] = None) -> str:
|
||||
def _get_elevenlabs_voice(self, language_code: str, voice_name: str | None = None) -> str:
|
||||
"""Get ElevenLabs voice ID for language"""
|
||||
if voice_name:
|
||||
return voice_name
|
||||
|
|
@ -452,28 +451,32 @@ class TTSService:
|
|||
def _parse_timestamp(self, timestamp: str) -> float:
|
||||
"""Convert VTT timestamp to seconds"""
|
||||
# Format: HH:MM:SS.mmm or MM:SS.mmm
|
||||
parts = timestamp.split(":")
|
||||
try:
|
||||
parts = timestamp.split(":")
|
||||
|
||||
if len(parts) == 3: # HH:MM:SS.mmm
|
||||
hours, minutes, seconds = parts
|
||||
elif len(parts) == 2: # MM:SS.mmm
|
||||
hours, minutes, seconds = "0", parts[0], parts[1]
|
||||
else:
|
||||
raise ValueError(f"Invalid timestamp format: {timestamp}")
|
||||
if len(parts) == 3: # HH:MM:SS.mmm
|
||||
hours, minutes, seconds = parts
|
||||
elif len(parts) == 2: # MM:SS.mmm
|
||||
hours, minutes, seconds = "0", parts[0], parts[1]
|
||||
else:
|
||||
raise ValueError(f"Invalid timestamp format: {timestamp}")
|
||||
|
||||
# Parse seconds and milliseconds
|
||||
sec_parts = seconds.split(".")
|
||||
seconds = int(sec_parts[0])
|
||||
milliseconds = int(sec_parts[1]) if len(sec_parts) > 1 else 0
|
||||
# Parse seconds and milliseconds
|
||||
sec_parts = seconds.split(".")
|
||||
seconds_int = int(sec_parts[0])
|
||||
milliseconds = int(sec_parts[1]) if len(sec_parts) > 1 else 0
|
||||
|
||||
total_seconds = (
|
||||
int(hours) * 3600 +
|
||||
int(minutes) * 60 +
|
||||
seconds +
|
||||
milliseconds / 1000.0
|
||||
)
|
||||
|
||||
return total_seconds
|
||||
total_seconds = (
|
||||
int(hours) * 3600 +
|
||||
int(minutes) * 60 +
|
||||
seconds_int +
|
||||
milliseconds / 1000.0
|
||||
)
|
||||
return total_seconds
|
||||
except (ValueError, IndexError) as e:
|
||||
if "Invalid timestamp format" in str(e):
|
||||
raise
|
||||
raise ValueError(f"Invalid timestamp format: {timestamp}") from e
|
||||
|
||||
|
||||
# Global service instance
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Dict, List, Any
|
||||
from typing import Any
|
||||
|
||||
from ..core.logging import get_logger
|
||||
from ..lib.vtt import VTTEditor
|
||||
|
|
@ -11,7 +11,7 @@ class AssetValidationService:
|
|||
"""Service for validating job assets before completion"""
|
||||
|
||||
@staticmethod
|
||||
async def validate_job_assets(job_doc: Dict[str, Any]) -> tuple[bool, List[str]]:
|
||||
async def validate_job_assets(job_doc: dict[str, Any]) -> tuple[bool, list[str]]:
|
||||
"""
|
||||
Validate all assets for a job before allowing completion
|
||||
Returns (is_valid, list_of_errors)
|
||||
|
|
@ -19,7 +19,7 @@ class AssetValidationService:
|
|||
errors = []
|
||||
outputs = job_doc.get("outputs", {})
|
||||
requested_outputs = job_doc.get("requested_outputs", {})
|
||||
|
||||
|
||||
if not outputs:
|
||||
errors.append("No outputs generated for this job")
|
||||
return False, errors
|
||||
|
|
@ -88,13 +88,13 @@ class AssetValidationService:
|
|||
# Download and validate VTT content
|
||||
blob_path = gcs_uri.replace(f"gs://{gcs_service.bucket.name}/", "")
|
||||
blob = gcs_service.bucket.blob(blob_path)
|
||||
|
||||
|
||||
if not blob.exists():
|
||||
return f"{asset_name} file not found in storage"
|
||||
|
||||
vtt_content = blob.download_as_text()
|
||||
is_valid, vtt_errors = VTTEditor.validate_vtt(vtt_content)
|
||||
|
||||
|
||||
if not is_valid:
|
||||
return f"{asset_name} validation failed: {'; '.join(vtt_errors[:3])}"
|
||||
|
||||
|
|
@ -118,13 +118,13 @@ class AssetValidationService:
|
|||
try:
|
||||
blob_path = gcs_uri.replace(f"gs://{gcs_service.bucket.name}/", "")
|
||||
blob = gcs_service.bucket.blob(blob_path)
|
||||
|
||||
|
||||
if not blob.exists():
|
||||
return f"{asset_name} file not found in storage"
|
||||
|
||||
# Reload blob to get metadata (including size)
|
||||
blob.reload()
|
||||
|
||||
|
||||
# Check file size (should be reasonable for audio)
|
||||
size_mb = blob.size / (1024 * 1024) if blob.size else 0
|
||||
if size_mb < 0.01: # Less than 10KB
|
||||
|
|
@ -169,4 +169,4 @@ class AssetValidationService:
|
|||
|
||||
|
||||
# Global service instance
|
||||
asset_validation_service = AssetValidationService()
|
||||
asset_validation_service = AssetValidationService()
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ from google.oauth2 import id_token
|
|||
from ..core.config import settings
|
||||
from ..core.logging import get_logger
|
||||
from ..models.job import PausePointData, VideoSegmentMetadata
|
||||
from ..schemas.accessible_video import AccessibleVideoMethod, GeminiAccessibleVideoAnalysis
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
|
@ -55,6 +54,9 @@ class VideoRendererService:
|
|||
# Audio ducking settings
|
||||
self.duck_level = getattr(settings, 'accessible_video_duck_level', 0.3)
|
||||
self.duck_fade_ms = getattr(settings, 'accessible_video_duck_fade_ms', 200)
|
||||
# Adaptive silence buffer settings (A1)
|
||||
self._silence_buffer_default = getattr(settings, 'ad_silence_buffer_default', 0.5)
|
||||
self._silence_buffer_min_after = getattr(settings, 'ad_silence_buffer_min_after', 0.1)
|
||||
# Cloud Run support
|
||||
self._gcs_client: storage.Client | None = None
|
||||
# Source video caching for Cloud Run (uploaded once, reused across operations)
|
||||
|
|
@ -229,7 +231,7 @@ class VideoRendererService:
|
|||
error_detail = e.response.json().get("detail", str(e))
|
||||
except Exception:
|
||||
error_detail = str(e)
|
||||
raise FFmpegExecutionError(f"Cloud Run {endpoint} failed: {error_detail}")
|
||||
raise FFmpegExecutionError(f"Cloud Run {endpoint} failed: {error_detail}") from e
|
||||
|
||||
async def _dispatch_ffmpeg(self, cmd: list[str], timeout: int = 3600) -> dict[str, Any]:
|
||||
"""
|
||||
|
|
@ -249,6 +251,7 @@ class VideoRendererService:
|
|||
FFmpegExecutionError: If the command fails
|
||||
"""
|
||||
from celery.result import allow_join_result
|
||||
|
||||
from ..tasks.ffmpeg_operations import run_ffmpeg_command
|
||||
|
||||
# Dispatch to ffmpeg queue
|
||||
|
|
@ -292,6 +295,7 @@ class VideoRendererService:
|
|||
FFmpegExecutionError: If the command fails
|
||||
"""
|
||||
from celery.result import allow_join_result
|
||||
|
||||
from ..tasks.ffmpeg_operations import run_ffprobe_command
|
||||
|
||||
# Dispatch to ffmpeg queue
|
||||
|
|
@ -387,8 +391,7 @@ class VideoRendererService:
|
|||
logger.info(f"Starting overlay render for {source_video_path}")
|
||||
placements = analysis.get("placements", [])
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
temp_dir_path = Path(temp_dir)
|
||||
with tempfile.TemporaryDirectory() as _temp_dir:
|
||||
|
||||
# Get source video duration
|
||||
duration = await self._get_video_duration(source_video_path)
|
||||
|
|
@ -411,7 +414,7 @@ class VideoRendererService:
|
|||
filter_parts = []
|
||||
|
||||
# Add each AD segment as input
|
||||
for cue_index, mp3_path in ad_segments:
|
||||
for _cue_index, mp3_path in ad_segments:
|
||||
inputs.extend(["-i", mp3_path])
|
||||
|
||||
# Build complex filter
|
||||
|
|
@ -425,7 +428,7 @@ class VideoRendererService:
|
|||
|
||||
# Add delay to each AD segment and mix
|
||||
ad_labels = []
|
||||
for i, (cue_index, mp3_path) in enumerate(ad_segments):
|
||||
for i, (cue_index, _mp3_path) in enumerate(ad_segments):
|
||||
# Find the placement for this cue
|
||||
placement = next(
|
||||
(p for p in placements if p.get("ad_cue_index") == cue_index),
|
||||
|
|
@ -478,7 +481,7 @@ class VideoRendererService:
|
|||
output_path
|
||||
])
|
||||
|
||||
logger.info(f"Running ffmpeg overlay command...")
|
||||
logger.info("Running ffmpeg overlay command...")
|
||||
await self._run_ffmpeg(cmd)
|
||||
|
||||
logger.info(f"Overlay render complete: {output_path}")
|
||||
|
|
@ -560,7 +563,7 @@ class VideoRendererService:
|
|||
logger.info(f"Source Properties: {video_props}, Duration: {source_duration:.2f}s")
|
||||
|
||||
# Create a mapping of cue_index to mp3_path
|
||||
cue_to_mp3 = {cue_index: mp3_path for cue_index, mp3_path in ad_segments}
|
||||
cue_to_mp3 = dict(ad_segments)
|
||||
|
||||
# Pre-process placements and validate
|
||||
valid_placements = []
|
||||
|
|
@ -598,18 +601,38 @@ class VideoRendererService:
|
|||
final_segment_needed = final_segment_start < source_duration
|
||||
|
||||
# ============================================================
|
||||
# PARALLEL PHASE 1: Generate shared silence + extract all frames + all video segments
|
||||
# PARALLEL PHASE 1: Generate per-cue silence files + extract all frames + video segments
|
||||
# ============================================================
|
||||
logger.info(f"Phase 1: Parallel extraction of {len(valid_placements)} frames and video segments")
|
||||
|
||||
silence_duration = 0.5 # 500ms shared by all
|
||||
silence_path = temp_dir_path / "silence_shared.m4a"
|
||||
# Compute adaptive silence buffers per cue (A1):
|
||||
# natural_gap_ms already present at the pause point reduces how much silence we add.
|
||||
_buf_default = self._silence_buffer_default
|
||||
_buf_min_after = self._silence_buffer_min_after
|
||||
silence_pre_paths: dict[int, str] = {}
|
||||
silence_post_paths: dict[int, str] = {}
|
||||
for p in valid_placements:
|
||||
i = p["index"]
|
||||
natural_gap = (p.get("natural_gap_ms") or 0.0) / 1000.0
|
||||
silence_before = max(0.05, _buf_default - natural_gap * 0.5)
|
||||
silence_after = max(_buf_min_after, _buf_default - natural_gap * 0.3)
|
||||
p["silence_before"] = silence_before
|
||||
p["silence_after"] = silence_after
|
||||
silence_pre_paths[i] = str(temp_dir_path / f"silence_pre_{i}.m4a")
|
||||
silence_post_paths[i] = str(temp_dir_path / f"silence_post_{i}.m4a")
|
||||
logger.debug(
|
||||
f"Cue {p['cue_index']}: natural_gap={natural_gap*1000:.0f}ms → "
|
||||
f"silence_before={silence_before*1000:.0f}ms silence_after={silence_after*1000:.0f}ms"
|
||||
)
|
||||
|
||||
# Build tasks for phase 1
|
||||
phase1_tasks = []
|
||||
|
||||
# Task: Generate silence (just once, shared by all)
|
||||
phase1_tasks.append(self._generate_silence(silence_duration, str(silence_path), video_props))
|
||||
# Tasks: Generate per-cue silence files
|
||||
for p in valid_placements:
|
||||
i = p["index"]
|
||||
phase1_tasks.append(self._generate_silence(p["silence_before"], silence_pre_paths[i], video_props))
|
||||
phase1_tasks.append(self._generate_silence(p["silence_after"], silence_post_paths[i], video_props))
|
||||
|
||||
# Tasks: Extract all video segments
|
||||
video_segment_paths = {}
|
||||
|
|
@ -666,7 +689,7 @@ class VideoRendererService:
|
|||
combined_audio_path = temp_dir_path / f"combined_audio_{i}.m4a"
|
||||
combined_audio_paths[i] = str(combined_audio_path)
|
||||
phase2_tasks.append(self._concatenate_audio(
|
||||
[str(silence_path), p["ad_mp3_path"], str(silence_path)],
|
||||
[silence_pre_paths[i], p["ad_mp3_path"], silence_post_paths[i]],
|
||||
str(combined_audio_path),
|
||||
video_props
|
||||
))
|
||||
|
|
@ -685,11 +708,14 @@ class VideoRendererService:
|
|||
i = p["index"]
|
||||
cue_index = p["cue_index"]
|
||||
ad_duration = p["ad_duration"]
|
||||
total_freeze_duration = ad_duration + (2 * silence_duration)
|
||||
silence_before = p["silence_before"]
|
||||
silence_after = p["silence_after"]
|
||||
total_freeze_duration = ad_duration + silence_before + silence_after
|
||||
|
||||
logger.info(
|
||||
f"Cue {cue_index}: Freeze segment with silence buffers - "
|
||||
f"500ms + AD={ad_duration:.2f}s + 500ms = {total_freeze_duration:.2f}s"
|
||||
f"Cue {cue_index}: Freeze segment — "
|
||||
f"pre={silence_before*1000:.0f}ms + AD={ad_duration:.2f}s + "
|
||||
f"post={silence_after*1000:.0f}ms = {total_freeze_duration:.2f}s"
|
||||
)
|
||||
|
||||
freeze_segment_path = temp_dir_path / f"freeze_segment_{i}.mp4"
|
||||
|
|
@ -708,29 +734,17 @@ class VideoRendererService:
|
|||
# ============================================================
|
||||
# PHASE 3.5: Measure actual freeze segment durations for VTT retiming
|
||||
# ============================================================
|
||||
# NOTE: Use _get_video_duration_local directly since freeze segments are
|
||||
# local files. Using _get_video_duration would incorrectly use the cached
|
||||
# source video URI in Cloud Run mode instead of measuring the freeze segment.
|
||||
logger.info("Measuring actual freeze segment durations...")
|
||||
# Use the pre-computed expected duration for each freeze segment.
|
||||
# Cloud Run-generated freeze segments are created to this exact duration,
|
||||
# so probing is unnecessary and avoids dispatching to the Celery ffmpeg
|
||||
# queue (which caused FFprobe code 1 failures on Cloud Run output files).
|
||||
logger.info("Setting freeze segment durations from pre-computed values...")
|
||||
for p in valid_placements:
|
||||
i = p["index"]
|
||||
freeze_path = freeze_segment_paths[i]
|
||||
actual_duration = await self._get_video_duration_local(freeze_path)
|
||||
p["actual_freeze_duration"] = actual_duration
|
||||
|
||||
# Log any discrepancy between expected and actual duration
|
||||
expected = p["ad_duration"] + (2 * silence_duration)
|
||||
discrepancy = actual_duration - expected
|
||||
if abs(discrepancy) > 0.01: # 10ms threshold
|
||||
logger.warning(
|
||||
f"Freeze segment duration mismatch for cue {p['cue_index']}: "
|
||||
f"expected={expected:.3f}s, actual={actual_duration:.3f}s, "
|
||||
f"discrepancy={discrepancy:+.3f}s"
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
f"Freeze segment cue {p['cue_index']}: duration={actual_duration:.3f}s (expected={expected:.3f}s)"
|
||||
)
|
||||
expected = p["ad_duration"] + p["silence_before"] + p["silence_after"]
|
||||
p["actual_freeze_duration"] = expected
|
||||
logger.debug(
|
||||
f"Freeze segment cue {p['cue_index']}: duration={expected:.3f}s"
|
||||
)
|
||||
|
||||
# ============================================================
|
||||
# PHASE 4: Assemble segment list in correct order
|
||||
|
|
@ -869,9 +883,6 @@ class VideoRendererService:
|
|||
# Pause point is at the START of the freeze frame in the rendered timeline
|
||||
pause_ms = freeze_frame_starts.get(cue_index, p["pause_point"] * 1000)
|
||||
|
||||
# Find the freeze segment for this cue to get its end position
|
||||
freeze_seg = next((s for s in segment_metadata_list if s.is_freeze_frame and s.cue_index == cue_index), None)
|
||||
|
||||
# Compute min bound: end of previous AD segment (or 0 for first)
|
||||
if idx == 0:
|
||||
min_bound_ms = 0.0
|
||||
|
|
@ -1534,7 +1545,7 @@ class VideoRendererService:
|
|||
"""
|
||||
Generate a silent audio file of specified duration.
|
||||
|
||||
Used to create 500ms silence buffers before/after AD audio.
|
||||
Used to create adaptive silence buffers before/after AD audio.
|
||||
"""
|
||||
if self._use_cloud_run:
|
||||
await self._generate_silence_cloud_run(duration, output_path, props)
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue