diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..9dda026 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,92 @@ +{ + "permissions": { + "allow": [ + "WebSearch", + "Bash(cd /Volumes/SSD/Projects/Oliver/video-accessibility/backend && ruff check app/services/elevenlabs_voices.py app/services/tts.py app/api/v1/routes_tts.py app/models/job.py app/tasks/tts_synthesis.py app/core/config.py 2>&1)", + "Bash(cd /Volumes/SSD/Projects/Oliver/video-accessibility/backend && python -m ruff check app/services/elevenlabs_voices.py app/services/tts.py app/api/v1/routes_tts.py app/models/job.py app/tasks/tts_synthesis.py app/core/config.py 2>&1)", + "Bash(cd /Volumes/SSD/Projects/Oliver/video-accessibility/backend && pip3 show ruff 2>&1 | head -5; which pip3 2>&1)", + "Bash(cd /Volumes/SSD/Projects/Oliver/video-accessibility/frontend && npm run type-check 2>&1 | tail -20)", + "Bash(node_modules/.bin/tsc --noEmit 2>&1 | tail -20)", + "Bash(./node_modules/.bin/tsc --noEmit 2>&1 | tail -30)", + "Bash(npm run type-check 2>&1)", + "Bash(cd /Volumes/SSD/Projects/Oliver/video-accessibility/frontend && npm run type-check 2>&1)", + "Bash(npm run lint 2>&1)", + "WebFetch(domain:dcmp.org)", + "WebFetch(domain:www.w3.org)", + "WebFetch(domain:partnerhelp.netflixstudios.com)", + "WebFetch(domain:m.media-amazon.com)", + "WebFetch(domain:www.acb.org)", + "Bash(./node_modules/.bin/tsc --noEmit)", + "Bash(node_modules/.bin/tsc --noEmit)", + "Bash(pandoc --version)", + "WebFetch(domain:ai-sandbox.oliver.solutions)", + "Bash(gcloud run:*)", + "Bash(gcloud logging:*)", + "Bash(ssh optical:*)", + "Bash(/Volumes/SSD/Projects/Oliver/video-accessibility/backend/.venv/bin/python3.11 -c \"import sys; sys.path.insert\\(0, '.'\\); from app.models.user import UserRole; print\\([r.value for r in UserRole]\\)\")", + "Bash(npm list *)", + "Bash(brew list *)", + "Bash(npx --yes puppeteer --version)", + "Bash(node md_to_pdf.js)", + "Bash(npm root 
*)", + "Bash(node *)", + "Bash(ssh optical-web-1 *)", + "Bash(git *)", + "WebFetch(domain:docs.anthropic.com)", + "Bash(poetry lock *)", + "Bash(pip show *)", + "Read(//Users/ai_leed/.local/bin/**)", + "Read(//opt/homebrew/bin/**)", + "Bash(pip3 install *)", + "Bash(poetry --version)", + "Bash(docker run *)", + "Read(//Users/ai_leed/.docker/run/**)", + "Bash(docker context *)", + "Bash(DOCKER_HOST=unix:///var/run/docker.sock docker run --rm -v \"$\\(pwd\\):/app\" -w /app python:3.11-slim bash -c \"pip install poetry==1.8.2 -q && poetry lock --no-update\")", + "Bash(brew install *)", + "Bash(npm run *)", + "Bash(scp /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/models/audit_log.py optical:/tmp/audit_log.py)", + "Bash(scp *)", + "Bash(kill %1)", + "Bash(ssh optical-dev *)", + "Skill(fullstack-dev-skills:security-reviewer)", + "Bash(chmod +x *)", + "Bash(gcloud auth *)", + "Bash(gcloud config *)", + "Bash(gcloud artifacts *)", + "Bash(sed -n '190,200p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/api/v1/routes_jobs.py)", + "Bash(sed -n '1914,1922p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/api/v1/routes_jobs.py)", + "Bash(sed -n '2048,2062p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/api/v1/routes_jobs.py)", + "Bash(sed -n '2490,2502p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/api/v1/routes_jobs.py)", + "Bash(sed -n '2628,2638p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/api/v1/routes_jobs.py)", + "Bash(gcloud builds submit *)", + "Bash(gcloud builds describe 79802b34-e17b-4446-b01d-68d99d569262 *)", + "Bash(gcloud compute instances list *)", + "Bash(gcloud compute networks vpc-access connectors list *)", + "Bash(gcloud builds *)", + "Bash(gcloud projects get-iam-policy optical-414516 *)", + "Bash(gcloud projects *)", + "Bash(npm audit *)", + "Skill(codebase-audit-suite:ln-622-build-auditor)", 
+ "Skill(codebase-audit-suite:ln-624-code-quality-auditor)", + "Skill(codebase-audit-suite:ln-625-dependencies-auditor)", + "Skill(codebase-audit-suite:ln-626-dead-code-auditor)", + "Bash(/opt/homebrew/bin/ruff check *)", + "Bash(npm test *)", + "Bash(sed -n '35,42p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/frontend/src/test/utils.tsx)", + "Bash(sed -n '55,90p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/frontend/tests/helpers/auth.ts)", + "Bash(sed -n '48,60p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/frontend/src/components/Layout/Sidebar.tsx)", + "Bash(sed -n '152,170p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/frontend/src/components/Layout/Sidebar.tsx)", + "Bash(poetry env *)", + "Bash(poetry install *)", + "Bash(poetry run *)", + "Bash(docker info *)", + "Bash(sed -n '1,30p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/services/gcs.py)", + "Bash(sed -n '155,165p' /Users/ai_leed/Documents/Projects/Oliver/video-accessibility/backend/app/services/gcs.py)", + "Bash(gcloud secrets *)", + "Bash(openssl rand *)", + "Bash(ssh *)", + "Skill(commit-commands:commit-push-pr)" + ] + } +} diff --git a/.hex-skills/audit-reports/ln-622--build-health.md b/.hex-skills/audit-reports/ln-622--build-health.md new file mode 100644 index 0000000..ec73a85 --- /dev/null +++ b/.hex-skills/audit-reports/ln-622--build-health.md @@ -0,0 +1,118 @@ +# Build Health Audit — ln-622 +**Score: 5.5/10** | Issues: 28 (C:0 H:5 M:18 L:5) +**Date:** 2026-04-30 | **Stack:** Python 3.11 / FastAPI / Celery + React 19 / Vite / TypeScript 5.8 + +--- + +## 1. Compiler / Linter Errors + +### Backend — ruff: 1314 errors (HIGH) +`ruff check app/` exits non-zero with 1314 violations. The ruff config in `pyproject.toml` uses **deprecated top-level `select`/`ignore`/`per-file-ignores`** instead of `[tool.ruff.lint]` — ruff emits a warning on every run. 
+ +Top violation codes: +| Code | Meaning | Volume | +|------|---------|--------| +| I001 | Import block unsorted | ~400 | +| UP | pyupgrade (f-strings, typing aliases) | ~500 | +| B | flake8-bugbear | ~200 | +| F401 | Unused import | 58 | + +Most violations are **auto-fixable** (`ruff check --fix`). The unsorted imports and UP rules are cosmetic but make CI noisy and block future enforcement. + +**Severity: HIGH** — CI cannot gate on ruff without fixing this first. + +### Frontend — ESLint: 36 problems (30 errors, 6 warnings) (MEDIUM) +Key errors: +| File | Rule | Count | +|------|------|-------| +| `contexts/GlobalWebSocketContext.tsx:56` | `react-refresh/only-export-components` | 1 | +| `contexts/NotificationContext.tsx:91` | `react-refresh/only-export-components` | 1 | +| `contexts/ToastContext.tsx:83` | `react-refresh/only-export-components` | 1 | +| `lib/api.ts:539` | `@typescript-eslint/no-explicit-any` | 1 | +| `routes/admin/QCDetail.tsx` | `@typescript-eslint/no-explicit-any` | 6 | +| `routes/AcceptInvite.tsx` | `@typescript-eslint/no-explicit-any` | 1 | +| `routes/jobs/JobDetail.tsx` | `no-unused-vars` (err catch) | 2 | +| `hooks/__tests__/useJob.test.tsx` | `no-unused-vars` | 1 | +| `tests/helpers/auth.ts` | `no-explicit-any` | 3 | + +**Severity: MEDIUM** — build succeeds, but `any` types and react-refresh errors degrade DX and HMR. + +--- + +## 2. Type Errors + +### Frontend — tsc: CLEAN ✓ +`tsc --noEmit` exits 0. No TypeScript compilation errors. The `any` issues above are ESLint-level, not tsc errors. + +### Backend — mypy: NOT RUN +Cannot run mypy outside the poetry venv. Needs `poetry run mypy .` inside Docker or an activated venv. + +**Severity: LOW** (mypy not blocking, but should be run in CI) + +--- + +## 3. 
Tests + +### Frontend — vitest: 13 failed / 75 total (HIGH) +6 test files affected: + +| Test | Failures | Root cause | +|------|----------|-----------| +| `auth.test.ts` | 1 | Mock shape mismatch — response has extra field `organizationId` | +| `StatusBadge.test.tsx` | 1 | Unknown status no longer renders text (component changed) | +| `VttEditor.test.tsx` | 1 | Multiple elements found for `Insert cue before` title — DOM duplication | +| `useJob.test.tsx` | 3 | `useApproveEnglish` — pending state never resolves in test (timeout 1s); `useCreateJob` arg mismatch | +| `UploadDropzone.test.tsx` | 6 | Text broken across elements — test uses exact string match, but the component renders the text split across multiple element nodes (e.g. `<span>`) | +| `useJobStatusWebSocket.test.tsx` | 1 | (see output) | + +**Severity: HIGH** — 17% test failure rate. Several are stale tests from component refactors (UploadDropzone, StatusBadge). + +### Backend — pytest: CANNOT RUN (CRITICAL) +Running `pytest` outside poetry venv fails with `ModuleNotFoundError` for `fastapi`, `aiohttp`, etc. Tests must be run with `poetry run pytest` inside Docker or an activated poetry environment. + +The `backend/.venv` exists but appears to be a plain venv, not the poetry-managed one. **Tests are effectively unrunnable in local dev without explicit poetry activation.** + +**Severity: CRITICAL** — Developers with system Python cannot run tests without explicit setup steps. + +--- + +## 4. Build Configuration Issues + +### ruff config deprecated (MEDIUM) +`pyproject.toml` uses `[tool.ruff]` top-level `select`, `ignore`, `per-file-ignores`. Current ruff ≥ 0.2 expects `[tool.ruff.lint]`. Fix: + +```toml +# Before +[tool.ruff] +select = ["E", "W", ...] +ignore = ["E501", ...] + +# After +[tool.ruff] +target-version = "py311" +line-length = 88 + +[tool.ruff.lint] +select = ["E", "W", ...] +ignore = ["E501", ...] 
+``` + +### Backend venv mismatch (MEDIUM) +`backend/.venv` cannot run `ruff`, `pytest`, or `mypy` — they are installed in the poetry-managed venv, not this one. Confusing to new devs. + +### AGENTS.md commands incorrect (LOW) +`AGENTS.md` documents `cd backend && poetry run pytest` but the backend has `.venv` and `pyproject.toml` with no Makefile wrapper. The actual working path is `cd backend && .venv/bin/python -m pytest` or requires `poetry shell`. (Note: this conflicts with the venv-mismatch finding above, which reports that `backend/.venv` cannot run `pytest`, and with Section 3, which says tests must be run with `poetry run pytest` — verify which command actually works before changing `AGENTS.md`.) + +--- + +## Summary +| Check | Result | Severity | +|-------|--------|---------| +| ruff backend | 1314 violations (mostly auto-fixable) | HIGH | +| ESLint frontend | 36 problems | MEDIUM | +| tsc frontend | ✓ Clean | OK | +| mypy backend | Not runnable locally | LOW | +| vitest frontend | 13/75 failing | HIGH | +| pytest backend | Not runnable locally | CRITICAL | +| ruff config | Deprecated syntax | MEDIUM | +| venv setup | Confusing / broken | MEDIUM | diff --git a/.hex-skills/audit-reports/ln-624--code-quality.md b/.hex-skills/audit-reports/ln-624--code-quality.md new file mode 100644 index 0000000..41449bd --- /dev/null +++ b/.hex-skills/audit-reports/ln-624--code-quality.md @@ -0,0 +1,116 @@ +# Code Quality Audit — ln-624 +**Score: 5.0/10** | Issues: 22 (C:2 H:8 M:9 L:3) +**Date:** 2026-04-30 + +--- + +## 1. 
God Classes / Files (> 500 lines) + +| File | Lines | Severity | +|------|-------|---------| +| `backend/app/api/v1/routes_jobs.py` | 2882 | **CRITICAL** | +| `frontend/src/routes/admin/QCDetail.tsx` | 2079 | **CRITICAL** | +| `backend/app/services/video_renderer.py` | 1695 | **HIGH** | +| `frontend/src/routes/jobs/JobsList.tsx` | 1246 | **HIGH** | +| `frontend/src/lib/api.ts` | 1056 | **HIGH** | +| `backend/app/tasks/translate_and_synthesize.py` | 1019 | **HIGH** | +| `frontend/src/routes/jobs/NewJob.tsx` | 1038 | **HIGH** | +| `frontend/src/types/api.ts` | 891 | **MEDIUM** | +| `frontend/src/routes/jobs/JobDetail.tsx` | 732 | **MEDIUM** | +| `frontend/src/routes/admin/UserDetail.tsx` | 523 | **MEDIUM** | +| `frontend/src/hooks/useJobStatusWebSocket.ts` | 443 (below the 500-line threshold; listed as borderline) | **MEDIUM** | + +**routes_jobs.py at 2882 lines** is the worst offender — it mixes upload, approval, translation, TTS, VTT editing, download, admin, and websocket concerns in a single router. Splitting by domain (e.g., `routes_upload.py`, `routes_vtt.py`, `routes_review.py`, `routes_tts.py`) would bring each under 500 lines. + +**QCDetail.tsx at 2079 lines** handles the entire QC workflow, VTT display, audio preview, language selection, and approval modals in one component. Needs extraction of at minimum: `LanguageQCPanel`, `VttReviewView`, `ApprovalModal`. + +--- + +## 2. 
Long Methods (> 100 lines) + +| File:line | Function | Length | Severity | +|-----------|---------|--------|---------| +| `tasks/translate_and_synthesize.py:109` | `_async_translate_and_synthesize()` | 485 lines | **CRITICAL** | +| `services/video_renderer.py:487` | `_render_pause_insert_method()` | 419 lines | **CRITICAL** | +| `tasks/ingest_and_ai.py:53` | `ingest_and_ai_task_impl()` | 276 lines | **HIGH** | +| `tasks/rerender_accessible_video.py:110` | `_async_rerender_accessible_video()` | 280 lines | **HIGH** | +| `tasks/render_accessible_video.py:56` | `_async_render_accessible_video()` | 287 lines | **HIGH** | +| `api/v1/routes_jobs.py:1552` | `update_job_vtt_content()` | 215 lines | **HIGH** | +| `tasks/notify.py:29` | `run_async()` | 169 lines | **HIGH** | +| `api/v1/routes_jobs.py:2738` | `update_tts_preferences()` | 144 lines | **MEDIUM** | +| `services/whisper_service.py:241` | `_find_sentence_boundaries()` | 120 lines | **MEDIUM** | +| `services/gemini.py:591` | `analyze_accessible_video_placement()` | 132 lines | **MEDIUM** | + +The two most critical ones (`_async_translate_and_synthesize` at 485 lines and `_render_pause_insert_method` at 419 lines) are orchestrator-style functions with sequential pipeline steps. They could be split into named pipeline stages, each ~50 lines. + +--- + +## 3. Deep Nesting + +Not systematically scanned with a tool (radon/lizard not installed). The long functions above likely contain 4–5+ nesting levels given their complexity. + +--- + +## 4. Too Many Parameters + +| Location | Function | Params | Severity | +|----------|---------|--------|---------| +| `services/gemini.py` | `extract_accessibility_targeted()` | 7+ | **MEDIUM** | +| `tasks/translate_and_synthesize.py` | `_generate_language_tts()` | 8+ | **MEDIUM** | + +Pattern: many functions pass `db`, `job`, `language`, `settings`, `gcs_client`, etc. individually instead of grouping into a context dataclass. + +--- + +## 5. 
Magic Numbers + +### Backend (MEDIUM) +Scattered timing constants without named definitions: +- TTS retry delays (hardcoded seconds) +- chunk sizes in upload +- Audio padding values in video_renderer.py + +### Frontend (LOW) +Mostly clean. Some inline pixel values in Tailwind (acceptable). No concerning business-logic magic numbers found. + +--- + +## 6. N+1 Query Patterns (MEDIUM) + +Potential N+1 patterns found: +- `app/main.py:102` — `async for job_doc in db.jobs.find(...)` — check if this iterates and makes additional queries per document +- `app/core/dependencies.py:185` — `async for m in db.memberships.find(...)` — membership lookup per request in auth middleware (acceptable if cached, but no caching observed) +- `app/core/authz.py:54` — `async for doc in db.memberships.find(...)` — similar pattern in auth check + +These are all async iterators over `find()` — not necessarily N+1 if no nested DB calls, but should be reviewed for `.find()` calls inside the loop body. + +--- + +## 7. Method Signature Quality + +### Boolean flag parameters (MEDIUM) +Several async functions in tasks accept `bool` flags controlling behavior variants (e.g., `skip_tts`, `force_regenerate`). These should be enums or separate functions. + +### Unclear return types (MEDIUM) +Some routes return `dict` or untyped responses instead of Pydantic response models. `routes_admin_production.py` has a few endpoints returning bare dicts. + +--- + +## 8. Side-Effect Cascade Depth + +`_async_translate_and_synthesize()` at 485 lines is the worst case: it writes to GCS, updates MongoDB, dispatches TTS tasks, sends notifications, and updates job status — 5+ distinct side-effect categories from a single function call. This warrants extraction into an orchestrator that delegates to named sink functions. 
+ +--- + +## Summary + +| Check | Status | Severity | +|-------|--------|---------| +| God files (>500L) | 11 files (one, at 443 lines, is below the threshold) | CRITICAL×2, HIGH×5 | +| Long methods (>100L) | 10 functions | CRITICAL×2, HIGH×5 | +| N+1 patterns | 3 potential | MEDIUM | +| Magic numbers | Some in tasks | MEDIUM | +| Method signatures | Boolean flags, unclear returns | MEDIUM | +| Side-effect cascade | translate_and_synthesize | HIGH | + +**Primary recommendation:** Split `routes_jobs.py` and `QCDetail.tsx` — these two files account for the majority of the quality debt. diff --git a/.hex-skills/audit-reports/ln-625--dependencies.md b/.hex-skills/audit-reports/ln-625--dependencies.md new file mode 100644 index 0000000..b186bca --- /dev/null +++ b/.hex-skills/audit-reports/ln-625--dependencies.md @@ -0,0 +1,94 @@ +# Dependencies & Reuse Audit — ln-625 +**Score: 7.5/10** | Issues: 9 (C:0 H:2 M:5 L:2) +**Date:** 2026-04-30 + +--- + +## 1. Vulnerability Scan (CVE/CVSS) + +### Frontend — npm audit: ✓ CLEAN +``` +Total packages: 479 +Vulnerabilities: info:0 low:0 moderate:0 high:0 critical:0 total:0 +``` +Zero CVEs. Excellent. + +### Backend — pip-audit: NOT RUN +`pip-audit` not installed in local env. Recommended to add to CI: +```bash +pip install pip-audit && pip-audit -r requirements.txt +``` +Given many heavy deps (Celery 5.3, google-cloud-*, faster-whisper, aiohttp), a CI scan is strongly advised. + +--- + +## 2. 
Outdated Packages + +### Frontend — npm outdated (many minor/major updates pending) + +**MAJOR version gaps (HIGH):** +| Package | Installed | Latest | Notes | +|---------|-----------|--------|-------| +| `@azure/msal-browser` | 4.25.0 | **5.9.0** | MSAL v5 has breaking API changes | +| `@azure/msal-react` | 3.0.20 | **5.3.2** | Paired with msal-browser, coordinated upgrade needed | +| `@sentry/react` | 8.55.0 | **10.51.0** | Sentry v10 has breaking changes | +| `typescript` | 5.8.3 | **6.0.3** | TS 6 has strictness changes | +| `vite` | 7.3.2 | **8.0.10** | Vite 8 breaking changes | +| `eslint` | 9.33.0 | **10.2.1** | ESLint 10 config format may change | +| `jsdom` | 26.1.0 | **29.1.1** | Test environment | + +**Minor updates (LOW-MEDIUM):** Most other packages have minor/patch updates pending (react 19.1→19.2, tailwindcss 4.1→4.2, etc.) + +**Recommendation:** Keep MSAL and Sentry on current major until dedicated upgrade sprint. React, TailwindCSS, react-query minor updates are safe to apply immediately. + +### Backend — pip outdated: pip-audit not available +Based on pyproject.toml dates vs ecosystem: +- `ruff ^0.1.6` → installed ruff is `0.15.12` (already updated, good) +- `google-genai ^1.56.0` → recently updated per git log +- `faster-whisper ^1.2.0` → check for 1.x updates + +--- + +## 3. Unused Dependencies + +### Backend — `sendgrid` (MEDIUM) +`pyproject.toml` lists `sendgrid = "^6.11.0"`. However: +- The actual emailer (`app/services/emailer.py`) uses **Mailgun** REST API via `httpx` +- `sendgrid` is referenced **only** in `app/core/config.py` as a dead config field `sendgrid_api_key: str = ""` with comment `# Email (Mailgun — primary; sendgrid_api_key kept for backward compat)` +- No `import sendgrid` anywhere in app code + +**Action:** Remove `sendgrid` from `pyproject.toml` dependencies and remove the `sendgrid_api_key` config field. 
+ +### Frontend — no unused dependencies found +- `axios` → used in `lib/api.ts` +- `@azure/msal-*` → used in `main.tsx`, `routes/Login.tsx` +- `date-fns` → used in 5+ components +- `zustand`, `@tanstack/react-query`, `react-hook-form`, `zod` → all actively used +- `react-dropzone` → used in upload components + +--- + +## 4. Available Native Alternatives + +### Frontend — axios vs fetch (LOW) +`axios` is used for all API calls in `lib/api.ts`. The project targets modern browsers and uses Vite. Native `fetch` + `AbortController` could replace axios, reducing bundle by ~14kb gzipped. However, axios provides request/response interceptors that are actively used for auth token refresh — migration effort is medium. **Not urgent.** + +--- + +## 5. Custom Implementations + +No custom crypto or hand-rolled validation libraries found. All auth uses `python-jose` + `libpass` (bcrypt). VTT parsing is domain-specific and not replaceable by a library. No concerns. + +--- + +## Summary + +| Check | Result | Severity | +|-------|--------|---------| +| Frontend CVEs | ✓ 0 vulnerabilities | OK | +| Backend CVEs | ⚠ Not scanned | MEDIUM | +| Frontend major updates | MSAL×2, Sentry, TS, Vite, ESLint | HIGH | +| Frontend minor updates | Many | LOW | +| Backend unused dep | `sendgrid` in pyproject.toml | MEDIUM | +| Native alternatives | axios → fetch possible | LOW | +| Custom implementations | None found | OK | diff --git a/.hex-skills/audit-reports/ln-626--dead-code.md b/.hex-skills/audit-reports/ln-626--dead-code.md new file mode 100644 index 0000000..1161db3 --- /dev/null +++ b/.hex-skills/audit-reports/ln-626--dead-code.md @@ -0,0 +1,143 @@ +# Dead Code Audit — ln-626 +**Score: 7.0/10** | Issues: 14 (C:0 H:0 M:6 L:8) +**Date:** 2026-04-30 + +--- + +## 1. Unused Imports (Python — F401) + +ruff detected **58 unused import violations** across backend. 
Sample: + +| File | Unused import | +|------|--------------| +| `routes_admin.py:9` | `get_current_user` | +| `routes_admin.py:11` | `verify_password` | +| `routes_admin.py:16` | `ChangePasswordRequest` | +| `routes_admin.py:23` | `log_security_event` | +| (+ 54 more across all files) | | + +All are auto-fixable with `ruff check --fix --select F401`. The `__init__.py` files are correctly excluded via `per-file-ignores`. + +**Severity: MEDIUM** — clutters imports, increases cognitive load when reading files. + +--- + +## 2. Deprecated / Legacy Types (Frontend) + +`frontend/src/types/api.ts` contains 3 deprecated exported types with JSDoc markers: + +| Line | Type | Marker | +|------|------|--------| +| 96 | `TtsVoicesResponse` | `@deprecated Use ProviderVoicesResponse instead` | +| 137 | `TtsOptionsResponse` | `@deprecated Use ProviderOptionsResponse instead` | +| 555-566 | `Client` / `OrganizationLegacy` | `@deprecated Use Organization instead` + `export { Client as OrganizationLegacy }` | + +These types are still exported, meaning consumers could use them by mistake. If no external consumers exist (library not published), they should be deleted. + +**Severity: MEDIUM** — active deprecation markers indicate intent to remove. Leaving them causes confusion. + +--- + +## 3. Legacy Status Values (Frontend) + +`frontend/src/types/api.ts:12,14`: +```ts +| "tts_failed" // legacy: keep for back-compat +| "render_failed" // legacy: keep for back-compat +``` + +These job statuses are marked as legacy. If the backend no longer emits them, they are dead type branches. If it still does (for old jobs in MongoDB), they're valid — but should be clearly documented with a removal condition. + +**Severity: LOW** — no runtime impact, but requires clarification. + +--- + +## 4. 
Backward Compatibility Code (Frontend) + +### lib/api.ts:239 — Legacy approval method (MEDIUM) +```ts +// Legacy method - calls approve_source for backwards compatibility +``` +A backward-compat shim in the API client. If all callers have been updated to the new method, this should be removed. + +### VideoWithCaptions.tsx:16–43 — Legacy single-language props (MEDIUM) +```ts +// Legacy single-language props (still supported) +sourceLanguage?: string; // Language code for legacy props +// Legacy props +// Combine legacy props with tracks (use useMemo to prevent recreation) +``` +The component maintains backward-compat with old single-language prop API. If no callers use these legacy props, they can be removed. + +### JobDetail.tsx:41 — Legacy status mapping (LOW) +```ts +// Handle legacy approved_english/approved_source statuses (map to pending_final_review) +``` +Status mapping shim for old job records. Should be removed after all existing jobs are migrated. + +--- + +## 5. Commented-Out Code (Backend) + +| File | Line | Content | +|------|------|---------| +| `telemetry/tracing.py:5` | `# from opentelemetry.exporter.gcp.trace import CloudTraceSpanExporter # Disabled for local dev` | GCP trace exporter disabled | +| `telemetry/metrics.py:5` | `# from opentelemetry.exporter.prometheus import PrometheusMetricReader # Disabled for local dev` | Prometheus reader disabled | +| `pyproject.toml` | `# opentelemetry-exporter-prometheus = ... # Temporarily disabled - version conflicts` | Dep commented out | + +These are intentional (local dev vs prod config), not dead code. However, the conditional should be expressed via environment config, not source comments. **Low priority.** + +**Severity: LOW** + +--- + +## 6. 
Leftover .old Files (MEDIUM) + +| File | Age | Action | +|------|-----|--------| +| `docker-compose.yml.old` | Created 2026-03-03 (~2 months) | Delete | +| `backend/Dockerfile.old` | Created 2026-03-03 (~2 months) | Delete | +| `backend/.dockerignore.old` | — | Delete | + +These files have no build references. Git history preserves them. + +--- + +## 7. Unused Dockerfiles + +| File | Referenced in compose? | +|------|----------------------| +| `backend/Dockerfile.ffmpeg-service` | No — ffmpeg is embedded in main worker | +| `backend/Dockerfile.cloudrun` | Yes — referenced for Cloud Run deploys | +| `backend/Dockerfile.whisper-service` | Yes — whisper-worker service in compose | + +`Dockerfile.ffmpeg-service` appears to be dead — the main Dockerfile handles ffmpeg. Should be confirmed and deleted if unused. + +**Severity: LOW** + +--- + +## 8. Dead Config Field + +`backend/app/core/config.py:272`: +```python +sendgrid_api_key: str = "" # Email (Mailgun — primary; sendgrid_api_key kept for backward compat) +``` +`sendgrid` package not used. Config field and `secrets_config.py` secret reference both dead. + +**Severity: MEDIUM** — misleads ops into configuring a sendgrid secret that has no effect. 
+ +--- + +## Summary + +| Check | Issues | Severity | +|-------|--------|---------| +| Unused Python imports | 58 (auto-fixable) | MEDIUM | +| Deprecated TS types | 3 types | MEDIUM | +| Backward-compat shims | 3 in frontend | MEDIUM | +| Commented-out code | 3 telemetry lines | LOW | +| .old files | 3 files | MEDIUM | +| Unused Dockerfile | Dockerfile.ffmpeg-service | LOW | +| Dead config field | sendgrid_api_key | MEDIUM | +| Legacy status values | 2 status strings | LOW | diff --git a/apache-config-snippet.conf b/apache-config-snippet.conf index a538f2b..4e583be 100644 --- a/apache-config-snippet.conf +++ b/apache-config-snippet.conf @@ -1,172 +1,96 @@ # ============================================================================= -# Apache Configuration for Accessible Video Platform -# ============================================================================= -# Add this configuration to your existing VirtualHost for ai-sandbox.oliver.solutions -# Location: /etc/apache2/sites-available/ai-sandbox.oliver.solutions-ssl.conf +# Apache config fragment — Accessible Video Platform +# Inject into: /etc/apache2/sites-available/ai-sandbox.oliver.solutions-ssl.conf +# +# Required modules: +# sudo a2enmod proxy proxy_http proxy_wstunnel rewrite headers +# +# Container port map: +# accessible-video-api → 0.0.0.0:8012->8000/tcp # ============================================================================= -# ----------------------------------------------------------------------------- -# Frontend - Static React SPA served from subdirectory -# ----------------------------------------------------------------------------- +# ── Timeouts for large video uploads (up to 2 GB, ~10 min) ────────────────── + + ProxyTimeout 600 + -# Serve frontend from /video-accessibility subdirectory +# ── WebSocket proxy (MUST be before /api/ HTTP proxy) ─────────────────────── +# disablereuse=on prevents long-lived WS connections from exhausting the pool +ProxyPassMatch 
^/video-accessibility/api/v1/ws/(.*)$ ws://127.0.0.1:8012/api/v1/ws/$1 disablereuse=on +ProxyPassReverse /video-accessibility/api/v1/ws/ ws://127.0.0.1:8012/api/v1/ws/ + +# ── API proxy ──────────────────────────────────────────────────────────────── +# Strips /video-accessibility prefix — FastAPI sees /api/v1/... +ProxyPassMatch ^/video-accessibility/api/(.*)$ http://127.0.0.1:8012/api/$1 +ProxyPassReverse /video-accessibility/api/ http://127.0.0.1:8012/api/ + +# Swagger / OpenAPI +ProxyPassMatch ^/video-accessibility/docs(/.*)?$ http://127.0.0.1:8012/docs$1 +ProxyPassReverse /video-accessibility/docs http://127.0.0.1:8012/docs +ProxyPassMatch ^/video-accessibility/openapi\.json$ http://127.0.0.1:8012/openapi.json +ProxyPassReverse /video-accessibility/openapi.json http://127.0.0.1:8012/openapi.json + +# ── SPA static files ───────────────────────────────────────────────────────── Alias /video-accessibility /var/www/html/video-accessibility - - # Basic options Options -Indexes +FollowSymLinks - AllowOverride All + AllowOverride None Require all granted - # React SPA routing - rewrite all requests to index.html + # Allow video uploads up to 2 GB + LimitRequestBody 2147483648 + RewriteEngine On - RewriteBase /video-accessibility + RewriteBase /video-accessibility/ - # Don't rewrite files or directories that exist - RewriteCond %{REQUEST_FILENAME} !-f - RewriteCond %{REQUEST_FILENAME} !-d + # Serve real files/directories directly (JS, CSS, assets, fonts) + RewriteCond %{REQUEST_FILENAME} -f [OR] + RewriteCond %{REQUEST_FILENAME} -d + RewriteRule ^ - [L] - # Rewrite everything else to index.html - RewriteRule ^ /video-accessibility/index.html [L] + # Everything else → index.html (React Router handles client-side nav) + RewriteRule ^ index.html [L] - # Security headers - Header always set X-Frame-Options "SAMEORIGIN" - Header always set X-Content-Type-Options "nosniff" - Header always set X-XSS-Protection "1; mode=block" - Header always set Referrer-Policy 
"strict-origin-when-cross-origin" - - # Cache control for static assets - + # Cache-bust hashed assets indefinitely; never cache HTML + Header set Cache-Control "public, max-age=31536000, immutable" - - # No cache for HTML files - + Header set Cache-Control "no-cache, no-store, must-revalidate" - Header set Pragma "no-cache" - Header set Expires "0" - - -# ----------------------------------------------------------------------------- -# Backend API - Reverse proxy to Docker container -# ----------------------------------------------------------------------------- - -# Proxy backend API to Docker container on port 8000 - - # Preserve original host header - ProxyPreserveHost On - - # Proxy HTTP requests - ProxyPass http://localhost:8000 - ProxyPassReverse http://localhost:8000 - - # Proxy timeout settings (important for long-running video processing) - ProxyTimeout 300 - - # WebSocket support (CRITICAL for real-time job updates) - RewriteEngine On - RewriteCond %{HTTP:Upgrade} =websocket [NC] - RewriteRule /video-accessibility-back/(.*) ws://localhost:8000/$1 [P,L] - RewriteCond %{HTTP:Upgrade} !=websocket [NC] - RewriteRule /video-accessibility-back/(.*) http://localhost:8000/$1 [P,L] # Security headers Header always set X-Frame-Options "SAMEORIGIN" Header always set X-Content-Type-Options "nosniff" - - # CORS is handled by the backend, don't add headers here - - -# ----------------------------------------------------------------------------- -# Required Apache Modules -# ----------------------------------------------------------------------------- - -# Enable these modules with: -# sudo a2enmod rewrite -# sudo a2enmod proxy -# sudo a2enmod proxy_http -# sudo a2enmod proxy_wstunnel -# sudo a2enmod headers -# sudo systemctl restart apache2 - -# Verify modules are enabled: -# apache2ctl -M | grep -E '(rewrite|proxy|headers)' + Header always set Referrer-Policy "strict-origin-when-cross-origin" + # 
============================================================================= -# Full VirtualHost Example +# Full VirtualHost skeleton (reference — values match optical-web-1) # ============================================================================= - -# Example of complete VirtualHost configuration: # # # ServerName ai-sandbox.oliver.solutions -# ServerAdmin admin@oliver.solutions -# # DocumentRoot /var/www/html # -# # SSL Configuration (with wildcard cert) # SSLEngine on -# SSLCertificateFile /path/to/wildcard-ai-sandbox.oliver.solutions.crt -# SSLCertificateKeyFile /path/to/wildcard-ai-sandbox.oliver.solutions.key -# SSLCertificateChainFile /path/to/chain.crt # If needed +# SSLCertificateFile /path/to/wildcard.crt +# SSLCertificateKeyFile /path/to/wildcard.key # -# # SSL Protocol and Cipher settings -# SSLProtocol all -SSLv2 -SSLv3 -TLSv1 -TLSv1.1 +# SSLProtocol all -SSLv2 -SSLv3 -TLSv1 -TLSv1.1 # SSLCipherSuite HIGH:!aNULL:!MD5 # -# # Frontend configuration (from above) -# Alias /video-accessibility /var/www/html/video-accessibility -# -# ... -# +# # — paste the block above here — # -# # Backend API configuration (from above) -# -# ... 
-# -# -# # Logging -# ErrorLog ${APACHE_LOG_DIR}/ai-sandbox-error.log +# ErrorLog ${APACHE_LOG_DIR}/ai-sandbox-error.log # CustomLog ${APACHE_LOG_DIR}/ai-sandbox-access.log combined # # ============================================================================= -# Testing & Verification +# Verify # ============================================================================= - -# Test Apache configuration: -# sudo apache2ctl configtest -# -# Restart Apache: -# sudo systemctl restart apache2 -# -# Test frontend: -# curl -I https://ai-sandbox.oliver.solutions/video-accessibility -# -# Test backend: -# curl https://ai-sandbox.oliver.solutions/video-accessibility-back/health -# -# Test WebSocket (requires wscat): -# wscat -c wss://ai-sandbox.oliver.solutions/video-accessibility-back/api/v1/ws/job-list - -# ============================================================================= -# Troubleshooting -# ============================================================================= - -# Check Apache logs: -# sudo tail -f /var/log/apache2/ai-sandbox-error.log -# sudo tail -f /var/log/apache2/ai-sandbox-access.log -# -# Check if backend is running: -# curl http://localhost:8000/health -# -# Check Docker containers: -# cd /opt/accessible-video -# docker-compose ps -# -# Common issues: -# - 502 Bad Gateway: Backend container not running -# - 404 Not Found: Frontend not deployed or Apache alias incorrect -# - WebSocket fails: mod_proxy_wstunnel not enabled -# - CORS errors: Check backend CORS configuration, not Apache +# sudo apache2ctl configtest +# sudo systemctl reload apache2 +# curl -I https://ai-sandbox.oliver.solutions/video-accessibility/ +# curl https://ai-sandbox.oliver.solutions/video-accessibility/api/v1/health +# wscat -c wss://ai-sandbox.oliver.solutions/video-accessibility/api/v1/ws/job-list diff --git a/backend/.dockerignore.old b/backend/.dockerignore.old deleted file mode 100644 index 53317a1..0000000 --- a/backend/.dockerignore.old +++ /dev/null @@ -1,92 
+0,0 @@ -# Python -__pycache__/ -*.py[cod] -*$py.class -*.so -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# Poetry (keep poetry.lock for reproducible builds) -# poetry.lock - -# Virtual environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# IDE -.vscode/ -.idea/ -*.swp -*.swo -*~ - -# OS -.DS_Store -.DS_Store? -._* -.Spotlight-V100 -.Trashes -ehthumbs.db -Thumbs.db - -# Testing -.coverage -.pytest_cache/ -.mypy_cache/ -.tox/ -htmlcov/ -coverage.xml -*.cover -.hypothesis/ - -# Documentation -docs/ -*.md -README* - -# Logs -*.log -logs/ - -# Git -.git/ -.gitignore - -# Docker -Dockerfile* -.dockerignore -docker-compose* - -# CI/CD -.github/ - -# Local development -.env.local -.env.development -.env.test - -# Temporary files -tmp/ -temp/ -*.tmp -*.bak \ No newline at end of file diff --git a/backend/Dockerfile.old b/backend/Dockerfile.old deleted file mode 100644 index 2fa68fb..0000000 --- a/backend/Dockerfile.old +++ /dev/null @@ -1,127 +0,0 @@ -# Build stage - Install dependencies and build wheels -FROM python:3.11-slim AS builder - -# Install build dependencies -RUN apt-get update && apt-get install -y \ - build-essential \ - curl \ - && rm -rf /var/lib/apt/lists/* - -# Install Poetry -RUN pip install poetry==1.8.2 - -# Set Poetry configuration -ENV POETRY_NO_INTERACTION=1 \ - POETRY_VENV_IN_PROJECT=1 \ - POETRY_CACHE_DIR=/tmp/poetry_cache - -WORKDIR /app - -# Copy dependency files -COPY pyproject.toml poetry.lock ./ - -# Install dependencies into venv -RUN poetry config virtualenvs.in-project true && \ - poetry lock --no-update || true && \ - poetry install --only=main --no-root && \ - rm -rf $POETRY_CACHE_DIR - -# Base runtime stage -FROM python:3.11-slim AS base - -# Install runtime system dependencies -RUN apt-get update && apt-get install -y \ - ffmpeg \ - curl \ - tini \ - && rm -rf /var/lib/apt/lists/* \ - && apt-get clean - -# 
Create non-root user -RUN groupadd --gid 1000 app \ - && useradd --uid 1000 --gid app --shell /bin/bash --create-home app - -# Set working directory -WORKDIR /app - -# Copy virtual environment from builder stage -COPY --from=builder --chown=app:app /app/.venv /app/.venv - -# Ensure venv is in PATH -ENV PATH="/app/.venv/bin:$PATH" - -# Copy application code -COPY --chown=app:app . . - -# Switch to non-root user -USER app - -# Production API stage -FROM base AS production - -# Set environment variables for production -ENV APP_ENV=prod \ - PYTHONPATH=/app \ - PYTHONUNBUFFERED=1 \ - PYTHONDONTWRITEBYTECODE=1 - -# Health check -HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ - CMD curl -f http://localhost:8000/health || exit 1 - -# Expose port -EXPOSE 8000 - -# Use tini as init system for proper signal handling -ENTRYPOINT ["tini", "--"] - -# Default command for API server -CMD ["gunicorn", "-c", "gunicorn_conf.py"] - -# Worker stage for Celery workers -FROM base AS worker - -# Set environment variables for worker -ENV APP_ENV=prod \ - PYTHONPATH=/app \ - PYTHONUNBUFFERED=1 \ - PYTHONDONTWRITEBYTECODE=1 \ - C_FORCE_ROOT=1 - -# Health check for worker (check if Celery is responding) -HEALTHCHECK --interval=60s --timeout=15s --start-period=10s --retries=3 \ - CMD python -c "from celery import Celery; app=Celery('app'); print('Worker healthy')" || exit 1 - -# Use tini as init system for proper signal handling -ENTRYPOINT ["tini", "--"] - -# Default command for Celery worker -CMD ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=1"] - -# Development stage with dev dependencies -FROM builder AS development - -# Install all dependencies including dev -RUN poetry install --no-root && rm -rf $POETRY_CACHE_DIR - -# Install additional dev tools -RUN apt-get update && apt-get install -y \ - git \ - vim \ - && rm -rf /var/lib/apt/lists/* - -# Copy application code -COPY --chown=app:app . . 
- -# Switch to non-root user -USER app - -# Set environment for development -ENV APP_ENV=dev \ - PYTHONPATH=/app \ - PYTHONUNBUFFERED=1 - -EXPOSE 8000 - -# Development command with hot reload -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] \ No newline at end of file diff --git a/backend/app/api/v1/routes_admin.py b/backend/app/api/v1/routes_admin.py index 6285cbf..dba6e41 100644 --- a/backend/app/api/v1/routes_admin.py +++ b/backend/app/api/v1/routes_admin.py @@ -1,26 +1,27 @@ from datetime import datetime, timedelta -from typing import Optional from bson import ObjectId from fastapi import APIRouter, Depends, HTTPException, Query, Request, status from motor.motor_asyncio import AsyncIOMotorDatabase from ...core.database import get_database -from ...core.dependencies import get_current_user, require_roles +from ...core.dependencies import require_roles from ...core.logging import get_logger -from ...core.security import get_password_hash, verify_password -from ...models.user import User, UserRole +from ...core.security import get_password_hash from ...models.audit_log import AuditAction, AuditLogQuery, AuditLogResponse +from ...models.user import User, UserRole from ...schemas.auth import ( AdminStatsResponse, - ChangePasswordRequest, CreateUserRequest, ResetPasswordRequest, UpdateUserRequest, UserListResponse, UserResponse, ) -from ...services.audit_logger import audit_logger, log_user_management, log_security_event +from ...services.audit_logger import ( + audit_logger, + log_user_management, +) from ...telemetry import app_metrics logger = get_logger(__name__) @@ -31,28 +32,28 @@ router = APIRouter(prefix="/admin", tags=["admin"]) async def list_users( page: int = Query(1, ge=1), size: int = Query(20, ge=1, le=500), - role: Optional[str] = Query(None), + role: str | None = Query(None), active_only: bool = Query(True), current_user: User = Depends(require_roles(UserRole.ADMIN)), db: AsyncIOMotorDatabase = 
Depends(get_database), ): """List users with filtering and pagination (admin only)""" query = {} - + if role: query["role"] = role - + if active_only: query["is_active"] = True - + # Get total count total = await db.users.count_documents(query) - + # Get paginated results skip = (page - 1) * size cursor = db.users.find(query, {"hashed_password": 0}).sort("created_at", -1).skip(skip).limit(size) users = await cursor.to_list(length=size) - + user_responses = [] for user_doc in users: user_responses.append(UserResponse( @@ -66,7 +67,7 @@ async def list_users( pm_client_ids=user_doc.get("pm_client_ids", []), languages=user_doc.get("languages", []), )) - + return UserListResponse( users=user_responses, total=total, @@ -88,7 +89,7 @@ async def get_user( status_code=status.HTTP_404_NOT_FOUND, detail="User not found" ) - + return UserResponse( id=str(user_doc["_id"]), email=user_doc["email"], @@ -117,7 +118,7 @@ async def create_user( status_code=status.HTTP_400_BAD_REQUEST, detail="User with this email already exists" ) - + # Create user document user_id = str(ObjectId()) user_doc = { @@ -131,12 +132,12 @@ async def create_user( "created_at": datetime.utcnow(), "updated_at": datetime.utcnow() } - + await db.users.insert_one(user_doc) - + # Record metrics app_metrics.record_auth_attempt("user_created", user_data.role.value) - + logger.info(f"Admin {current_user.id} created user {user_id} with role {user_data.role.value}") await log_user_management( AuditAction.USER_CREATE, user_id, current_user, request, @@ -172,7 +173,7 @@ async def update_user( status_code=status.HTTP_404_NOT_FOUND, detail="User not found" ) - + # Check if email is being changed and doesn't conflict if user_update.email and user_update.email != user_doc["email"]: existing_user = await db.users.find_one({"email": user_update.email, "_id": {"$ne": user_id}}) @@ -181,10 +182,10 @@ async def update_user( status_code=status.HTTP_400_BAD_REQUEST, detail="Email already in use by another user" ) - + # Build 
update document update_data = {"updated_at": datetime.utcnow()} - + if user_update.email: update_data["email"] = user_update.email if user_update.full_name: @@ -193,14 +194,14 @@ async def update_user( update_data["role"] = user_update.role.value if user_update.is_active is not None: update_data["is_active"] = user_update.is_active - + # Update user result = await db.users.find_one_and_update( {"_id": user_id}, {"$set": update_data}, return_document=True ) - + logger.info(f"Admin {current_user.id} updated user {user_id}") action = AuditAction.USER_ROLE_CHANGE if user_update.role else AuditAction.USER_UPDATE await log_user_management( @@ -234,7 +235,7 @@ async def deactivate_user( status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot deactivate your own account" ) - + result = await db.users.update_one( {"_id": user_id}, { @@ -244,13 +245,13 @@ async def deactivate_user( } } ) - + if result.matched_count == 0: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="User not found" ) - + logger.info(f"Admin {current_user.id} deactivated user {user_id}") await log_user_management(AuditAction.USER_DEACTIVATE, user_id, current_user, request) @@ -268,10 +269,10 @@ async def admin_reset_password( # Generate temporary password import secrets import string - + temp_password = ''.join(secrets.choice(string.ascii_letters + string.digits) for _ in range(12)) hashed_password = get_password_hash(temp_password) - + result = await db.users.update_one( {"_id": user_id}, { @@ -281,15 +282,15 @@ async def admin_reset_password( } } ) - + if result.matched_count == 0: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="User not found" ) - + logger.info(f"Admin {current_user.id} reset password for user {user_id}") - + # In production, send email with temp password instead of returning it return { "message": "Password reset successfully", @@ -305,23 +306,23 @@ async def get_admin_stats( """Get system statistics (production/admin only)""" # Get user count 
total_users = await db.users.count_documents({"is_active": True}) - + # Get job counts total_jobs = await db.jobs.count_documents({}) - + # Get jobs by status pipeline = [ {"$group": {"_id": "$status", "count": {"$sum": 1}}} ] status_counts = await db.jobs.aggregate(pipeline).to_list(None) jobs_by_status = {item["_id"]: item["count"] for item in status_counts} - + # Get jobs created today today_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0) active_jobs_today = await db.jobs.count_documents({ "created_at": {"$gte": today_start} }) - + # Calculate average processing time for completed jobs avg_processing_pipeline = [ {"$match": {"status": "completed", "created_at": {"$exists": True}, "updated_at": {"$exists": True}}}, @@ -342,10 +343,10 @@ async def get_admin_stats( } } ] - + avg_result = await db.jobs.aggregate(avg_processing_pipeline).to_list(None) avg_processing_time = avg_result[0]["avg_processing_time"] if avg_result else 0.0 - + return AdminStatsResponse( total_users=total_users, total_jobs=total_jobs, @@ -366,7 +367,7 @@ async def detailed_health_check( "timestamp": datetime.utcnow().isoformat(), "components": {} } - + # Check MongoDB try: await db.command("ping") @@ -374,7 +375,7 @@ async def detailed_health_check( except Exception as e: health_status["components"]["mongodb"] = {"status": "unhealthy", "error": str(e)} health_status["status"] = "degraded" - + # Check Redis (via import to avoid circular dependency) try: from ...core.redis import redis_client @@ -386,7 +387,7 @@ async def detailed_health_check( except Exception as e: health_status["components"]["redis"] = {"status": "unhealthy", "error": str(e)} health_status["status"] = "degraded" - + # Check GCS (basic check) try: from ...services.gcs import gcs_service @@ -396,13 +397,13 @@ async def detailed_health_check( except Exception as e: health_status["components"]["gcs"] = {"status": "unhealthy", "error": str(e)} health_status["status"] = "degraded" - + # Check job queue 
health try: from ...tasks import celery_app inspect = celery_app.control.inspect() active_tasks = inspect.active() - + if active_tasks: total_active = sum(len(tasks) for tasks in active_tasks.values()) health_status["components"]["celery"] = { @@ -419,7 +420,7 @@ async def detailed_health_check( except Exception as e: health_status["components"]["celery"] = {"status": "unhealthy", "error": str(e)} health_status["status"] = "degraded" - + return health_status @@ -431,18 +432,18 @@ async def get_job_statistics( ): """Get job processing statistics (reviewer/production/admin only)""" since_date = datetime.utcnow() - timedelta(days=days) - + # Jobs created in period jobs_in_period = await db.jobs.count_documents({ "created_at": {"$gte": since_date} }) - + # Jobs completed in period jobs_completed = await db.jobs.count_documents({ "status": "completed", "updated_at": {"$gte": since_date} }) - + # Average processing time for completed jobs avg_pipeline = [ { @@ -471,12 +472,12 @@ async def get_job_statistics( } } ] - + avg_result = await db.jobs.aggregate(avg_pipeline).to_list(None) processing_stats = avg_result[0] if avg_result else { "avg_time": 0, "min_time": 0, "max_time": 0 } - + # Current queue status current_queue_stats = {} pipeline = [ @@ -485,7 +486,7 @@ async def get_job_statistics( status_counts = await db.jobs.aggregate(pipeline).to_list(None) for item in status_counts: current_queue_stats[item["_id"]] = item["count"] - + return { "period_days": days, "jobs_created": jobs_in_period, @@ -510,7 +511,7 @@ async def admin_force_password_reset( status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot reset your own password this way" ) - + # Check if user exists user_doc = await db.users.find_one({"_id": user_id}) if not user_doc: @@ -518,15 +519,15 @@ async def admin_force_password_reset( status_code=status.HTTP_404_NOT_FOUND, detail="User not found" ) - + # Generate secure temporary password import secrets import string - + temp_password = ''.join(secrets.choice( 
string.ascii_letters + string.digits + "!@#$%" ) for _ in range(16)) - + # Update password await db.users.update_one( {"_id": user_id}, @@ -537,10 +538,10 @@ async def admin_force_password_reset( } } ) - + # TODO: In production, send via secure email instead of returning password logger.info(f"Admin {current_user.id} reset password for user {user_id}") - + return { "message": "Password reset successfully", "temporary_password": temp_password, @@ -563,7 +564,7 @@ async def reprocess_job( status_code=status.HTTP_404_NOT_FOUND, detail="Job not found" ) - + # Reset job to created status for reprocessing await db.jobs.update_one( {"_id": job_id}, @@ -583,7 +584,7 @@ async def reprocess_job( } } ) - + # Broadcast status update try: from ...services.websocket import connection_manager @@ -595,32 +596,32 @@ async def reprocess_job( ) except Exception as e: logger.warning(f"Failed to broadcast status update for job reset {job_id}: {e}") - + # Trigger ingestion task from ...tasks.ingest_and_ai import ingest_and_ai_task ingest_and_ai_task.delay(job_id) - + logger.warning(f"Admin {current_user.id} triggered reprocessing for job {job_id}") - + return {"message": f"Job {job_id} queued for reprocessing"} @router.get("/audit-logs", response_model=AuditLogResponse) async def get_audit_logs_detailed( # Time range - start_date: Optional[datetime] = Query(None, description="Start date for audit logs"), - end_date: Optional[datetime] = Query(None, description="End date for audit logs"), + start_date: datetime | None = Query(None, description="Start date for audit logs"), + end_date: datetime | None = Query(None, description="End date for audit logs"), # Filters - action: Optional[str] = Query(None, description="Filter by action type"), - severity: Optional[str] = Query(None, description="Filter by severity level"), - user_email: Optional[str] = Query(None, description="Filter by user email"), - resource_type: Optional[str] = Query(None, description="Filter by resource type"), - 
resource_id: Optional[str] = Query(None, description="Filter by resource ID"), - success: Optional[bool] = Query(None, description="Filter by success status"), + action: str | None = Query(None, description="Filter by action type"), + severity: str | None = Query(None, description="Filter by severity level"), + user_email: str | None = Query(None, description="Filter by user email"), + resource_type: str | None = Query(None, description="Filter by resource type"), + resource_id: str | None = Query(None, description="Filter by resource ID"), + success: bool | None = Query(None, description="Filter by success status"), # Search - search: Optional[str] = Query(None, description="Search in description and details"), + search: str | None = Query(None, description="Search in description and details"), # Pagination (skip/limit to match frontend AuditLogQuery) skip: int = Query(0, ge=0, description="Number of records to skip"), @@ -651,7 +652,7 @@ async def get_audit_logs_detailed( sort_by=sort_by, sort_order=sort_order ) - + return await audit_logger.query_logs(query) @@ -697,7 +698,7 @@ async def get_security_events( request: Request = None, ): """Get recent security events (production/admin only)""" - + # Log access to security events await audit_logger.log_action( action="admin.audit.access", @@ -706,7 +707,7 @@ async def get_security_events( request=request, details={"hours_requested": hours} ) - + logs = await audit_logger.get_security_events(hours) return logs @@ -718,7 +719,7 @@ async def cleanup_audit_logs( request: Request = None, ): """Clean up old audit logs (admin only)""" - + # Log audit cleanup action await audit_logger.log_action( action="admin.system.action", @@ -728,9 +729,9 @@ async def cleanup_audit_logs( details={"retention_days": retention_days}, severity="warning" ) - + deleted_count = await audit_logger.cleanup_old_logs(retention_days) - + # Log cleanup completion await audit_logger.log_action( action="admin.system.action", @@ -742,9 +743,9 @@ async 
def cleanup_audit_logs( "deleted_count": deleted_count } ) - + return { "message": f"Deleted {deleted_count} audit logs older than {retention_days} days", "deleted_count": deleted_count, "retention_days": retention_days - } \ No newline at end of file + } diff --git a/backend/app/api/v1/routes_clients.py b/backend/app/api/v1/routes_clients.py index ba29f82..6de6c72 100644 --- a/backend/app/api/v1/routes_clients.py +++ b/backend/app/api/v1/routes_clients.py @@ -9,7 +9,7 @@ Access rules: - List projects (read) → Admin, PM, or any team member of the client """ -from datetime import datetime, timezone +from datetime import UTC, datetime from bson import ObjectId from fastapi import APIRouter, Depends, HTTPException @@ -39,7 +39,7 @@ router = APIRouter(prefix="/clients", tags=["clients"]) # --------------------------------------------------------------------------- def _now() -> datetime: - return datetime.now(timezone.utc) + return datetime.now(UTC) async def _get_client_or_404(client_id: str, db: AsyncIOMotorDatabase) -> dict: diff --git a/backend/app/api/v1/routes_files.py b/backend/app/api/v1/routes_files.py index cddde27..072717d 100644 --- a/backend/app/api/v1/routes_files.py +++ b/backend/app/api/v1/routes_files.py @@ -3,11 +3,11 @@ from motor.motor_asyncio import AsyncIOMotorDatabase from ...core.database import get_database from ...core.dependencies import get_current_user +from ...models.audit_log import AuditAction from ...models.user import User from ...schemas.file import SignedUploadRequest, SignedUploadResponse -from ...services.gcs import generate_signed_upload_url from ...services.audit_logger import audit_logger -from ...models.audit_log import AuditAction +from ...services.gcs import generate_signed_upload_url router = APIRouter(prefix="/files", tags=["files"]) @@ -28,11 +28,11 @@ async def get_signed_upload_url( status_code=status.HTTP_400_BAD_REQUEST, detail="Only video files are supported" ) - + # Generate unique blob path from bson import ObjectId 
blob_path = f"temp/{ObjectId()}/{request.filename}" - + try: # Generate signed upload URL with form fields signed_data = await generate_signed_upload_url( @@ -40,7 +40,7 @@ async def get_signed_upload_url( content_type=request.content_type, max_size=request.max_size or 1024 * 1024 * 1024 # 1GB default ) - + await audit_logger.log_action( action=AuditAction.FILE_UPLOAD, description=f"Signed upload URL generated for {request.filename}", @@ -62,4 +62,4 @@ async def get_signed_upload_url( raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to generate signed upload URL: {str(e)}" - ) \ No newline at end of file + ) diff --git a/backend/app/api/v1/routes_glossaries.py b/backend/app/api/v1/routes_glossaries.py index e61805a..6e8763f 100644 --- a/backend/app/api/v1/routes_glossaries.py +++ b/backend/app/api/v1/routes_glossaries.py @@ -231,10 +231,11 @@ async def reembed_version( raise HTTPException(status_code=404, detail="Version not found") try: - from ...tasks.embed_glossary import embed_glossary_version_task - from bson import ObjectId import motor.motor_asyncio + from bson import ObjectId + from ...core.config import settings + from ...tasks.embed_glossary import embed_glossary_version_task client_db = motor.motor_asyncio.AsyncIOMotorClient(settings.mongodb_uri) db = client_db[settings.mongodb_db] diff --git a/backend/app/api/v1/routes_invitations.py b/backend/app/api/v1/routes_invitations.py index 41c49f5..8c6ef62 100644 --- a/backend/app/api/v1/routes_invitations.py +++ b/backend/app/api/v1/routes_invitations.py @@ -14,16 +14,20 @@ Protected endpoints: import hashlib import re import secrets -from datetime import datetime, timedelta, timezone +from datetime import UTC, datetime, timedelta -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException from motor.motor_asyncio import AsyncIOMotorDatabase +from ...core.authz import bump_user_membership_cache from 
...core.database import get_database from ...core.dependencies import get_current_user -from ...core.security import create_access_token, create_refresh_token, get_password_hash +from ...core.security import ( + create_access_token, + create_refresh_token, + get_password_hash, +) from ...models.invitation import ( - Invitation, InvitationAcceptRequest, InvitationCreate, InvitationPreviewResponse, @@ -31,7 +35,6 @@ from ...models.invitation import ( ) from ...models.organization import OrgRole from ...models.user import AuthProvider, User, UserRole -from ...core.authz import bump_user_membership_cache from ...services.emailer import email_service from ...services.membership_service import get_membership, upsert_membership @@ -39,7 +42,7 @@ router = APIRouter(tags=["invitations"]) def _now() -> datetime: - return datetime.now(timezone.utc) + return datetime.now(UTC) def _hash_token(plaintext: str) -> str: @@ -54,7 +57,7 @@ def _make_token() -> tuple[str, str]: def _inv_from_doc(doc: dict) -> InvitationResponse: now = _now() - expires_at = doc["expires_at"].replace(tzinfo=timezone.utc) if doc["expires_at"].tzinfo is None else doc["expires_at"] + expires_at = doc["expires_at"].replace(tzinfo=UTC) if doc["expires_at"].tzinfo is None else doc["expires_at"] return InvitationResponse( id=str(doc["_id"]), email=doc["email"], @@ -218,7 +221,7 @@ async def preview_invitation( raise HTTPException(status_code=410, detail="Invitation not found or has expired") now = _now() - expires_at = doc["expires_at"].replace(tzinfo=timezone.utc) if doc["expires_at"].tzinfo is None else doc["expires_at"] + expires_at = doc["expires_at"].replace(tzinfo=UTC) if doc["expires_at"].tzinfo is None else doc["expires_at"] if doc.get("revoked_at"): raise HTTPException(status_code=410, detail="This invitation has been revoked") diff --git a/backend/app/api/v1/routes_jobs.py b/backend/app/api/v1/routes_jobs.py index 2aa7c86..03011af 100644 --- a/backend/app/api/v1/routes_jobs.py +++ 
b/backend/app/api/v1/routes_jobs.py @@ -1,8 +1,6 @@ import hashlib from datetime import datetime -from ...services.cloud_run_dispatch import dispatch as _cr_dispatch - from bson import ObjectId from fastapi import ( APIRouter, @@ -17,9 +15,9 @@ from fastapi import ( from fastapi.responses import StreamingResponse from motor.motor_asyncio import AsyncIOMotorDatabase +from ...core.authz import MembershipContext, get_job_or_403, get_membership_context from ...core.config import settings from ...core.database import get_database -from ...core.authz import MembershipContext, get_job_or_403, get_membership_context from ...core.dependencies import ( assert_job_in_user_org, get_accessible_project_ids, @@ -72,6 +70,7 @@ from ...schemas.job import ( from ...services import language_qc as lqc from ...services import vtt_versioning from ...services.audit_logger import audit_logger, log_job_action +from ...services.cloud_run_dispatch import dispatch as _cr_dispatch from ...services.gcs import ( create_resumable_upload_session, gcs_service, @@ -2805,7 +2804,6 @@ async def update_tts_preferences( ) # Import tts_synthesis for parsing AD cues - from ...tasks.rerender_accessible_video import rerender_accessible_video_task from ...tasks.tts_synthesis import parse_ad_cues # For each language, get cue count and queue all cues, then trigger re-render diff --git a/backend/app/api/v1/routes_language_qc.py b/backend/app/api/v1/routes_language_qc.py index 1fb7e93..8e94b30 100644 --- a/backend/app/api/v1/routes_language_qc.py +++ b/backend/app/api/v1/routes_language_qc.py @@ -1,7 +1,6 @@ """Per-language QC endpoints — two-stage (linguist + reviewer) assignment, workflow, comments.""" from datetime import datetime -from typing import Optional from fastapi import APIRouter, Depends, Query, Request from motor.motor_asyncio import AsyncIOMotorDatabase @@ -20,39 +19,39 @@ router = APIRouter(tags=["language-qc"]) class AssignRequest(BaseModel): linguist_user_id: str - notes: Optional[str] = None - 
deadline: Optional[datetime] = None + notes: str | None = None + deadline: datetime | None = None class ReassignRequest(BaseModel): linguist_user_id: str - notes: Optional[str] = None - deadline: Optional[datetime] = None + notes: str | None = None + deadline: datetime | None = None class AssignReviewerRequest(BaseModel): reviewer_user_id: str - notes: Optional[str] = None - deadline: Optional[datetime] = None + notes: str | None = None + deadline: datetime | None = None class ReassignReviewerRequest(BaseModel): reviewer_user_id: str - notes: Optional[str] = None - deadline: Optional[datetime] = None + notes: str | None = None + deadline: datetime | None = None class ApproveLanguageRequest(BaseModel): - notes: Optional[str] = None + notes: str | None = None class RejectLanguageRequest(BaseModel): notes: str - category: Optional[str] = None # timing | mistranslation | terminology | profanity | length | other + category: str | None = None # timing | mistranslation | terminology | profanity | length | other class ReopenLanguageRequest(BaseModel): - notes: Optional[str] = None + notes: str | None = None class AddCommentRequest(BaseModel): @@ -75,8 +74,8 @@ class QueueItem(BaseModel): job_status: str lang: str lang_qc_status: str - assigned_at: Optional[str] = None - reviewed_at: Optional[str] = None + assigned_at: str | None = None + reviewed_at: str | None = None class QueueResponse(BaseModel): @@ -86,10 +85,10 @@ class QueueResponse(BaseModel): class BulkAssignRequest(BaseModel): linguist_user_id: str - reviewer_user_id: Optional[str] = None - languages: Optional[list[str]] = None # None = all available languages + reviewer_user_id: str | None = None + languages: list[str] | None = None # None = all available languages only_unassigned: bool = False # skip languages that already have an assignment - deadline: Optional[datetime] = None + deadline: datetime | None = None class BulkAssignResponse(BaseModel): @@ -337,7 +336,7 @@ async def reject_language( class 
MarkCueReviewedRequest(BaseModel): - total_cues: Optional[int] = None # client sends on first call to set total + total_cues: int | None = None # client sends on first call to set total @router.post("/jobs/{job_id}/languages/{lang}/mark-cue-reviewed", response_model=LanguageQCStateResponse) @@ -428,7 +427,7 @@ async def list_comments( @router.get("/me/language-qc-queue", response_model=QueueResponse) async def my_language_qc_queue( role: str = Query("linguist", description="'linguist' or 'reviewer'"), - qc_status: Optional[str] = Query(None, description="Filter by status"), + qc_status: str | None = Query(None, description="Filter by status"), skip: int = Query(0, ge=0), limit: int = Query(50, ge=1, le=200), current_user: User = Depends(require_roles( diff --git a/backend/app/api/v1/routes_organizations.py b/backend/app/api/v1/routes_organizations.py index 804d7fa..f9dae17 100644 --- a/backend/app/api/v1/routes_organizations.py +++ b/backend/app/api/v1/routes_organizations.py @@ -12,19 +12,23 @@ underlying MongoDB collections used by routes_clients.py so both endpoints coexist without data duplication. 
""" -from datetime import datetime, timezone +from datetime import UTC, datetime -from bson import ObjectId from fastapi import APIRouter, Depends, HTTPException from motor.motor_asyncio import AsyncIOMotorDatabase from pydantic import BaseModel +from ...core.authz import bump_user_membership_cache from ...core.database import get_database from ...core.dependencies import get_current_user, require_roles from ...models.membership import MemberDetail, MembershipCreate, MembershipUpdate -from ...models.organization import OrgRole, Organization, OrganizationCreate, OrganizationUpdate +from ...models.organization import ( + Organization, + OrganizationCreate, + OrganizationUpdate, + OrgRole, +) from ...models.user import User, UserRole -from ...core.authz import bump_user_membership_cache from ...services.membership_service import ( get_membership, get_memberships_for_user, @@ -39,7 +43,7 @@ ADMIN_ROLES = [UserRole.ADMIN] def _now() -> datetime: - return datetime.now(timezone.utc) + return datetime.now(UTC) # --------------------------------------------------------------------------- diff --git a/backend/app/api/v1/routes_review_notes.py b/backend/app/api/v1/routes_review_notes.py index 9618f77..cadf785 100644 --- a/backend/app/api/v1/routes_review_notes.py +++ b/backend/app/api/v1/routes_review_notes.py @@ -1,14 +1,13 @@ """API routes for review notes - timestamped notes on video assets during review.""" from datetime import datetime -from typing import Optional from bson import ObjectId from fastapi import APIRouter, Depends, HTTPException, Query, status from motor.motor_asyncio import AsyncIOMotorDatabase from ...core.database import get_database -from ...core.dependencies import get_current_user, require_roles +from ...core.dependencies import require_roles from ...core.logging import get_logger from ...models.user import User, UserRole from ...schemas.review_note import ( @@ -25,7 +24,7 @@ router = APIRouter(prefix="/jobs/{job_id}/review-notes", 
tags=["review-notes"]) @router.get("", response_model=ReviewNotesListResponse) async def list_review_notes( job_id: str, - asset_key: Optional[str] = Query(None, description="Filter notes by asset key"), + asset_key: str | None = Query(None, description="Filter notes by asset key"), current_user: User = Depends(require_roles(UserRole.REVIEWER, UserRole.LINGUIST, UserRole.PRODUCTION, UserRole.ADMIN)), db: AsyncIOMotorDatabase = Depends(get_database), ): diff --git a/backend/app/api/v1/routes_tts.py b/backend/app/api/v1/routes_tts.py index f5a619a..a6e1d79 100644 --- a/backend/app/api/v1/routes_tts.py +++ b/backend/app/api/v1/routes_tts.py @@ -1,18 +1,18 @@ import asyncio import time -from typing import Literal, Optional +from typing import Literal from fastapi import APIRouter, Depends, HTTPException, Query from fastapi.responses import Response from pydantic import BaseModel, Field from ...core.config import settings -from ...core.logging import get_logger -from ...services.gemini_tts import gemini_tts_service -from ...services.elevenlabs_voices import elevenlabs_voice_service -from ...services.tts import tts_service -from ...services import cost_tracker from ...core.dependencies import get_current_user +from ...core.logging import get_logger +from ...services import cost_tracker +from ...services.elevenlabs_voices import elevenlabs_voice_service +from ...services.gemini_tts import gemini_tts_service +from ...services.tts import tts_service logger = get_logger(__name__) @@ -30,20 +30,20 @@ class VoicePreviewRequest(BaseModel): style_preset: Literal[ "neutral", "calm", "energetic", "professional", "warm", "documentary", "custom" ] = "neutral" - custom_style_prompt: Optional[str] = None + custom_style_prompt: str | None = None # ElevenLabs-specific - stability: Optional[float] = Field(default=None, ge=0.0, le=1.0) - similarity_boost: Optional[float] = Field(default=None, ge=0.0, le=1.0) + stability: float | None = Field(default=None, ge=0.0, le=1.0) + 
similarity_boost: float | None = Field(default=None, ge=0.0, le=1.0) class VoiceInfo(BaseModel): """Structured voice information for any provider.""" id: str name: str - description: Optional[str] = None - preview_url: Optional[str] = None - labels: Optional[dict[str, str]] = None - category: Optional[str] = None + description: str | None = None + preview_url: str | None = None + labels: dict[str, str] | None = None + category: str | None = None class ProviderVoicesResponse(BaseModel): @@ -52,7 +52,7 @@ class ProviderVoicesResponse(BaseModel): voices: list[VoiceInfo] default: str available: bool = True - error: Optional[str] = None + error: str | None = None class LanguagesResponse(BaseModel): @@ -87,12 +87,12 @@ class ProviderOptionsResponse(BaseModel): """Available TTS configuration options for a provider.""" provider: str # Gemini-specific - models: Optional[list[TTSOptionItem]] = None - style_presets: Optional[list[TTSOptionItem]] = None - speed_range: Optional[SpeedRange] = None + models: list[TTSOptionItem] | None = None + style_presets: list[TTSOptionItem] | None = None + speed_range: SpeedRange | None = None # ElevenLabs-specific - stability_range: Optional[FloatRange] = None - similarity_boost_range: Optional[FloatRange] = None + stability_range: FloatRange | None = None + similarity_boost_range: FloatRange | None = None @router.get("/voices", response_model=ProviderVoicesResponse) diff --git a/backend/app/api/v1/routes_vtt_versions.py b/backend/app/api/v1/routes_vtt_versions.py index 90ce300..5c31871 100644 --- a/backend/app/api/v1/routes_vtt_versions.py +++ b/backend/app/api/v1/routes_vtt_versions.py @@ -3,15 +3,20 @@ from fastapi import APIRouter, Depends, HTTPException, Query, Request, status from motor.motor_asyncio import AsyncIOMotorDatabase +from ...core.config import settings from ...core.database import get_database from ...core.dependencies import require_roles +from ...models.audit_log import AuditAction from ...models.user import User, 
UserRole -from ...models.vtt_version import VttDiffResponse, VttKind, VttVersionListResponse, VttVersionSummary +from ...models.vtt_version import ( + VttDiffResponse, + VttKind, + VttVersionListResponse, + VttVersionSummary, +) from ...services import vtt_versioning from ...services.audit_logger import audit_logger -from ...models.audit_log import AuditAction from ...services.gcs import gcs_service -from ...core.config import settings router = APIRouter(prefix="/jobs", tags=["vtt-versions"]) diff --git a/backend/app/api/v1/routes_websockets.py b/backend/app/api/v1/routes_websockets.py index b1e3180..ea36e04 100644 --- a/backend/app/api/v1/routes_websockets.py +++ b/backend/app/api/v1/routes_websockets.py @@ -6,20 +6,23 @@ Provides WebSocket endpoints for: 2. Job list updates: /ws/jobs (all jobs for authenticated user) """ import logging -from typing import Optional -from fastapi import APIRouter, WebSocket, WebSocketDisconnect, HTTPException, Depends, Query +from fastapi import ( + APIRouter, + Depends, + Query, + WebSocket, + WebSocketDisconnect, +) from fastapi.security import HTTPBearer -from ...services.websocket import ( - connection_manager, - authenticate_websocket, - get_connection_manager, - ConnectionManager -) -from ...models.job import Job from ...core.database import get_database -from ...core.dependencies import get_current_user +from ...services.websocket import ( + ConnectionManager, + authenticate_websocket, + connection_manager, + get_connection_manager, +) logger = logging.getLogger(__name__) @@ -31,7 +34,7 @@ security = HTTPBearer() async def websocket_job_status( websocket: WebSocket, job_id: str, - token: Optional[str] = Query(None), + token: str | None = Query(None), manager: ConnectionManager = Depends(get_connection_manager) ): """ @@ -57,17 +60,17 @@ async def websocket_job_status( user_id = await authenticate_websocket(websocket, token) if not user_id: return - + try: # Verify user has access to this job db = await get_database() 
jobs_collection = db["jobs"] - + job = await jobs_collection.find_one({"_id": job_id}) if not job: await websocket.close(code=4004, reason="Job not found") return - + # Check permissions - users can only access their own jobs unless they're admin/reviewer user = await db["users"].find_one({"_id": user_id}) if not user: @@ -76,36 +79,36 @@ async def websocket_job_status( user = await db["users"].find_one({"_id": ObjectId(user_id)}) except Exception: pass # Invalid ObjectId format - + if not user: await websocket.close(code=4001, reason="User not found") return - + # Check access permissions if user["role"] == "client" and job.get("created_by") != user_id: await websocket.close(code=4003, reason="Access denied") return - + # Connect to job status updates await manager.connect_job_status(websocket, user_id, job_id) - + # Keep connection alive and handle incoming messages while True: try: # Wait for incoming WebSocket messages (for heartbeat, etc.) message = await websocket.receive_text() logger.debug(f"Received WebSocket message from user {user_id}: {message}") - + # Handle heartbeat or other client messages if needed if message == "ping": await websocket.send_text("pong") - + except WebSocketDisconnect: break except Exception as e: logger.error(f"Error in WebSocket message handling: {e}") break - + except WebSocketDisconnect: pass except Exception as e: @@ -117,7 +120,7 @@ async def websocket_job_status( @router.websocket("/ws/jobs") async def websocket_job_list( websocket: WebSocket, - token: Optional[str] = Query(None), + token: str | None = Query(None), manager: ConnectionManager = Depends(get_connection_manager) ): """ @@ -143,12 +146,12 @@ async def websocket_job_list( user_id = await authenticate_websocket(websocket, token) if not user_id: return - + try: # Verify user exists logger.info(f"WebSocket: Looking up user {user_id} in database") db = await get_database() - + # Try looking up user by string ID first, then by ObjectId user = await 
db["users"].find_one({"_id": user_id}) if not user: @@ -157,35 +160,35 @@ async def websocket_job_list( user = await db["users"].find_one({"_id": ObjectId(user_id)}) except Exception: pass # Invalid ObjectId format - + if not user: logger.warning(f"WebSocket: User {user_id} not found in database (tried both string and ObjectId)") await websocket.close(code=4001, reason="User not found") return - + logger.info(f"WebSocket: User {user_id} found, role: {user.get('role', 'unknown')}") - + logger.info(f"WebSocket: User {user_id} found, connecting to job list updates") # Connect to job list updates await manager.connect_job_list(websocket, user_id) - + # Keep connection alive and handle incoming messages while True: try: # Wait for incoming WebSocket messages message = await websocket.receive_text() logger.debug(f"Received WebSocket message from user {user_id}: {message}") - + # Handle heartbeat or other client messages if needed if message == "ping": await websocket.send_text("pong") - + except WebSocketDisconnect: break except Exception as e: logger.error(f"Error in WebSocket message handling: {e}") break - + except WebSocketDisconnect: pass except Exception as e: @@ -206,9 +209,9 @@ async def websocket_status(): "global_subscriptions": len(connection_manager.global_subscriptions), "redis_connected": connection_manager.redis_client is not None, "subscriber_running": ( - connection_manager.subscriber_task is not None and + connection_manager.subscriber_task is not None and not connection_manager.subscriber_task.done() ) } - - return stats \ No newline at end of file + + return stats diff --git a/backend/app/core/authz.py b/backend/app/core/authz.py index bd1e085..bb40214 100644 --- a/backend/app/core/authz.py +++ b/backend/app/core/authz.py @@ -11,7 +11,6 @@ Provides: import json from dataclasses import dataclass -from typing import Optional from fastapi import Depends, HTTPException, status from motor.motor_asyncio import AsyncIOMotorDatabase @@ -159,7 +158,7 @@ class 
OrgScopedQuery: def filter( self, base_query: dict, - org_id: Optional[str] = None, + org_id: str | None = None, org_field: str = "organization_id", ) -> dict: if self.ctx.is_platform_admin: diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 5eeb7ef..b74e922 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -22,7 +22,7 @@ class Settings(BaseSettings): # Redis redis_url: str - + # Celery celery_broker_url: str = "" celery_result_backend: str = "" @@ -36,7 +36,7 @@ class Settings(BaseSettings): gemini_api_key: str elevenlabs_api_key: str = "" google_tts_credentials: str = "" - + # TTS Voice Configuration tts_provider: str = "gemini" # "gemini", "google", or "elevenlabs" google_tts_voices: dict[str, str] = { @@ -265,11 +265,10 @@ class Settings(BaseSettings): ffmpeg_worker_concurrency: int = 4 # FFmpeg tasks on main worker tts_worker_concurrency: int = 8 # TTS worker - # Email (Mailgun — primary; sendgrid_api_key kept for backward compat) + # Email (Mailgun) mailgun_api_key: str = "" mailgun_domain: str = "mg.oliver.solutions" mailgun_from: str = "noreply@mg.oliver.solutions" - sendgrid_api_key: str = "" email_from: str = "noreply@mg.oliver.solutions" client_base_url: str diff --git a/backend/app/core/database.py b/backend/app/core/database.py index 9243e4b..3fc3a14 100644 --- a/backend/app/core/database.py +++ b/backend/app/core/database.py @@ -56,7 +56,7 @@ async def create_indexes(): await db.audit_logs.create_index([("resource_type", 1), ("resource_id", 1)]) # Resource tracking await db.audit_logs.create_index([("ip_address", 1), ("timestamp", -1)]) # IP-based analysis await db.audit_logs.create_index([("success", 1), ("timestamp", -1)]) # Failed operations - + # Text search index for description and details await db.audit_logs.create_index([ ("description", "text"), diff --git a/backend/app/core/dependencies.py b/backend/app/core/dependencies.py index 4a5b0f9..1db95c2 100644 --- a/backend/app/core/dependencies.py 
+++ b/backend/app/core/dependencies.py @@ -1,11 +1,9 @@ -from typing import Optional from fastapi import Depends, HTTPException, Request, status from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer from motor.motor_asyncio import AsyncIOMotorDatabase from ..models.user import User, UserRole -from .config import settings from .database import get_database from .security import decode_token @@ -78,7 +76,7 @@ def require_roles(*required_roles: UserRole): async def get_current_user_optional( request: Request, db: AsyncIOMotorDatabase = Depends(get_database), -) -> Optional[User]: +) -> User | None: authorization: str = request.headers.get("Authorization") if not authorization: return None @@ -109,7 +107,7 @@ async def get_current_user_optional( async def get_accessible_project_ids( user: User, db: AsyncIOMotorDatabase, -) -> Optional[list[str]]: +) -> list[str] | None: """ Returns project IDs the user may access, or None meaning "see everything". @@ -126,7 +124,9 @@ async def get_accessible_project_ids( user_id = str(user.id) # Primary path: use Redis-cached memberships (60s TTL, same cache as authz.py) - from .authz import _cached_memberships # local import to avoid circular dep at module level + from .authz import ( + _cached_memberships, # local import to avoid circular dep at module level + ) memberships_map = await _cached_memberships(user_id, db) org_ids = list(memberships_map.keys()) diff --git a/backend/app/core/secrets_config.py b/backend/app/core/secrets_config.py index d05153e..c7a976a 100644 --- a/backend/app/core/secrets_config.py +++ b/backend/app/core/secrets_config.py @@ -1,10 +1,6 @@ """Enhanced configuration system with Secret Manager integration.""" -import os -import asyncio -from typing import Dict, Optional, Any from functools import lru_cache -from pydantic_settings import BaseSettings from .config import Settings as BaseConfig from .logging import get_logger @@ -14,41 +10,40 @@ logger = get_logger(__name__) class 
SecretsConfig(BaseConfig): """Enhanced configuration that loads secrets from GCP Secret Manager.""" - + def __init__(self, **kwargs): # Initialize with base configuration first super().__init__(**kwargs) - + # Flag to track if secrets have been loaded self._secrets_loaded = False - self._secret_values: Dict[str, str] = {} - + self._secret_values: dict[str, str] = {} + async def load_secrets(self) -> None: """Load secrets from Secret Manager asynchronously.""" if self._secrets_loaded: return - + try: # Only import here to avoid circular imports from app.services.secrets_manager import secrets_manager - + # Define which config fields should be loaded from secrets secret_mappings = { # Config field -> Secret Manager name "jwt_secret": "jwt-secret", - "jwt_refresh_secret": "jwt-refresh-secret", + "jwt_refresh_secret": "jwt-refresh-secret", "mongodb_uri": "mongodb-url", "redis_url": "redis-url", "gemini_api_key": "gemini-api-key", - "sendgrid_api_key": "sendgrid-api-key", "elevenlabs_api_key": "elevenlabs-api-key", "sentry_dsn": "sentry-dsn" } - + # Get all secrets in batch secret_names = list(secret_mappings.values()) retrieved_secrets = await secrets_manager.get_secrets_batch(secret_names) - + # Map secrets back to config fields for config_field, secret_name in secret_mappings.items(): if secret_name in retrieved_secrets: @@ -58,50 +53,50 @@ class SecretsConfig(BaseConfig): logger.debug(f"Loaded secret for {config_field}") else: logger.warning(f"Secret {secret_name} not available, using environment/default") - + self._secrets_loaded = True logger.info(f"Successfully loaded {len(retrieved_secrets)} secrets from Secret Manager") - + except Exception as e: logger.warning(f"Failed to load secrets from Secret Manager: {e}") logger.warning("Falling back to environment variables") self._secrets_loaded = True # Mark as loaded to prevent retries - - def get_secret_value(self, field_name: str) -> Optional[str]: + + def get_secret_value(self, field_name: str) -> str | None: 
"""Get a secret value if it was loaded from Secret Manager.""" return self._secret_values.get(field_name) - + async def refresh_secrets(self) -> None: """Force refresh secrets from Secret Manager.""" self._secrets_loaded = False self._secret_values.clear() - + # Clear the secrets manager cache from app.services.secrets_manager import secrets_manager secrets_manager.clear_cache() - + await self.load_secrets() - + @property def is_production(self) -> bool: """Check if running in production environment.""" return self.app_env == "prod" - + @property def is_development(self) -> bool: """Check if running in development environment.""" return self.app_env == "dev" - + @property def google_cloud_project(self) -> str: """Get Google Cloud Project ID.""" return self.gcp_project_id - + @property def jwt_refresh_secret(self) -> str: """Get JWT refresh secret (fallback to main secret if not set).""" return getattr(self, '_jwt_refresh_secret', self.jwt_secret) - + @jwt_refresh_secret.setter def jwt_refresh_secret(self, value: str) -> None: """Set JWT refresh secret.""" @@ -109,37 +104,37 @@ class SecretsConfig(BaseConfig): # Global configuration instance -_config_instance: Optional[SecretsConfig] = None +_config_instance: SecretsConfig | None = None async def initialize_config() -> SecretsConfig: """Initialize configuration with secrets loading.""" global _config_instance - + if _config_instance is None: _config_instance = SecretsConfig() await _config_instance.load_secrets() - + return _config_instance def get_settings() -> SecretsConfig: """Get settings instance (synchronous).""" global _config_instance - + if _config_instance is None: # Initialize without secrets for backwards compatibility _config_instance = SecretsConfig() logger.warning("Settings accessed before async initialization - secrets not loaded") - + return _config_instance -@lru_cache() +@lru_cache def get_settings_cached() -> SecretsConfig: """Get cached settings instance.""" return get_settings() # Backwards 
compatibility -settings = get_settings() \ No newline at end of file +settings = get_settings() diff --git a/backend/app/main.py b/backend/app/main.py index 3977d2e..63c950b 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -12,8 +12,8 @@ from sentry_sdk.integrations.redis import RedisIntegration from .api.v1.routes_admin import router as admin_router from .api.v1.routes_admin_production import router as admin_production_router -from .api.v1.routes_briefs import router as briefs_router from .api.v1.routes_auth import router as auth_router +from .api.v1.routes_briefs import router as briefs_router from .api.v1.routes_clients import router as clients_router from .api.v1.routes_files import router as files_router from .api.v1.routes_glossaries import router as glossaries_router @@ -23,9 +23,9 @@ from .api.v1.routes_jobs import router as jobs_router from .api.v1.routes_language_qc import router as language_qc_router from .api.v1.routes_organizations import router as organizations_router from .api.v1.routes_review_notes import router as review_notes_router +from .api.v1.routes_share import router as share_router from .api.v1.routes_tts import router as tts_router from .api.v1.routes_vtt_versions import router as vtt_versions_router -from .api.v1.routes_share import router as share_router from .api.v1.routes_websockets import router as websockets_router from .core.config import settings from .core.database import ( @@ -156,6 +156,7 @@ async def cors_error_handler(request, call_next): response = await call_next(request) except Exception as e: import traceback + from .core.logging import get_logger as _get_logger _get_logger(__name__).exception("🚨 CORS middleware caught: %s\n%s", e, traceback.format_exc()) diff --git a/backend/app/middleware/__init__.py b/backend/app/middleware/__init__.py index 3c361b0..3770d9a 100644 --- a/backend/app/middleware/__init__.py +++ b/backend/app/middleware/__init__.py @@ -1,12 +1,16 @@ """Middleware package for FastAPI 
application.""" -from .rate_limiting import RateLimitMiddleware, IPWhitelist, create_rate_limit_middleware +from .rate_limiting import ( + IPWhitelist, + RateLimitMiddleware, + create_rate_limit_middleware, +) from .validation import ValidationMiddleware, create_validation_middleware __all__ = [ "RateLimitMiddleware", - "IPWhitelist", + "IPWhitelist", "create_rate_limit_middleware", "ValidationMiddleware", "create_validation_middleware" -] \ No newline at end of file +] diff --git a/backend/app/middleware/rate_limiting.py b/backend/app/middleware/rate_limiting.py index af26447..fde3921 100644 --- a/backend/app/middleware/rate_limiting.py +++ b/backend/app/middleware/rate_limiting.py @@ -1,14 +1,10 @@ """Rate limiting middleware for API endpoints.""" import time -from collections import defaultdict -from typing import Dict, Optional, Tuple + import redis.asyncio as aioredis -from fastapi import HTTPException, Request, status +from fastapi import Request, status from fastapi.responses import JSONResponse -import json -import asyncio -from datetime import datetime, timedelta from app.core.config import get_settings from app.telemetry.metrics import track_rate_limit_metrics @@ -16,17 +12,17 @@ from app.telemetry.metrics import track_rate_limit_metrics class RateLimiter: """Redis-based rate limiter with sliding window algorithm.""" - + def __init__(self, redis_client: aioredis.Redis): self.redis = redis_client - + async def is_allowed( - self, - key: str, - limit: int, + self, + key: str, + limit: int, window_seconds: int, identifier: str = "" - ) -> Tuple[bool, Dict[str, int]]: + ) -> tuple[bool, dict[str, int]]: """ Check if request is allowed under rate limit. 
@@ -35,31 +31,31 @@ class RateLimiter: """ now = time.time() pipeline = self.redis.pipeline() - + # Remove expired entries pipeline.zremrangebyscore(key, 0, now - window_seconds) - + # Count current requests in window pipeline.zcard(key) - + # Add current request pipeline.zadd(key, {str(now): now}) - + # Set expiry pipeline.expire(key, window_seconds) - + results = await pipeline.execute() current_requests = results[1] - + rate_limit_info = { "limit": limit, "remaining": max(0, limit - current_requests), "reset_time": int(now + window_seconds), "retry_after": window_seconds if current_requests >= limit else 0 } - + is_allowed = current_requests <= limit - + # Track metrics track_rate_limit_metrics( identifier=identifier, @@ -67,17 +63,17 @@ class RateLimiter: current_requests=current_requests, limit=limit ) - + return is_allowed, rate_limit_info class RateLimitMiddleware: """FastAPI middleware for rate limiting.""" - + def __init__(self, redis_client: aioredis.Redis): self.limiter = RateLimiter(redis_client) self.settings = get_settings() - + # Rate limit configurations by endpoint pattern self.rate_limits = { # Authentication endpoints @@ -85,32 +81,32 @@ class RateLimitMiddleware: "POST:/api/v1/auth/register": (3, 3600), # 3 requests per hour "POST:/api/v1/auth/refresh": (10, 300), # 10 requests per 5 minutes "POST:/api/v1/auth/forgot-password": (3, 3600), # 3 requests per hour - + # File upload endpoints "POST:/api/v1/files/upload": (10, 3600), # 10 uploads per hour "POST:/api/v1/jobs": (20, 3600), # 20 job creations per hour - + # Job management endpoints "GET:/api/v1/jobs": (100, 300), # 100 requests per 5 minutes "PATCH:/api/v1/jobs/*/approve": (50, 3600), # 50 approvals per hour "PATCH:/api/v1/jobs/*/reject": (50, 3600), # 50 rejections per hour - + # VTT editing endpoints "PATCH:/api/v1/jobs/*/vtt": (100, 3600), # 100 VTT edits per hour - + # Admin endpoints (more restrictive) "GET:/api/v1/admin/*": (50, 300), # 50 requests per 5 minutes 
"POST:/api/v1/admin/*": (20, 3600), # 20 admin actions per hour "PATCH:/api/v1/admin/*": (20, 3600), # 20 admin updates per hour "DELETE:/api/v1/admin/*": (10, 3600), # 10 admin deletions per hour } - + # Default rate limits self.default_limits = { "authenticated": (1000, 3600), # 1000 requests per hour for authenticated users "anonymous": (100, 3600), # 100 requests per hour for anonymous users } - + def _get_client_identifier(self, request: Request) -> str: """Get client identifier for rate limiting.""" user = getattr(request.state, 'user', None) @@ -128,53 +124,53 @@ class RateLimitMiddleware: client_ip = request.client.host if request.client else "unknown" return f"ip:{client_ip}" - + def _get_endpoint_key(self, request: Request) -> str: """Get endpoint pattern for rate limiting.""" method = request.method path = request.url.path - + # Replace job IDs with wildcard for pattern matching import re path = re.sub(r'/jobs/[a-f0-9-]+/', '/jobs/*/', path) path = re.sub(r'/admin/users/[a-f0-9-]+', '/admin/users/*', path) - + return f"{method}:{path}" - - def _get_rate_limit(self, request: Request) -> Tuple[int, int]: + + def _get_rate_limit(self, request: Request) -> tuple[int, int]: """Get rate limit for the current request.""" endpoint_key = self._get_endpoint_key(request) - + # Check for specific endpoint limits if endpoint_key in self.rate_limits: return self.rate_limits[endpoint_key] - + # Check for wildcard matches for pattern, limits in self.rate_limits.items(): if pattern.endswith("*") and endpoint_key.startswith(pattern[:-1]): return limits - + # Use default limits based on authentication user = getattr(request.state, 'user', None) if user: return self.default_limits["authenticated"] else: return self.default_limits["anonymous"] - + async def __call__(self, request: Request, call_next): """Process rate limiting for the request.""" - + # Skip rate limiting for health checks and metrics only if request.url.path in ["/health", "/metrics"]: return await 
call_next(request) - + client_id = self._get_client_identifier(request) endpoint_key = self._get_endpoint_key(request) limit, window = self._get_rate_limit(request) - + # Create rate limit key rate_limit_key = f"rate_limit:{client_id}:{endpoint_key}" - + try: is_allowed, rate_info = await self.limiter.is_allowed( key=rate_limit_key, @@ -182,7 +178,7 @@ class RateLimitMiddleware: window_seconds=window, identifier=client_id ) - + if not is_allowed: # Return rate limit exceeded response return JSONResponse( @@ -199,17 +195,17 @@ class RateLimitMiddleware: "Retry-After": str(rate_info["retry_after"]) } ) - + # Process the request response = await call_next(request) - + # Add rate limit headers to response response.headers["X-RateLimit-Limit"] = str(rate_info["limit"]) response.headers["X-RateLimit-Remaining"] = str(rate_info["remaining"]) response.headers["X-RateLimit-Reset"] = str(rate_info["reset_time"]) - + return response - + except Exception as e: # Log error but don't block request if rate limiting fails print(f"Rate limiting error: {e}") @@ -218,30 +214,30 @@ class RateLimitMiddleware: class IPWhitelist: """IP whitelist for bypassing rate limits.""" - + def __init__(self, redis_client: aioredis.Redis): self.redis = redis_client self.whitelist_key = "ip_whitelist" - + # Default whitelisted IPs (health checks, monitoring) self.default_whitelist = { "127.0.0.1", "::1", "169.254.169.254", # GCP metadata server } - + async def is_whitelisted(self, ip: str) -> bool: """Check if IP is whitelisted.""" if ip in self.default_whitelist: return True - + try: is_member = await self.redis.sismember(self.whitelist_key, ip) return bool(is_member) except Exception: return False - - async def add_ip(self, ip: str, ttl_seconds: Optional[int] = None) -> bool: + + async def add_ip(self, ip: str, ttl_seconds: int | None = None) -> bool: """Add IP to whitelist.""" try: await self.redis.sadd(self.whitelist_key, ip) @@ -252,7 +248,7 @@ class IPWhitelist: return True except Exception: 
return False - + async def remove_ip(self, ip: str) -> bool: """Remove IP from whitelist.""" try: @@ -264,4 +260,4 @@ class IPWhitelist: async def create_rate_limit_middleware(redis_client: aioredis.Redis) -> RateLimitMiddleware: """Factory function to create rate limit middleware.""" - return RateLimitMiddleware(redis_client) \ No newline at end of file + return RateLimitMiddleware(redis_client) diff --git a/backend/app/middleware/validation.py b/backend/app/middleware/validation.py index 6a5bed9..2b8d6cd 100644 --- a/backend/app/middleware/validation.py +++ b/backend/app/middleware/validation.py @@ -3,16 +3,17 @@ import json import re import time -from ..core.config import settings -from typing import Any, Dict, List, Optional, Set -from fastapi import HTTPException, Request, status -from fastapi.responses import JSONResponse -from pydantic import BaseModel, ValidationError as PydanticValidationError -import magic +from typing import Any from urllib.parse import unquote +import magic +from fastapi import Request, status +from fastapi.responses import JSONResponse + from app.telemetry.metrics import track_validation_metrics +from ..core.config import settings + class ValidationError(Exception): """Custom validation error.""" @@ -26,20 +27,20 @@ class SecurityValidationError(Exception): class RequestValidator: """Enhanced request validation with security checks.""" - + def __init__(self): # File type restrictions self.allowed_video_types = { "video/mp4", - "video/quicktime", + "video/quicktime", "video/x-msvideo" # AVI } - + self.allowed_subtitle_types = { "text/vtt", "text/plain" } - + # Security patterns to block self.malicious_patterns = [ # SQL injection patterns @@ -49,70 +50,70 @@ class RequestValidator: r"<\s*script[^>]*>", r"javascript:", r"data:.*base64", - + # Path traversal r"\.\./", r"\.\.\\", r"%2e%2e%2f", r"%2e%2e\\", - + # Command injection (removed $ to allow MongoDB operators in controlled contexts) r"[;&|`](?!\s*$)", # Allow $ but not as command 
separator r"(rm|wget|curl|nc|bash|sh|cmd|powershell)\s+", - + # MongoDB injection — NoSQL operator abuse r"\$where|\$expr|\$function|\$accumulator" r"|\$ne|\$nin|\$not" r"|\$gt|\$gte|\$lt|\$lte" r"|\$regex|\$jsonSchema|\$mod", ] - + self.compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in self.malicious_patterns] - + # Max file sizes (in bytes) — driven by central config (T-14) self.max_video_size = settings.upload_max_video_bytes self.max_subtitle_size = 10 * 1024 * 1024 # 10MB - + # Request size limits self.max_json_size = 1024 * 1024 # 1MB self.max_form_fields = 50 - + def validate_string_content(self, content: str, field_name: str = "input") -> None: """Validate string content for malicious patterns.""" if not isinstance(content, str): return - + for pattern in self.compiled_patterns: if pattern.search(content): raise SecurityValidationError( f"Potentially malicious content detected in {field_name}" ) - + def validate_filename(self, filename: str) -> str: """Validate and sanitize filename.""" if not filename: raise ValidationError("Filename cannot be empty") - + # Decode URL encoding filename = unquote(filename) - + # Check for malicious patterns self.validate_string_content(filename, "filename") - + # Remove dangerous characters safe_filename = re.sub(r'[^\w\-_\.]', '_', filename) - + # Prevent hidden files if safe_filename.startswith('.'): safe_filename = 'file_' + safe_filename[1:] - + # Limit length if len(safe_filename) > 255: name, ext = safe_filename.rsplit('.', 1) if '.' in safe_filename else (safe_filename, '') safe_filename = name[:250] + ('.' + ext if ext else '') - + return safe_filename - + def validate_file_type(self, content: bytes, expected_type: str, filename: str) -> None: """Validate file type using magic numbers.""" try: @@ -122,13 +123,13 @@ class RequestValidator: ext = filename.lower().split('.')[-1] if '.' 
in filename else '' video_extensions = {'mp4', 'mov', 'avi', 'mkv'} subtitle_extensions = {'vtt', 'srt', 'txt'} - + if expected_type == "video" and ext not in video_extensions: raise ValidationError(f"Invalid video file extension: {ext}") elif expected_type == "subtitle" and ext not in subtitle_extensions: raise ValidationError(f"Invalid subtitle file extension: {ext}") return - + if expected_type == "video" and detected_type not in self.allowed_video_types: raise ValidationError( f"Invalid video file type: {detected_type}. " @@ -139,7 +140,7 @@ class RequestValidator: f"Invalid subtitle file type: {detected_type}. " f"Allowed types: {', '.join(self.allowed_subtitle_types)}" ) - + def validate_file_size(self, size: int, file_type: str) -> None: """Validate file size limits.""" if file_type == "video" and size > self.max_video_size: @@ -152,16 +153,16 @@ class RequestValidator: f"Subtitle file too large: {size} bytes. " f"Maximum allowed: {self.max_subtitle_size} bytes" ) - - async def validate_json_payload(self, request: Request) -> Optional[Dict[str, Any]]: + + async def validate_json_payload(self, request: Request) -> dict[str, Any] | None: """Validate JSON request payload.""" if not request.headers.get("content-type", "").startswith("application/json"): return None - + content_length = request.headers.get("content-length") if content_length and int(content_length) > self.max_json_size: raise ValidationError(f"JSON payload too large: {content_length} bytes") - + try: # Check if body has already been read if hasattr(request, '_cached_body'): @@ -170,62 +171,62 @@ class RequestValidator: body = await request.body() # Cache the body so FastAPI can read it later request._cached_body = body - + if len(body) > self.max_json_size: raise ValidationError(f"JSON payload too large: {len(body)} bytes") - + if not body: return {} - + payload = json.loads(body) - + # Recursively validate all string values self._validate_json_values(payload) - + return payload - + except 
json.JSONDecodeError as e: raise ValidationError(f"Invalid JSON: {e}") - + def _validate_json_values(self, obj: Any, path: str = "root") -> None: """Recursively validate JSON values.""" if isinstance(obj, dict): if len(obj) > self.max_form_fields: raise ValidationError(f"Too many fields in object at {path}") - + for key, value in obj.items(): self.validate_string_content(key, f"{path}.key") self._validate_json_values(value, f"{path}.{key}") - + elif isinstance(obj, list): if len(obj) > 1000: # Prevent large arrays raise ValidationError(f"Array too large at {path}") - + for i, item in enumerate(obj): self._validate_json_values(item, f"{path}[{i}]") - + elif isinstance(obj, str): self.validate_string_content(obj, path) - + def validate_query_params(self, request: Request) -> None: """Validate query parameters.""" for key, value in request.query_params.items(): self.validate_string_content(key, f"query.{key}") self.validate_string_content(str(value), f"query.{key}") - + def validate_headers(self, request: Request) -> None: """Validate request headers.""" suspicious_headers = { "x-forwarded-host", - "x-original-host", + "x-original-host", "x-rewrite-url" } - + for header_name, header_value in request.headers.items(): # Check for suspicious headers if header_name.lower() in suspicious_headers: self.validate_string_content(header_value, f"header.{header_name}") - + # Validate user-agent length if header_name.lower() == "user-agent" and len(header_value) > 500: raise SecurityValidationError("User-Agent header too long") @@ -233,34 +234,34 @@ class RequestValidator: class ValidationMiddleware: """FastAPI middleware for enhanced request validation.""" - + def __init__(self): self.validator = RequestValidator() - + async def __call__(self, request: Request, call_next): """Process validation for the request.""" - + start_time = time.time() validation_errors = [] - + # Skip validation for timing adjustment endpoint temporarily if "/vtt/adjust-timing" in request.url.path: 
return await call_next(request) - + try: # Validate headers self.validator.validate_headers(request) - + # Validate query parameters self.validator.validate_query_params(request) - + # Validate JSON payload if present if request.method in ["POST", "PUT", "PATCH"]: await self.validator.validate_json_payload(request) - + # Process the request response = await call_next(request) - + # Track successful validation track_validation_metrics( endpoint=request.url.path, @@ -269,10 +270,10 @@ class ValidationMiddleware: validation_time=time.time() - start_time, error_types=[] ) - + return response - - except SecurityValidationError as e: + + except SecurityValidationError: validation_errors.append("security") track_validation_metrics( endpoint=request.url.path, @@ -281,7 +282,7 @@ class ValidationMiddleware: validation_time=time.time() - start_time, error_types=validation_errors ) - + return JSONResponse( status_code=status.HTTP_400_BAD_REQUEST, content={ @@ -289,7 +290,7 @@ class ValidationMiddleware: "error_code": "SECURITY_VALIDATION_ERROR" } ) - + except ValidationError as e: validation_errors.append("format") track_validation_metrics( @@ -299,7 +300,7 @@ class ValidationMiddleware: validation_time=time.time() - start_time, error_types=validation_errors ) - + return JSONResponse( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, content={ @@ -307,7 +308,7 @@ class ValidationMiddleware: "error_code": "VALIDATION_ERROR" } ) - + except Exception as e: validation_errors.append("unknown") track_validation_metrics( @@ -317,7 +318,7 @@ class ValidationMiddleware: validation_time=time.time() - start_time, error_types=validation_errors ) - + # Log unexpected error but continue processing print(f"Validation middleware error: {e}") return await call_next(request) @@ -325,4 +326,4 @@ class ValidationMiddleware: async def create_validation_middleware() -> ValidationMiddleware: """Factory function to create validation middleware.""" - return ValidationMiddleware() \ No newline at end 
of file + return ValidationMiddleware() diff --git a/backend/app/migrations/__init__.py b/backend/app/migrations/__init__.py index c8030a9..d0c5927 100644 --- a/backend/app/migrations/__init__.py +++ b/backend/app/migrations/__init__.py @@ -1,5 +1,5 @@ """Database migration framework for MongoDB.""" -from .migrator import MigrationManager, Migration +from .migrator import Migration, MigrationManager -__all__ = ["MigrationManager", "Migration"] \ No newline at end of file +__all__ = ["MigrationManager", "Migration"] diff --git a/backend/app/migrations/migrator.py b/backend/app/migrations/migrator.py index 95adc67..e1ab64d 100644 --- a/backend/app/migrations/migrator.py +++ b/backend/app/migrations/migrator.py @@ -1,11 +1,10 @@ """MongoDB migration framework.""" -import os import importlib.util from abc import ABC, abstractmethod from datetime import datetime from pathlib import Path -from typing import List, Optional + from motor.motor_asyncio import AsyncIOMotorDatabase from app.core.database import get_database @@ -22,18 +21,18 @@ class Migration(ABC): description: str = "" def __init__(self): - self.db: Optional[AsyncIOMotorDatabase] = None - + self.db: AsyncIOMotorDatabase | None = None + @abstractmethod async def up(self) -> None: """Apply the migration.""" pass - + @abstractmethod async def down(self) -> None: """Rollback the migration.""" pass - + async def set_database(self, db: AsyncIOMotorDatabase) -> None: """Set the database instance.""" self.db = db @@ -41,7 +40,7 @@ class Migration(ABC): class MigrationRecord: """Represents a migration record in the database.""" - + def __init__(self, version: str, description: str, applied_at: datetime): self.version = version self.description = description @@ -50,96 +49,96 @@ class MigrationRecord: class MigrationManager: """Manages database migrations.""" - + def __init__(self): - self.db: Optional[AsyncIOMotorDatabase] = None + self.db: AsyncIOMotorDatabase | None = None self.migrations_dir = Path(__file__).parent 
/ "scripts" self.collection_name = "migration_history" - + async def initialize(self) -> None: """Initialize the migration manager.""" self.db = await get_database() await self._ensure_migration_collection() - + async def _ensure_migration_collection(self) -> None: """Ensure the migration history collection exists with proper indexes.""" collection = self.db[self.collection_name] - + # Create indexes for migration history await collection.create_index([("version", 1)], unique=True) await collection.create_index([("applied_at", -1)]) - + logger.info("Migration history collection initialized") - - def discover_migrations(self) -> List[str]: + + def discover_migrations(self) -> list[str]: """Discover all migration files in the migrations directory.""" if not self.migrations_dir.exists(): logger.warning(f"Migrations directory not found: {self.migrations_dir}") return [] - + migration_files = [] for file_path in self.migrations_dir.glob("*.py"): if file_path.name.startswith("migration_") and not file_path.name.startswith("__"): migration_files.append(file_path.stem) - + # Sort by version (filename should start with version) migration_files.sort() return migration_files - + async def load_migration(self, migration_name: str) -> Migration: """Dynamically load a migration class.""" migration_path = self.migrations_dir / f"{migration_name}.py" - + if not migration_path.exists(): raise FileNotFoundError(f"Migration file not found: {migration_path}") - + # Load the module spec = importlib.util.spec_from_file_location(migration_name, migration_path) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) - + # Get the migration class (assume it's named Migration) if not hasattr(module, 'Migration'): raise AttributeError(f"Migration class not found in {migration_name}") - - migration_class = getattr(module, 'Migration') + + migration_class = module.Migration migration = migration_class() await migration.set_database(self.db) - + return migration - - async 
def get_applied_migrations(self) -> List[str]: + + async def get_applied_migrations(self) -> list[str]: """Get list of applied migration versions.""" collection = self.db[self.collection_name] cursor = collection.find({}, {"version": 1}).sort("version", 1) - + applied = [] async for doc in cursor: applied.append(doc["version"]) - + return applied - + async def record_migration(self, migration: Migration) -> None: """Record a successful migration in the database.""" collection = self.db[self.collection_name] - + record = { "version": migration.version, "description": migration.description, "applied_at": datetime.utcnow() } - + await collection.insert_one(record) logger.info(f"Recorded migration: {migration.version} - {migration.description}") - + async def remove_migration_record(self, version: str) -> None: """Remove a migration record (for rollback).""" collection = self.db[self.collection_name] await collection.delete_one({"version": version}) logger.info(f"Removed migration record: {version}") - + @trace_async_operation("migration_manager.migrate_up") - async def migrate_up(self, target_version: Optional[str] = None) -> List[str]: + async def migrate_up(self, target_version: str | None = None) -> list[str]: """ Apply migrations up to the target version. @@ -150,44 +149,44 @@ class MigrationManager: List of applied migration versions. 
""" await self.initialize() - + # Discover all migrations all_migrations = self.discover_migrations() applied_migrations = await self.get_applied_migrations() - + # Find pending migrations pending_migrations = [] for migration_name in all_migrations: # Extract version from filename (assumes format: migration_YYYY-MM-DD-HHMMSS_description.py) version = migration_name.replace("migration_", "").split("_")[0] - + if version not in applied_migrations: if target_version is None or version <= target_version: pending_migrations.append((migration_name, version)) - + # Sort by version pending_migrations.sort(key=lambda x: x[1]) - + applied = [] for migration_name, version in pending_migrations: try: logger.info(f"Applying migration: {migration_name}") - + migration = await self.load_migration(migration_name) await migration.up() await self.record_migration(migration) - + applied.append(version) logger.info(f"Successfully applied migration: {version}") - + except Exception as e: logger.error(f"Failed to apply migration {migration_name}: {e}") raise - + return applied - + @trace_async_operation("migration_manager.migrate_down") - async def migrate_down(self, target_version: str) -> List[str]: + async def migrate_down(self, target_version: str) -> list[str]: """ Rollback migrations down to the target version. @@ -198,15 +197,15 @@ class MigrationManager: List of rolled back migration versions. 
""" await self.initialize() - + applied_migrations = await self.get_applied_migrations() - + # Find migrations to rollback (newer than target) to_rollback = [] for version in reversed(applied_migrations): if version > target_version: to_rollback.append(version) - + rolled_back = [] for version in to_rollback: try: @@ -216,39 +215,39 @@ class MigrationManager: if version in migration_file: migration_name = migration_file break - + if not migration_name: logger.warning(f"Migration file not found for version {version}") continue - + logger.info(f"Rolling back migration: {migration_name}") - + migration = await self.load_migration(migration_name) await migration.down() await self.remove_migration_record(version) - + rolled_back.append(version) logger.info(f"Successfully rolled back migration: {version}") - + except Exception as e: logger.error(f"Failed to rollback migration {version}: {e}") raise - + return rolled_back - + async def get_migration_status(self) -> dict: """Get current migration status.""" await self.initialize() - + all_migrations = self.discover_migrations() applied_migrations = await self.get_applied_migrations() - + pending_count = len(all_migrations) - len(applied_migrations) - + return { "total_migrations": len(all_migrations), "applied_migrations": len(applied_migrations), "pending_migrations": pending_count, "latest_applied": applied_migrations[-1] if applied_migrations else None, "all_applied": applied_migrations - } \ No newline at end of file + } diff --git a/backend/app/migrations/scripts/migration_2025-08-17-120000_initial_schema.py b/backend/app/migrations/scripts/migration_2025-08-17-120000_initial_schema.py index 667e76c..29bed5d 100644 --- a/backend/app/migrations/scripts/migration_2025-08-17-120000_initial_schema.py +++ b/backend/app/migrations/scripts/migration_2025-08-17-120000_initial_schema.py @@ -1,39 +1,38 @@ """Initial database schema setup migration.""" -from datetime import datetime from app.migrations.migrator import Migration 
class Migration(Migration): """Initial schema setup with all collections and indexes.""" - + def __init__(self): super().__init__() self.version = "2025-08-17-120000" self.description = "Initial database schema with users, jobs, and audit_logs collections" - + async def up(self) -> None: """Create initial collections and indexes.""" - + # Users collection setup await self.db.users.create_index([("email", 1)], unique=True) await self.db.users.create_index([("role", 1)]) await self.db.users.create_index([("is_active", 1)]) await self.db.users.create_index([("created_at", -1)]) - + # Jobs collection setup await self.db.jobs.create_index([("status", 1), ("created_at", -1)]) await self.db.jobs.create_index([("client_id", 1)]) await self.db.jobs.create_index([("updated_at", -1)]) await self.db.jobs.create_index([("languages", 1)]) - + # Create compound index for job queries await self.db.jobs.create_index([ ("status", 1), ("client_id", 1), ("created_at", -1) ]) - + # Audit logs collection setup await self.db.audit_logs.create_index([("timestamp", -1)]) await self.db.audit_logs.create_index([("action", 1), ("timestamp", -1)]) @@ -42,23 +41,23 @@ class Migration(Migration): await self.db.audit_logs.create_index([("resource_type", 1), ("resource_id", 1)]) await self.db.audit_logs.create_index([("ip_address", 1), ("timestamp", -1)]) await self.db.audit_logs.create_index([("success", 1), ("timestamp", -1)]) - + # Text search index for audit logs await self.db.audit_logs.create_index([ ("description", "text"), ("details", "text"), ("error_message", "text") ]) - + print(f"✅ Applied migration {self.version}: {self.description}") - + async def down(self) -> None: """Drop all collections (destructive - use with caution).""" - + # This is a destructive operation - in production, you might want to backup first await self.db.users.drop() await self.db.jobs.drop() await self.db.audit_logs.drop() - + print(f"⚠️ Rolled back migration {self.version}: {self.description}") - print("⚠️ 
WARNING: All data has been deleted!") \ No newline at end of file + print("⚠️ WARNING: All data has been deleted!") diff --git a/backend/app/migrations/scripts/migration_2025-08-17-120001_index_optimization.py b/backend/app/migrations/scripts/migration_2025-08-17-120001_index_optimization.py index fe7b143..3e6b817 100644 --- a/backend/app/migrations/scripts/migration_2025-08-17-120001_index_optimization.py +++ b/backend/app/migrations/scripts/migration_2025-08-17-120001_index_optimization.py @@ -5,75 +5,75 @@ from app.migrations.migrator import Migration class Migration(Migration): """Optimize indexes for better query performance.""" - + def __init__(self): super().__init__() self.version = "2025-08-17-120001" self.description = "Index optimization for query performance improvements" - + async def up(self) -> None: """Add optimized indexes for common query patterns.""" - + # Jobs collection optimizations - + # Index for job status transitions and monitoring await self.db.jobs.create_index([ ("status", 1), ("updated_at", -1), ("client_id", 1) ], name="jobs_status_updated_client_idx") - + # Index for queue management (pending jobs) await self.db.jobs.create_index([ ("status", 1), ("created_at", 1) ], name="jobs_queue_processing_idx") - + # Index for client job history await self.db.jobs.create_index([ ("client_id", 1), ("created_at", -1), ("status", 1) ], name="jobs_client_history_idx") - + # Sparse index for error tracking await self.db.jobs.create_index([ ("status", 1), ("error", 1) ], sparse=True, name="jobs_error_tracking_idx") - + # Users collection optimizations - + # Index for active user queries await self.db.users.create_index([ ("is_active", 1), ("role", 1), ("last_login_at", -1) ], name="users_active_role_login_idx") - + # Index for user search by email pattern await self.db.users.create_index([ ("email", "text"), ("first_name", "text"), ("last_name", "text") ], name="users_search_idx") - + # Audit logs collection optimizations - + # Compound index for 
security monitoring await self.db.audit_logs.create_index([ ("severity", 1), ("action", 1), ("timestamp", -1) ], name="audit_security_monitoring_idx") - + # Index for user activity analysis await self.db.audit_logs.create_index([ ("user_id", 1), ("action", 1), ("timestamp", -1) ], name="audit_user_activity_idx") - + # Index for resource access tracking await self.db.audit_logs.create_index([ ("resource_type", 1), @@ -81,30 +81,30 @@ class Migration(Migration): ("action", 1), ("timestamp", -1) ], name="audit_resource_access_idx") - + # Sparse index for failed operations await self.db.audit_logs.create_index([ ("success", 1), ("timestamp", -1) ], sparse=True, name="audit_failures_idx") - + # Add TTL index for automatic audit log cleanup (optional) # Uncomment if you want automatic cleanup after 2 years # await self.db.audit_logs.create_index( - # [("timestamp", 1)], + # [("timestamp", 1)], # expireAfterSeconds=63072000, # 2 years # name="audit_ttl_idx" # ) - + print(f"✅ Applied migration {self.version}: {self.description}") - + async def down(self) -> None: """Remove the optimized indexes.""" - + # Drop the indexes we created indexes_to_drop = [ "jobs_status_updated_client_idx", - "jobs_queue_processing_idx", + "jobs_queue_processing_idx", "jobs_client_history_idx", "jobs_error_tracking_idx", "users_active_role_login_idx", @@ -114,21 +114,21 @@ class Migration(Migration): "audit_resource_access_idx", "audit_failures_idx" ] - + for index_name in indexes_to_drop: try: await self.db.jobs.drop_index(index_name) except Exception: pass # Index might not exist on this collection - + try: await self.db.users.drop_index(index_name) except Exception: pass - + try: await self.db.audit_logs.drop_index(index_name) except Exception: pass - - print(f"⚠️ Rolled back migration {self.version}: {self.description}") \ No newline at end of file + + print(f"⚠️ Rolled back migration {self.version}: {self.description}") diff --git 
a/backend/app/migrations/scripts/migration_2025-08-17-120002_audit_log_schema_update.py b/backend/app/migrations/scripts/migration_2025-08-17-120002_audit_log_schema_update.py index fecf7b0..9362333 100644 --- a/backend/app/migrations/scripts/migration_2025-08-17-120002_audit_log_schema_update.py +++ b/backend/app/migrations/scripts/migration_2025-08-17-120002_audit_log_schema_update.py @@ -1,20 +1,21 @@ """Migrate audit log schema from basic to comprehensive format.""" from datetime import datetime + from app.migrations.migrator import Migration class Migration(Migration): """Update audit log schema to comprehensive format.""" - + def __init__(self): super().__init__() self.version = "2025-08-17-120002" self.description = "Update audit log schema from basic to comprehensive format" - + async def up(self) -> None: """Migrate existing audit logs to new schema format.""" - + # Find all existing audit logs with old schema old_logs_cursor = self.db.audit_logs.find({ # Look for logs that have the old schema structure @@ -24,9 +25,9 @@ class Migration(Migration): {"timestamp": {"$exists": False}} # Missing new timestamp field ] }) - + migration_count = 0 - + async for old_log in old_logs_cursor: try: # Map old fields to new schema @@ -38,82 +39,82 @@ class Migration(Migration): "description": old_log.get("action", "Legacy action"), "success": True, "environment": "prod", - "service_name": "accessible-video-api", + "service_name": "accessible-video-api", "api_version": "v1" } - + # Map optional fields if they exist if "user_id" in old_log: new_log["user_id"] = old_log["user_id"] - + if "job_id" in old_log: new_log["resource_type"] = "job" new_log["resource_id"] = old_log["job_id"] - + if "ip_address" in old_log: new_log["ip_address"] = old_log["ip_address"] - + if "user_agent" in old_log: new_log["user_agent"] = old_log["user_agent"] - + if "details" in old_log: new_log["details"] = old_log["details"] - + # Replace the old document with the new schema await 
self.db.audit_logs.replace_one( {"_id": old_log["_id"]}, new_log ) - + migration_count += 1 - + except Exception as e: print(f"Error migrating audit log {old_log.get('_id')}: {e}") continue - + print(f"✅ Applied migration {self.version}: Migrated {migration_count} audit log records") - + def _map_old_action(self, old_action: str) -> str: """Map old action strings to new AuditAction enum values.""" action_mapping = { # Job actions "job_created": "job.create", - "job_approved": "job.approve", + "job_approved": "job.approve", "job_rejected": "job.reject", "job_updated": "job.update", "job_cancelled": "job.cancel", - + # Auth actions "login": "auth.login.success", "logout": "auth.logout", "login_failed": "auth.login.failure", - + # File actions "file_uploaded": "file.upload", "file_downloaded": "file.download", - + # VTT actions "vtt_edited": "vtt.edit", - + # Admin actions "user_created": "user.create", "user_updated": "user.update", "user_deleted": "user.delete", } - + return action_mapping.get(old_action, old_action) - + async def down(self) -> None: """Rollback to old audit log schema format (limited).""" - + # Find all audit logs with new schema new_logs_cursor = self.db.audit_logs.find({ "timestamp": {"$exists": True}, "action": {"$exists": True} }) - + rollback_count = 0 - + async for new_log in new_logs_cursor: try: # Map new fields back to old schema (lossy conversion) @@ -122,34 +123,34 @@ class Migration(Migration): "when": new_log["timestamp"], "action": new_log["action"] } - + # Map back optional fields if "user_id" in new_log: old_log["user_id"] = new_log["user_id"] - + if "resource_type" in new_log and new_log["resource_type"] == "job": old_log["job_id"] = new_log.get("resource_id") - + if "ip_address" in new_log: old_log["ip_address"] = new_log["ip_address"] - + if "user_agent" in new_log: old_log["user_agent"] = new_log["user_agent"] - + if "details" in new_log: old_log["details"] = new_log["details"] - + # Replace with old schema await 
self.db.audit_logs.replace_one( {"_id": new_log["_id"]}, old_log ) - + rollback_count += 1 - + except Exception as e: print(f"Error rolling back audit log {new_log.get('_id')}: {e}") continue - + print(f"⚠️ Rolled back migration {self.version}: Reverted {rollback_count} audit log records") - print("⚠️ WARNING: Some audit log data may have been lost due to schema differences") \ No newline at end of file + print("⚠️ WARNING: Some audit log data may have been lost due to schema differences") diff --git a/backend/app/migrations/scripts/migration_2025-10-10-000000_add_auth_provider.py b/backend/app/migrations/scripts/migration_2025-10-10-000000_add_auth_provider.py index 96d52c9..5d550c3 100644 --- a/backend/app/migrations/scripts/migration_2025-10-10-000000_add_auth_provider.py +++ b/backend/app/migrations/scripts/migration_2025-10-10-000000_add_auth_provider.py @@ -24,7 +24,7 @@ class Migration(Migration): # Create index on auth_provider for faster queries await self.db.users.create_index([("auth_provider", 1)]) - print(f"✅ Created index on auth_provider field") + print("✅ Created index on auth_provider field") print(f"✅ Applied migration {self.version}: {self.description}") @@ -34,7 +34,7 @@ class Migration(Migration): # Drop the index try: await self.db.users.drop_index("auth_provider_1") - print(f"✅ Dropped index on auth_provider field") + print("✅ Dropped index on auth_provider field") except Exception as e: print(f"⚠️ Could not drop index: {e}") diff --git a/backend/app/migrations/scripts/migration_2025-10-10-000001_update_user_schema_validator.py b/backend/app/migrations/scripts/migration_2025-10-10-000001_update_user_schema_validator.py index 009ebd2..d5e4b81 100644 --- a/backend/app/migrations/scripts/migration_2025-10-10-000001_update_user_schema_validator.py +++ b/backend/app/migrations/scripts/migration_2025-10-10-000001_update_user_schema_validator.py @@ -75,7 +75,7 @@ class Migration(Migration): "validationLevel": "moderate", # moderate = only validate 
on insert/update, not existing docs "validationAction": "error" # error = reject invalid documents }) - print(f"✅ Updated users collection validator") + print("✅ Updated users collection validator") except Exception as e: print(f"⚠️ Could not update validator: {e}") # Try creating the collection if it doesn't exist @@ -86,7 +86,7 @@ class Migration(Migration): validationLevel="moderate", validationAction="error" ) - print(f"✅ Created users collection with validator") + print("✅ Created users collection with validator") except Exception as e2: print(f"⚠️ Could not create collection: {e2}") @@ -136,4 +136,4 @@ class Migration(Migration): }) print(f"⚠️ Rolled back migration {self.version}: {self.description}") - print(f"⚠️ WARNING: Production role users will fail validation!") + print("⚠️ WARNING: Production role users will fail validation!") diff --git a/backend/app/migrations/scripts/migration_2025-12-22-000000_add_approved_source_status.py b/backend/app/migrations/scripts/migration_2025-12-22-000000_add_approved_source_status.py index ee550b5..dbc94e4 100644 --- a/backend/app/migrations/scripts/migration_2025-12-22-000000_add_approved_source_status.py +++ b/backend/app/migrations/scripts/migration_2025-12-22-000000_add_approved_source_status.py @@ -53,7 +53,7 @@ class Migration(Migration): "validationLevel": "moderate", "validationAction": "error" }) - print(f" Updated jobs collection validator") + print(" Updated jobs collection validator") except Exception as e: print(f" Could not update validator: {e}") raise @@ -101,4 +101,4 @@ class Migration(Migration): }) print(f" Rolled back migration {self.version}: {self.description}") - print(f" WARNING: Jobs with approved_source or qc_feedback status will fail validation!") + print(" WARNING: Jobs with approved_source or qc_feedback status will fail validation!") diff --git a/backend/app/migrations/scripts/migration_2025-12-27-000000_add_rendering_video_status.py 
b/backend/app/migrations/scripts/migration_2025-12-27-000000_add_rendering_video_status.py index b5a2072..3c0f6ae 100644 --- a/backend/app/migrations/scripts/migration_2025-12-27-000000_add_rendering_video_status.py +++ b/backend/app/migrations/scripts/migration_2025-12-27-000000_add_rendering_video_status.py @@ -54,7 +54,7 @@ class Migration(Migration): "validationLevel": "moderate", "validationAction": "error" }) - print(f" Updated jobs collection validator") + print(" Updated jobs collection validator") except Exception as e: print(f" Could not update validator: {e}") raise @@ -104,4 +104,4 @@ class Migration(Migration): }) print(f" Rolled back migration {self.version}: {self.description}") - print(f" WARNING: Jobs with rendering_video status will fail validation!") + print(" WARNING: Jobs with rendering_video status will fail validation!") diff --git a/backend/app/migrations/scripts/migration_2026-01-05-000000_add_failed_statuses.py b/backend/app/migrations/scripts/migration_2026-01-05-000000_add_failed_statuses.py index d0718fe..2e0e0d8 100644 --- a/backend/app/migrations/scripts/migration_2026-01-05-000000_add_failed_statuses.py +++ b/backend/app/migrations/scripts/migration_2026-01-05-000000_add_failed_statuses.py @@ -60,7 +60,7 @@ class Migration(Migration): "validationLevel": "moderate", "validationAction": "error" }) - print(f" Updated jobs collection validator") + print(" Updated jobs collection validator") except Exception as e: print(f" Could not update validator: {e}") raise @@ -111,4 +111,4 @@ class Migration(Migration): }) print(f" Rolled back migration {self.version}: {self.description}") - print(f" WARNING: Jobs with tts_failed or render_failed status will fail validation!") + print(" WARNING: Jobs with tts_failed or render_failed status will fail validation!") diff --git a/backend/app/migrations/scripts/migration_2026-01-11-000000_add_rendering_qc_status.py b/backend/app/migrations/scripts/migration_2026-01-11-000000_add_rendering_qc_status.py 
index 8d1768b..419f7b9 100644 --- a/backend/app/migrations/scripts/migration_2026-01-11-000000_add_rendering_qc_status.py +++ b/backend/app/migrations/scripts/migration_2026-01-11-000000_add_rendering_qc_status.py @@ -61,7 +61,7 @@ class Migration(Migration): "validationLevel": "moderate", "validationAction": "error" }) - print(f" Updated jobs collection validator") + print(" Updated jobs collection validator") except Exception as e: print(f" Could not update validator: {e}") raise @@ -114,4 +114,4 @@ class Migration(Migration): }) print(f" Rolled back migration {self.version}: {self.description}") - print(f" WARNING: Jobs with rendering_qc status will fail validation!") + print(" WARNING: Jobs with rendering_qc status will fail validation!") diff --git a/backend/app/migrations/scripts/migration_2026-04-16-000000_add_linguist_role.py b/backend/app/migrations/scripts/migration_2026-04-16-000000_add_linguist_role.py index 0923081..cc2a9d8 100644 --- a/backend/app/migrations/scripts/migration_2026-04-16-000000_add_linguist_role.py +++ b/backend/app/migrations/scripts/migration_2026-04-16-000000_add_linguist_role.py @@ -64,7 +64,7 @@ class Migration(Migration): "validationLevel": "moderate", "validationAction": "error" }) - print(f"✅ Updated users collection validator") + print("✅ Updated users collection validator") except Exception as e: print(f"⚠️ Could not update validator: {e}") try: @@ -74,7 +74,7 @@ class Migration(Migration): validationLevel="moderate", validationAction="error" ) - print(f"✅ Created users collection with validator") + print("✅ Created users collection with validator") except Exception as e2: print(f"⚠️ Could not create collection: {e2}") @@ -134,4 +134,4 @@ class Migration(Migration): }) print(f"⚠️ Rolled back migration {self.version}: {self.description}") - print(f"⚠️ WARNING: Linguist role users will fail validation!") + print("⚠️ WARNING: Linguist role users will fail validation!") diff --git 
a/backend/app/migrations/scripts/migration_2026-04-27-000000_add_project_manager_role.py b/backend/app/migrations/scripts/migration_2026-04-27-000000_add_project_manager_role.py index c01f22c..3418d44 100644 --- a/backend/app/migrations/scripts/migration_2026-04-27-000000_add_project_manager_role.py +++ b/backend/app/migrations/scripts/migration_2026-04-27-000000_add_project_manager_role.py @@ -69,7 +69,7 @@ class Migration(Migration): "validationLevel": "moderate", "validationAction": "error" }) - print(f"✅ Updated users collection validator") + print("✅ Updated users collection validator") except Exception as e: print(f"⚠️ Could not update validator: {e}") try: @@ -79,7 +79,7 @@ class Migration(Migration): validationLevel="moderate", validationAction="error" ) - print(f"✅ Created users collection with validator") + print("✅ Created users collection with validator") except Exception as e2: print(f"⚠️ Could not create collection: {e2}") @@ -139,4 +139,4 @@ class Migration(Migration): }) print(f"⚠️ Rolled back migration {self.version}: {self.description}") - print(f"⚠️ WARNING: project_manager role users will fail validation!") + print("⚠️ WARNING: project_manager role users will fail validation!") diff --git a/backend/app/migrations/scripts/migration_2026-04-28-000001_backfill_memberships.py b/backend/app/migrations/scripts/migration_2026-04-28-000001_backfill_memberships.py index 5cfda98..885c481 100644 --- a/backend/app/migrations/scripts/migration_2026-04-28-000001_backfill_memberships.py +++ b/backend/app/migrations/scripts/migration_2026-04-28-000001_backfill_memberships.py @@ -1,6 +1,6 @@ """Backfill memberships collection from existing pm_client_ids and team.member_user_ids.""" -from datetime import datetime, timezone +from datetime import UTC, datetime from app.migrations.migrator import Migration @@ -13,7 +13,7 @@ class Migration(Migration): self.description = "Backfill memberships from pm_client_ids and team member lists" async def up(self) -> None: - now 
= datetime.now(timezone.utc) + now = datetime.now(UTC) upserted = 0 # 1. PROJECT_MANAGER users → MANAGER membership for each pm_client_id diff --git a/backend/app/models/audit_log.py b/backend/app/models/audit_log.py index 49f5284..9f6af99 100644 --- a/backend/app/models/audit_log.py +++ b/backend/app/models/audit_log.py @@ -2,7 +2,8 @@ from datetime import datetime from enum import Enum -from typing import Any, Dict, Optional +from typing import Any + from bson import ObjectId from pydantic import BaseModel, Field @@ -11,7 +12,7 @@ from .user import PyObjectId class AuditAction(str, Enum): """Enumeration of auditable actions.""" - + # Authentication actions LOGIN_SUCCESS = "auth.login.success" LOGIN_FAILURE = "auth.login.failure" @@ -19,7 +20,7 @@ class AuditAction(str, Enum): TOKEN_REFRESH = "auth.token.refresh" PASSWORD_CHANGE = "auth.password.change" PASSWORD_RESET = "auth.password.reset" - + # User management actions USER_CREATE = "user.create" USER_UPDATE = "user.update" @@ -27,7 +28,7 @@ class AuditAction(str, Enum): USER_ROLE_CHANGE = "user.role.change" USER_ACTIVATE = "user.activate" USER_DEACTIVATE = "user.deactivate" - + # Job management actions JOB_CREATE = "job.create" JOB_UPDATE = "job.update" @@ -39,13 +40,13 @@ class AuditAction(str, Enum): JOB_TASK_FAILED = "job.task.failed" JOB_RETRY = "job.retry" JOB_BULK_RETRY = "job.bulk_retry" - + # File operations FILE_UPLOAD = "file.upload" FILE_DOWNLOAD = "file.download" FILE_DELETE = "file.delete" FILE_ACCESS = "file.access" - + # VTT editing actions VTT_EDIT = "vtt.edit" VTT_APPROVE = "vtt.approve" @@ -62,13 +63,13 @@ class AuditAction(str, Enum): LANGUAGE_QC_REJECT = "language_qc.reject" LANGUAGE_QC_REOPEN = "language_qc.reopen" LANGUAGE_QC_COMMENT = "language_qc.comment" - + # Admin actions ADMIN_CONFIG_CHANGE = "admin.config.change" ADMIN_SYSTEM_ACTION = "admin.system.action" ADMIN_DATA_EXPORT = "admin.data.export" ADMIN_AUDIT_ACCESS = "admin.audit.access" - + # Glossary management GLOSSARY_UPLOAD = 
"glossary.upload" GLOSSARY_VERSION_UPLOAD = "glossary.version.upload" @@ -84,7 +85,7 @@ class AuditAction(str, Enum): class AuditLogSeverity(str, Enum): """Severity levels for audit events.""" - + INFO = "info" # Normal operations WARNING = "warning" # Suspicious but not critical ERROR = "error" # Failed operations @@ -93,43 +94,43 @@ class AuditLogSeverity(str, Enum): class AuditLog(BaseModel): """Audit log entry model.""" - - id: Optional[PyObjectId] = Field(default_factory=lambda: str(ObjectId()), alias="_id") - + + id: PyObjectId | None = Field(default_factory=lambda: str(ObjectId()), alias="_id") + # Core audit fields timestamp: datetime = Field(default_factory=datetime.utcnow) action: AuditAction severity: AuditLogSeverity = AuditLogSeverity.INFO - + # Actor information - user_id: Optional[PyObjectId] = None - user_email: Optional[str] = None - user_role: Optional[str] = None - + user_id: PyObjectId | None = None + user_email: str | None = None + user_role: str | None = None + # Request context - ip_address: Optional[str] = None - user_agent: Optional[str] = None - request_id: Optional[str] = None - session_id: Optional[str] = None - + ip_address: str | None = None + user_agent: str | None = None + request_id: str | None = None + session_id: str | None = None + # Resource information - resource_type: Optional[str] = None # e.g., "job", "user", "file" - resource_id: Optional[str] = None - resource_name: Optional[str] = None - + resource_type: str | None = None # e.g., "job", "user", "file" + resource_id: str | None = None + resource_name: str | None = None + # Action details description: str - details: Dict[str, Any] = Field(default_factory=dict) - + details: dict[str, Any] = Field(default_factory=dict) + # Outcome success: bool = True - error_message: Optional[str] = None - + error_message: str | None = None + # Additional metadata environment: str = "prod" service_name: str = "accessible-video-api" api_version: str = "v1" - + class Config: populate_by_name = 
True arbitrary_types_allowed = True @@ -138,49 +139,49 @@ class AuditLog(BaseModel): class AuditLogCreate(BaseModel): """Schema for creating audit log entries.""" - + action: AuditAction severity: AuditLogSeverity = AuditLogSeverity.INFO description: str - + # Optional fields that can be provided - user_id: Optional[PyObjectId] = None - user_email: Optional[str] = None - user_role: Optional[str] = None - ip_address: Optional[str] = None - user_agent: Optional[str] = None - request_id: Optional[str] = None - resource_type: Optional[str] = None - resource_id: Optional[str] = None - resource_name: Optional[str] = None - details: Dict[str, Any] = Field(default_factory=dict) + user_id: PyObjectId | None = None + user_email: str | None = None + user_role: str | None = None + ip_address: str | None = None + user_agent: str | None = None + request_id: str | None = None + resource_type: str | None = None + resource_id: str | None = None + resource_name: str | None = None + details: dict[str, Any] = Field(default_factory=dict) success: bool = True - error_message: Optional[str] = None + error_message: str | None = None class AuditLogQuery(BaseModel): """Schema for querying audit logs.""" - + # Time range - start_date: Optional[datetime] = None - end_date: Optional[datetime] = None - + start_date: datetime | None = None + end_date: datetime | None = None + # Filters - action: Optional[AuditAction] = None - severity: Optional[AuditLogSeverity] = None - user_id: Optional[PyObjectId] = None - user_email: Optional[str] = None - resource_type: Optional[str] = None - resource_id: Optional[str] = None - success: Optional[bool] = None - + action: AuditAction | None = None + severity: AuditLogSeverity | None = None + user_id: PyObjectId | None = None + user_email: str | None = None + resource_type: str | None = None + resource_id: str | None = None + success: bool | None = None + # Search - search: Optional[str] = None # Full-text search in description and details - + search: str | 
None = None # Full-text search in description and details + # Pagination skip: int = 0 limit: int = 100 - + # Sorting sort_by: str = "timestamp" sort_order: int = -1 # -1 for descending, 1 for ascending @@ -188,7 +189,7 @@ class AuditLogQuery(BaseModel): class AuditLogResponse(BaseModel): """Response schema for audit log queries.""" - + logs: list[AuditLog] total_count: int page: int diff --git a/backend/app/models/client.py b/backend/app/models/client.py index 1fcfd59..565ad36 100644 --- a/backend/app/models/client.py +++ b/backend/app/models/client.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Optional, Annotated +from typing import Annotated from bson import ObjectId from pydantic import BaseModel, BeforeValidator @@ -17,12 +17,12 @@ PyObjectId = Annotated[str, BeforeValidator(validate_object_id)] class Client(BaseModel): - id: Optional[str] = None + id: str | None = None name: str slug: str is_active: bool = True - created_at: Optional[datetime] = None - updated_at: Optional[datetime] = None + created_at: datetime | None = None + updated_at: datetime | None = None class ClientCreate(BaseModel): @@ -31,18 +31,18 @@ class ClientCreate(BaseModel): class ClientUpdate(BaseModel): - name: Optional[str] = None - slug: Optional[str] = None - is_active: Optional[bool] = None + name: str | None = None + slug: str | None = None + is_active: bool | None = None class Team(BaseModel): - id: Optional[str] = None + id: str | None = None name: str client_id: str member_user_ids: list[str] = [] - created_at: Optional[datetime] = None - updated_at: Optional[datetime] = None + created_at: datetime | None = None + updated_at: datetime | None = None class TeamCreate(BaseModel): @@ -50,31 +50,31 @@ class TeamCreate(BaseModel): class TeamUpdate(BaseModel): - name: Optional[str] = None + name: str | None = None class Project(BaseModel): - id: Optional[str] = None + id: str | None = None name: str client_id: str is_active: bool = True default_languages: list[str] 
= [] - default_linguist_id: Optional[str] = None - default_reviewer_id: Optional[str] = None - created_at: Optional[datetime] = None - updated_at: Optional[datetime] = None + default_linguist_id: str | None = None + default_reviewer_id: str | None = None + created_at: datetime | None = None + updated_at: datetime | None = None class ProjectCreate(BaseModel): name: str default_languages: list[str] = [] - default_linguist_id: Optional[str] = None - default_reviewer_id: Optional[str] = None + default_linguist_id: str | None = None + default_reviewer_id: str | None = None class ProjectUpdate(BaseModel): - name: Optional[str] = None - is_active: Optional[bool] = None - default_languages: Optional[list[str]] = None - default_linguist_id: Optional[str] = None - default_reviewer_id: Optional[str] = None + name: str | None = None + is_active: bool | None = None + default_languages: list[str] | None = None + default_linguist_id: str | None = None + default_reviewer_id: str | None = None diff --git a/backend/app/models/invitation.py b/backend/app/models/invitation.py index 42dbfd5..a89e9e0 100644 --- a/backend/app/models/invitation.py +++ b/backend/app/models/invitation.py @@ -1,5 +1,4 @@ from datetime import datetime -from typing import Optional from pydantic import BaseModel, EmailStr @@ -7,7 +6,7 @@ from .organization import OrgRole class Invitation(BaseModel): - id: Optional[str] = None + id: str | None = None email: str organization_id: str role_in_org: OrgRole @@ -15,9 +14,9 @@ class Invitation(BaseModel): token_hash: str invited_by_user_id: str expires_at: datetime - accepted_at: Optional[datetime] = None - revoked_at: Optional[datetime] = None - created_at: Optional[datetime] = None + accepted_at: datetime | None = None + revoked_at: datetime | None = None + created_at: datetime | None = None class InvitationCreate(BaseModel): @@ -40,9 +39,9 @@ class InvitationPreviewResponse(BaseModel): class InvitationAcceptRequest(BaseModel): token: str - full_name: Optional[str] = 
None - password: Optional[str] = None - ms_id_token: Optional[str] = None + full_name: str | None = None + password: str | None = None + ms_id_token: str | None = None class InvitationResponse(BaseModel): @@ -52,9 +51,9 @@ class InvitationResponse(BaseModel): role_in_org: OrgRole invited_by_user_id: str expires_at: datetime - accepted_at: Optional[datetime] = None - revoked_at: Optional[datetime] = None - created_at: Optional[datetime] = None + accepted_at: datetime | None = None + revoked_at: datetime | None = None + created_at: datetime | None = None is_expired: bool = False is_accepted: bool = False is_revoked: bool = False diff --git a/backend/app/models/job.py b/backend/app/models/job.py index cabf5be..9fbc564 100644 --- a/backend/app/models/job.py +++ b/backend/app/models/job.py @@ -1,6 +1,6 @@ from datetime import datetime from enum import Enum -from typing import Any, Literal, Optional +from typing import Any, Literal from pydantic import BaseModel, Field, constr @@ -43,12 +43,12 @@ class JobFailure(BaseModel): class Source(BaseModel): filename: str - original_filename: Optional[str] = None + original_filename: str | None = None gcs_uri: str - duration_s: Optional[float] = None + duration_s: float | None = None language: constr(min_length=2, max_length=10) = "en" # Final source language (from detection or explicit) - language_hint: Optional[str] = None # User-provided hint for non-English videos - detected_language: Optional[str] = None # AI-detected language from Gemini + language_hint: str | None = None # User-provided hint for non-English videos + detected_language: str | None = None # AI-detected language from Gemini class TTSPreferences(BaseModel): @@ -62,10 +62,10 @@ class TTSPreferences(BaseModel): style_preset: Literal[ "neutral", "calm", "energetic", "professional", "warm", "documentary", "custom" ] = "neutral" - custom_style_prompt: Optional[str] = None # Used when style_preset is "custom" + custom_style_prompt: str | None = None # Used when 
style_preset is "custom" # ElevenLabs-specific settings - stability: Optional[float] = None # 0.0-1.0, default 0.5 when used - similarity_boost: Optional[float] = None # 0.0-1.0, default 0.5 when used + stability: float | None = None # 0.0-1.0, default 0.5 when used + similarity_boost: float | None = None # 0.0-1.0, default 0.5 when used class RequestedOutputs(BaseModel): @@ -73,11 +73,11 @@ class RequestedOutputs(BaseModel): audio_description_vtt: bool = True audio_description_mp3: bool = True accessible_video_mp4: bool = False # Rendered video with embedded audio descriptions - accessible_video_method: Optional[Literal["overlay", "pause_insert"]] = None # User-selected method + accessible_video_method: Literal["overlay", "pause_insert"] | None = None # User-selected method sdh_vtt: bool = False # SDH (Subtitles for Deaf and Hard of Hearing) captions with speaker labels, sound effects, music notation languages: list[str] = [] transcreation: list[str] = [] - tts_preferences: Optional[TTSPreferences] = None + tts_preferences: TTSPreferences | None = None translation_mode: Literal["traditional", "video_native"] = "video_native" @@ -85,8 +85,8 @@ class PausePointData(BaseModel): """Pause point timing data for accessible video editing during QC.""" cue_index: int # AD cue index this pause point belongs to original_ms: float # Rendered timeline position (ms) - for UI display - source_ms: Optional[float] = None # Source video cut point (ms) - for re-rendering (None = use original_ms) - adjusted_ms: Optional[float] = None # User-adjusted timestamp (ms), None = use original + source_ms: float | None = None # Source video cut point (ms) - for re-rendering (None = use original_ms) + adjusted_ms: float | None = None # User-adjusted timestamp (ms), None = use original min_bound_ms: float # Minimum allowed value (end of previous AD segment) max_bound_ms: float # Maximum allowed value (start of next AD segment) @@ -99,16 +99,16 @@ class VideoSegmentMetadata(BaseModel): gcs_uri: 
str # GCS path to segment MP4 duration_ms: float # Actual segment duration (ms) is_freeze_frame: bool = False # True if this is a freeze frame segment with AD audio - cue_index: Optional[int] = None # AD cue index (only for freeze frame segments) + cue_index: int | None = None # AD cue index (only for freeze frame segments) class TTSRegenerationRequest(BaseModel): """Request to regenerate TTS for a specific cue during QC.""" cue_index: int requested_at: datetime - new_text: Optional[str] = None # If provided, use this text instead of current VTT + new_text: str | None = None # If provided, use this text instead of current VTT status: Literal["pending", "processing", "completed", "failed"] = "pending" - error_message: Optional[str] = None + error_message: str | None = None class AccessibleVideoEditState(BaseModel): @@ -116,39 +116,39 @@ class AccessibleVideoEditState(BaseModel): pause_points: list[PausePointData] = [] video_segments: list[VideoSegmentMetadata] = [] tts_regeneration_queue: list[TTSRegenerationRequest] = [] - last_render_at: Optional[datetime] = None + last_render_at: datetime | None = None whisper_refine_enabled: bool = False # Default: off (user enables if cue positions changed) class LangOutput(BaseModel): - captions_vtt_gcs: Optional[str] = None - sdh_captions_vtt_gcs: Optional[str] = None # SDH-format captions (speaker labels, sound effects, music) - ad_vtt_gcs: Optional[str] = None - ad_mp3_gcs: Optional[str] = None + captions_vtt_gcs: str | None = None + sdh_captions_vtt_gcs: str | None = None # SDH-format captions (speaker labels, sound effects, music) + ad_vtt_gcs: str | None = None + ad_mp3_gcs: str | None = None # Accessible video outputs - accessible_video_gcs: Optional[str] = None # Rendered accessible MP4 - accessible_video_method: Optional[Literal["overlay", "pause_insert"]] = None - retimed_captions_vtt_gcs: Optional[str] = None # Re-timed captions for pause-insert method - ad_cues_gcs_prefix: Optional[str] = None # GCS path prefix for 
per-cue MP3 segments - ad_cue_manifest: Optional[list[dict]] = None # Per-cue manifest: [{cue_index, gcs_uri, text, duration_s}] + accessible_video_gcs: str | None = None # Rendered accessible MP4 + accessible_video_method: Literal["overlay", "pause_insert"] | None = None + retimed_captions_vtt_gcs: str | None = None # Re-timed captions for pause-insert method + ad_cues_gcs_prefix: str | None = None # GCS path prefix for per-cue MP3 segments + ad_cue_manifest: list[dict] | None = None # Per-cue manifest: [{cue_index, gcs_uri, text, duration_s}] # QC editing state for accessible video - video_segments_gcs_prefix: Optional[str] = None # GCS prefix for persisted video segments - accessible_video_edit_state: Optional[AccessibleVideoEditState] = None - origin: Optional[Literal["translate", "transcreate", "gemini_translate", "video_native"]] = None - qa_notes: Optional[str] = None - descriptive_transcript_gcs: Optional[str] = None # WCAG-compliant combined speech+description transcript + video_segments_gcs_prefix: str | None = None # GCS prefix for persisted video segments + accessible_video_edit_state: AccessibleVideoEditState | None = None + origin: Literal["translate", "transcreate", "gemini_translate", "video_native"] | None = None + qa_notes: str | None = None + descriptive_transcript_gcs: str | None = None # WCAG-compliant combined speech+description transcript class ReviewHistoryItem(BaseModel): at: datetime status: str - by: Optional[str] = None - notes: Optional[str] = None + by: str | None = None + notes: str | None = None class Review(BaseModel): - notes: Optional[str] = "" - reviewer_id: Optional[str] = None + notes: str | None = "" + reviewer_id: str | None = None history: list[ReviewHistoryItem] = [] @@ -174,8 +174,8 @@ class LanguageQCEvent(BaseModel): "approve", "reject", "reopen", "comment_added", ] - notes: Optional[str] = None - previous_assignee_id: Optional[str] = None + notes: str | None = None + previous_assignee_id: str | None = None class 
LanguageQCComment(BaseModel): @@ -190,29 +190,29 @@ class LanguageQCComment(BaseModel): class LanguageQCState(BaseModel): status: LanguageQCStatus = LanguageQCStatus.PENDING # Linguist slot - assigned_linguist_id: Optional[str] = None - assigned_linguist_email: Optional[str] = None - assigned_linguist_name: Optional[str] = None - assigned_at: Optional[datetime] = None - assigned_by_user_id: Optional[str] = None - submitted_for_review_at: Optional[datetime] = None - linguist_deadline: Optional[datetime] = None # when linguist must submit + assigned_linguist_id: str | None = None + assigned_linguist_email: str | None = None + assigned_linguist_name: str | None = None + assigned_at: datetime | None = None + assigned_by_user_id: str | None = None + submitted_for_review_at: datetime | None = None + linguist_deadline: datetime | None = None # when linguist must submit # Reviewer slot - assigned_reviewer_id: Optional[str] = None - assigned_reviewer_email: Optional[str] = None - assigned_reviewer_name: Optional[str] = None - assigned_reviewer_at: Optional[datetime] = None - review_started_at: Optional[datetime] = None - reviewer_deadline: Optional[datetime] = None # when reviewer must decide + assigned_reviewer_id: str | None = None + assigned_reviewer_email: str | None = None + assigned_reviewer_name: str | None = None + assigned_reviewer_at: datetime | None = None + review_started_at: datetime | None = None + reviewer_deadline: datetime | None = None # when reviewer must decide # Reviewer progress - total_cues: Optional[int] = None # set when reviewer opens the job + total_cues: int | None = None # set when reviewer opens the job reviewed_cues: int = 0 # incremented as reviewer marks cues reviewed # Final outcome - reviewed_at: Optional[datetime] = None - reviewed_by_user_id: Optional[str] = None - reviewed_by_email: Optional[str] = None - notes: Optional[str] = None - reject_category: Optional[str] = None # e.g. 
timing/mistranslation/terminology/profanity/length + reviewed_at: datetime | None = None + reviewed_by_user_id: str | None = None + reviewed_by_email: str | None = None + notes: str | None = None + reject_category: str | None = None # e.g. timing/mistranslation/terminology/profanity/length history: list[LanguageQCEvent] = [] comments: list[LanguageQCComment] = [] @@ -225,47 +225,47 @@ class QCAssignment(BaseModel): class AISection(BaseModel): - ingestion_json: Optional[dict[str, Any]] = None - confidence: Optional[float] = None + ingestion_json: dict[str, Any] | None = None + confidence: float | None = None class AccessibleVideoProgressItem(BaseModel): """Progress tracking for accessible video rendering per language.""" status: Literal["pending", "rendering", "completed", "failed"] = "pending" - method: Optional[Literal["overlay", "pause_insert"]] = None - error_message: Optional[str] = None - started_at: Optional[datetime] = None - completed_at: Optional[datetime] = None + method: Literal["overlay", "pause_insert"] | None = None + error_message: str | None = None + started_at: datetime | None = None + completed_at: datetime | None = None class Job(BaseModel): - id: Optional[str] = Field(None, alias="_id") + id: str | None = Field(None, alias="_id") client_id: str title: str source: Source requested_outputs: RequestedOutputs status: JobStatus = JobStatus.CREATED review: Review = Review() - outputs: Optional[dict[str, LangOutput]] = None - accessible_video_progress: Optional[dict[str, AccessibleVideoProgressItem]] = None - ai: Optional[AISection] = None - error: Optional[dict[str, Any]] = None - failure: Optional[JobFailure] = None # structured failure info; see failure.step for pipeline stage + outputs: dict[str, LangOutput] | None = None + accessible_video_progress: dict[str, AccessibleVideoProgressItem] | None = None + ai: AISection | None = None + error: dict[str, Any] | None = None + failure: JobFailure | None = None # structured failure info; see failure.step 
for pipeline stage retry_count: int = 0 # total number of manual retries attempted - tts_rewrites: Optional[list[dict[str, Any]]] = None # Track auto-rewritten TTS cues - project_id: Optional[str] = None # Platform project this job belongs to (Client → Project → Job) - organization_id: Optional[str] = None # org-tenant ID; backfilled by 2026-04-28-000003 migration - brief_id: Optional[str] = None # JobBrief that originated this job (W-12) - gcs_prefix: Optional[str] = None # GCS path prefix; None = legacy flat {job_id}/ layout - initial_linguist_id: Optional[str] = None - initial_reviewer_id: Optional[str] = None - brand_context: Optional[str] = None # Brand names present in the video for accurate product identification - cost_tracker_project_id: Optional[str] = None # External project ID for AI cost attribution - deadline: Optional[datetime] = None # job-level PM deadline (overdue if past and not completed) + tts_rewrites: list[dict[str, Any]] | None = None # Track auto-rewritten TTS cues + project_id: str | None = None # Platform project this job belongs to (Client → Project → Job) + organization_id: str | None = None # org-tenant ID; backfilled by 2026-04-28-000003 migration + brief_id: str | None = None # JobBrief that originated this job (W-12) + gcs_prefix: str | None = None # GCS path prefix; None = legacy flat {job_id}/ layout + initial_linguist_id: str | None = None + initial_reviewer_id: str | None = None + brand_context: str | None = None # Brand names present in the video for accurate product identification + cost_tracker_project_id: str | None = None # External project ID for AI cost attribution + deadline: datetime | None = None # job-level PM deadline (overdue if past and not completed) language_qc: dict[str, LanguageQCState] = {} # per-language QC state, keyed by lang code qc_assignments: list[QCAssignment] = [] # denormalized for linguist-queue queries - created_at: Optional[datetime] = None - updated_at: Optional[datetime] = None + created_at: 
datetime | None = None + updated_at: datetime | None = None class Config: populate_by_name = True @@ -275,16 +275,16 @@ class Job(BaseModel): class JobCreate(BaseModel): title: str source_is_english: bool = True # True = English source, False = other language (auto-detect) - language_hint: Optional[str] = None # Optional hint when source_is_english=False + language_hint: str | None = None # Optional hint when source_is_english=False requested_outputs: RequestedOutputs - brand_context: Optional[str] = None # Comma-separated brand names present in the video (e.g. "Sellotape, Coca-Cola") + brand_context: str | None = None # Comma-separated brand names present in the video (e.g. "Sellotape, Coca-Cola") class JobUpdate(BaseModel): - title: Optional[str] = None - status: Optional[JobStatus] = None - review: Optional[Review] = None - outputs: Optional[dict[str, LangOutput]] = None - ai: Optional[AISection] = None - error: Optional[dict[str, Any]] = None - deadline: Optional[datetime] = None + title: str | None = None + status: JobStatus | None = None + review: Review | None = None + outputs: dict[str, LangOutput] | None = None + ai: AISection | None = None + error: dict[str, Any] | None = None + deadline: datetime | None = None diff --git a/backend/app/models/job_brief.py b/backend/app/models/job_brief.py index 29f2d45..8ebf6fd 100644 --- a/backend/app/models/job_brief.py +++ b/backend/app/models/job_brief.py @@ -1,7 +1,6 @@ """Job Brief model — pre-approved work order submitted before job creation.""" from datetime import datetime from enum import Enum -from typing import Optional from pydantic import BaseModel, Field @@ -17,22 +16,22 @@ class BriefStatus(str, Enum): class JobBrief(BaseModel): - id: Optional[str] = Field(None, alias="_id") + id: str | None = Field(None, alias="_id") organization_id: str - project_id: Optional[str] = None + project_id: str | None = None title: str - description: Optional[str] = None + description: str | None = None requested_outputs: 
RequestedOutputs languages: list[str] = [] - deadline: Optional[datetime] = None + deadline: datetime | None = None status: BriefStatus = BriefStatus.DRAFT created_by: str - job_id: Optional[str] = None + job_id: str | None = None created_at: datetime = Field(default_factory=datetime.utcnow) updated_at: datetime = Field(default_factory=datetime.utcnow) - submitted_at: Optional[datetime] = None - approved_by: Optional[str] = None - reject_reason: Optional[str] = None + submitted_at: datetime | None = None + approved_by: str | None = None + reject_reason: str | None = None class Config: populate_by_name = True @@ -40,34 +39,34 @@ class JobBrief(BaseModel): class JobBriefCreate(BaseModel): title: str - description: Optional[str] = None + description: str | None = None requested_outputs: RequestedOutputs languages: list[str] = [] - deadline: Optional[datetime] = None - project_id: Optional[str] = None + deadline: datetime | None = None + project_id: str | None = None class JobBriefUpdate(BaseModel): - title: Optional[str] = None - description: Optional[str] = None - requested_outputs: Optional[RequestedOutputs] = None - languages: Optional[list[str]] = None - deadline: Optional[datetime] = None + title: str | None = None + description: str | None = None + requested_outputs: RequestedOutputs | None = None + languages: list[str] | None = None + deadline: datetime | None = None class JobBriefResponse(BaseModel): id: str organization_id: str - project_id: Optional[str] = None + project_id: str | None = None title: str - description: Optional[str] = None + description: str | None = None requested_outputs: RequestedOutputs languages: list[str] - deadline: Optional[datetime] = None + deadline: datetime | None = None status: BriefStatus created_by: str - job_id: Optional[str] = None + job_id: str | None = None created_at: str updated_at: str - submitted_at: Optional[str] = None - approved_by: Optional[str] = None + submitted_at: str | None = None + approved_by: str | None = 
None diff --git a/backend/app/models/membership.py b/backend/app/models/membership.py index d3abfa6..72df915 100644 --- a/backend/app/models/membership.py +++ b/backend/app/models/membership.py @@ -1,5 +1,4 @@ from datetime import datetime -from typing import Optional from pydantic import BaseModel @@ -7,13 +6,13 @@ from .organization import OrgRole class Membership(BaseModel): - id: Optional[str] = None + id: str | None = None user_id: str organization_id: str role_in_org: OrgRole team_ids: list[str] = [] # teams the user belongs to within this org (MT-17) - created_at: Optional[datetime] = None - created_by: Optional[str] = None + created_at: datetime | None = None + created_by: str | None = None class MembershipCreate(BaseModel): @@ -32,4 +31,4 @@ class MemberDetail(BaseModel): email: str full_name: str role_in_org: OrgRole - created_at: Optional[datetime] = None + created_at: datetime | None = None diff --git a/backend/app/models/organization.py b/backend/app/models/organization.py index 4568a1d..5b39f32 100644 --- a/backend/app/models/organization.py +++ b/backend/app/models/organization.py @@ -1,6 +1,5 @@ from datetime import datetime from enum import Enum -from typing import Optional from pydantic import BaseModel @@ -30,13 +29,13 @@ class OrgRole(str, Enum): class Organization(BaseModel): - id: Optional[str] = None + id: str | None = None name: str slug: str is_active: bool = True plan: str = "standard" - created_at: Optional[datetime] = None - updated_at: Optional[datetime] = None + created_at: datetime | None = None + updated_at: datetime | None = None class OrganizationCreate(BaseModel): @@ -45,7 +44,7 @@ class OrganizationCreate(BaseModel): class OrganizationUpdate(BaseModel): - name: Optional[str] = None - slug: Optional[str] = None - is_active: Optional[bool] = None - plan: Optional[str] = None + name: str | None = None + slug: str | None = None + is_active: bool | None = None + plan: str | None = None diff --git a/backend/app/models/review_note.py 
b/backend/app/models/review_note.py index 4611264..c6b859a 100644 --- a/backend/app/models/review_note.py +++ b/backend/app/models/review_note.py @@ -1,7 +1,6 @@ """Review Note model for timestamped video review notes.""" from datetime import datetime -from typing import Optional from pydantic import BaseModel, Field @@ -9,7 +8,7 @@ from pydantic import BaseModel, Field class ReviewNote(BaseModel): """A timestamped note attached to a video asset during review.""" - id: Optional[str] = Field(None, alias="_id") + id: str | None = Field(None, alias="_id") job_id: str asset_key: str # e.g., "en", "es", "en_accessible" timestamp_seconds: float # Video timestamp when note was created @@ -17,7 +16,7 @@ class ReviewNote(BaseModel): user_id: str # Author's user ID user_name: str # Author's display name (denormalized for display) created_at: datetime - updated_at: Optional[datetime] = None + updated_at: datetime | None = None class Config: populate_by_name = True diff --git a/backend/app/models/user.py b/backend/app/models/user.py index 22a9990..6490965 100644 --- a/backend/app/models/user.py +++ b/backend/app/models/user.py @@ -1,9 +1,9 @@ from datetime import datetime from enum import Enum -from typing import Optional, Annotated +from typing import Annotated from bson import ObjectId -from pydantic import BaseModel, EmailStr, Field, BeforeValidator +from pydantic import BaseModel, BeforeValidator, EmailStr, Field def validate_object_id(v) -> str: @@ -33,17 +33,17 @@ class AuthProvider(str, Enum): class User(BaseModel): - id: Optional[PyObjectId] = Field(None, alias="_id") + id: PyObjectId | None = Field(None, alias="_id") email: EmailStr - hashed_password: Optional[str] = None # Optional for Microsoft users + hashed_password: str | None = None # Optional for Microsoft users full_name: str role: UserRole = UserRole.CLIENT auth_provider: AuthProvider = AuthProvider.LOCAL is_active: bool = True pm_client_ids: list[str] = [] # Client IDs where this user is Project Manager 
(admin-assigned) languages: list[str] = [] # BCP-47 language codes the user is competent in (R-8) - created_at: Optional[datetime] = None - updated_at: Optional[datetime] = None + created_at: datetime | None = None + updated_at: datetime | None = None class Config: populate_by_name = True @@ -62,9 +62,9 @@ class UserCreate(BaseModel): class UserUpdate(BaseModel): - email: Optional[EmailStr] = None - full_name: Optional[str] = None - role: Optional[UserRole] = None - is_active: Optional[bool] = None - pm_client_ids: Optional[list[str]] = None - languages: Optional[list[str]] = None + email: EmailStr | None = None + full_name: str | None = None + role: UserRole | None = None + is_active: bool | None = None + pm_client_ids: list[str] | None = None + languages: list[str] | None = None diff --git a/backend/app/models/vtt_version.py b/backend/app/models/vtt_version.py index acecdfa..36b355a 100644 --- a/backend/app/models/vtt_version.py +++ b/backend/app/models/vtt_version.py @@ -1,9 +1,8 @@ from datetime import datetime -from typing import Literal, Optional +from typing import Literal from pydantic import BaseModel, Field - VttKind = Literal["captions", "ad"] @@ -13,7 +12,7 @@ class VttVersionActor(BaseModel): class VttVersion(BaseModel): - id: Optional[str] = Field(None, alias="_id") + id: str | None = Field(None, alias="_id") job_id: str lang: str kind: VttKind @@ -22,8 +21,8 @@ class VttVersion(BaseModel): gcs_uri: str created_at: datetime = Field(default_factory=datetime.utcnow) created_by: VttVersionActor - note: Optional[str] = None - parent_version: Optional[int] = None + note: str | None = None + parent_version: int | None = None cue_count: int = 0 byte_size: int = 0 @@ -33,7 +32,7 @@ class VttVersion(BaseModel): class VttVersionSummary(BaseModel): """Lightweight version entry for list responses (no content).""" - id: Optional[str] = Field(None, alias="_id") + id: str | None = Field(None, alias="_id") job_id: str lang: str kind: VttKind @@ -41,8 +40,8 @@ class 
VttVersionSummary(BaseModel): gcs_uri: str created_at: datetime created_by: VttVersionActor - note: Optional[str] = None - parent_version: Optional[int] = None + note: str | None = None + parent_version: int | None = None cue_count: int = 0 byte_size: int = 0 @@ -58,8 +57,8 @@ class VttVersionListResponse(BaseModel): class DiffLine(BaseModel): type: Literal["unchanged", "added", "removed"] content: str - line_no_old: Optional[int] = None - line_no_new: Optional[int] = None + line_no_old: int | None = None + line_no_new: int | None = None class VttDiffResponse(BaseModel): diff --git a/backend/app/schemas/accessible_video.py b/backend/app/schemas/accessible_video.py index 62bc2fa..18ad81d 100644 --- a/backend/app/schemas/accessible_video.py +++ b/backend/app/schemas/accessible_video.py @@ -1,7 +1,6 @@ """Schemas for accessible video generation with embedded audio descriptions.""" from enum import Enum -from typing import Optional from pydantic import BaseModel, Field @@ -32,29 +31,29 @@ class ADPlacementCue(BaseModel): target_start_time: float = Field(..., description="Target time in output video (seconds)") ad_duration: float = Field(..., description="Duration of the AD TTS audio in seconds") # For pause-insert method - pause_point: Optional[float] = Field( + pause_point: float | None = Field( None, description="Where to pause the video - just before the next sentence starts (gap.end - buffer). Used for pause-insert method." ) - resume_from: Optional[float] = Field( + resume_from: float | None = Field( None, description="Where to resume video after AD plays - just after the previous sentence ends (gap.start + buffer). Creates a small overlap for natural transitions." ) - pause_point_rationale: Optional[str] = Field( + pause_point_rationale: str | None = Field( None, description="Explanation of why this pause point was chosen, referencing the sentence boundary." 
) # Whisper refinement tracking - original_pause_point: Optional[float] = Field( + original_pause_point: float | None = Field( None, description="Original pause point from Gemini before Whisper refinement (seconds)." ) # For overlay method - duck_start: Optional[float] = Field( + duck_start: float | None = Field( None, description="When to start ducking original audio (seconds). Used for overlay method." ) - duck_end: Optional[float] = Field( + duck_end: float | None = Field( None, description="When to end ducking original audio (seconds). Used for overlay method." ) @@ -118,10 +117,10 @@ class AccessibleVideoRenderRequest(BaseModel): class AccessibleVideoProgress(BaseModel): """Progress status for accessible video rendering.""" status: str = Field(..., description="pending | rendering | completed | failed") - method: Optional[AccessibleVideoMethod] = None - error_message: Optional[str] = None - started_at: Optional[str] = None - completed_at: Optional[str] = None + method: AccessibleVideoMethod | None = None + error_message: str | None = None + started_at: str | None = None + completed_at: str | None = None # === QC Review Accessible Video Editing Schemas === @@ -131,8 +130,8 @@ class PausePointResponse(BaseModel): """Pause point timing data for QC editing.""" cue_index: int = Field(..., description="AD cue index this pause point belongs to") original_ms: float = Field(..., description="Rendered timeline position (ms) - for display") - source_ms: Optional[float] = Field(None, description="Source video cut point (ms) - for re-rendering (None = use original_ms)") - adjusted_ms: Optional[float] = Field(None, description="User-adjusted timestamp (ms)") + source_ms: float | None = Field(None, description="Source video cut point (ms) - for re-rendering (None = use original_ms)") + adjusted_ms: float | None = Field(None, description="User-adjusted timestamp (ms)") min_bound_ms: float = Field(..., description="Minimum allowed value (ms)") max_bound_ms: float = Field(..., 
description="Maximum allowed value (ms)") @@ -145,16 +144,16 @@ class VideoSegmentResponse(BaseModel): gcs_uri: str = Field(..., description="GCS path to segment MP4") duration_ms: float = Field(..., description="Actual segment duration (ms)") is_freeze_frame: bool = Field(False, description="True if freeze frame with AD audio") - cue_index: Optional[int] = Field(None, description="AD cue index (freeze frames only)") + cue_index: int | None = Field(None, description="AD cue index (freeze frames only)") class TTSRegenerationItem(BaseModel): """A queued TTS regeneration request.""" cue_index: int = Field(..., description="AD cue index to regenerate") requested_at: str = Field(..., description="ISO timestamp when requested") - new_text: Optional[str] = Field(None, description="Override text (if provided)") + new_text: str | None = Field(None, description="Override text (if provided)") status: str = Field("pending", description="pending | processing | completed | failed") - error_message: Optional[str] = None + error_message: str | None = None class AccessibleVideoEditStateResponse(BaseModel): @@ -171,12 +170,12 @@ class AccessibleVideoEditStateResponse(BaseModel): default_factory=list, description="Queued TTS regeneration requests" ) - last_render_at: Optional[str] = Field( + last_render_at: str | None = Field( None, description="ISO timestamp of last accessible video render" ) total_duration_ms: float = Field(..., description="Total accessible video duration (ms)") - accessible_video_url: Optional[str] = Field( + accessible_video_url: str | None = Field( None, description="Signed URL for accessible video preview" ) diff --git a/backend/app/schemas/auth.py b/backend/app/schemas/auth.py index dee4e27..b71de2e 100644 --- a/backend/app/schemas/auth.py +++ b/backend/app/schemas/auth.py @@ -1,6 +1,7 @@ -from typing import Optional + from pydantic import BaseModel, EmailStr -from ..models.user import UserRole, AuthProvider + +from ..models.user import AuthProvider, UserRole 
class LoginRequest(BaseModel): @@ -52,7 +53,7 @@ class UserResponse(BaseModel): role: UserRole auth_provider: AuthProvider is_active: bool - created_at: Optional[str] = None + created_at: str | None = None pm_client_ids: list[str] = [] languages: list[str] = [] # BCP-47 codes for R-8 linguist competence check @@ -72,10 +73,10 @@ class CreateUserRequest(BaseModel): class UpdateUserRequest(BaseModel): - email: Optional[EmailStr] = None - full_name: Optional[str] = None - role: Optional[UserRole] = None - is_active: Optional[bool] = None + email: EmailStr | None = None + full_name: str | None = None + role: UserRole | None = None + is_active: bool | None = None class ChangePasswordRequest(BaseModel): diff --git a/backend/app/schemas/file.py b/backend/app/schemas/file.py index f195b37..6e64ae1 100644 --- a/backend/app/schemas/file.py +++ b/backend/app/schemas/file.py @@ -1,4 +1,3 @@ -from typing import Optional from pydantic import BaseModel @@ -6,10 +5,10 @@ from pydantic import BaseModel class SignedUploadRequest(BaseModel): filename: str content_type: str - max_size: Optional[int] = None + max_size: int | None = None class SignedUploadResponse(BaseModel): upload_url: str fields: dict[str, str] - blob_path: str \ No newline at end of file + blob_path: str diff --git a/backend/app/schemas/job.py b/backend/app/schemas/job.py index a75feac..73a7699 100644 --- a/backend/app/schemas/job.py +++ b/backend/app/schemas/job.py @@ -1,4 +1,4 @@ -from typing import Any, Literal, Optional, Union +from typing import Any from pydantic import BaseModel @@ -15,18 +15,18 @@ from ..schemas.accessible_video import AccessibleVideoMethod class JobResponse(BaseModel): id: str - client_id: Optional[str] = None # ID of the user who created the job + client_id: str | None = None # ID of the user who created the job title: str status: JobStatus source: dict[str, Any] requested_outputs: RequestedOutputs review: Review - outputs: Optional[dict[str, LangOutput]] = None - accessible_video_progress: 
Optional[dict[str, AccessibleVideoProgressItem]] = None - created_at: Optional[str] = None - updated_at: Optional[str] = None - created_by_name: Optional[str] = None # User's full_name who created the job - cost_tracker_project_id: Optional[str] = None + outputs: dict[str, LangOutput] | None = None + accessible_video_progress: dict[str, AccessibleVideoProgressItem] | None = None + created_at: str | None = None + updated_at: str | None = None + created_by_name: str | None = None # User's full_name who created the job + cost_tracker_project_id: str | None = None class JobListResponse(BaseModel): @@ -42,20 +42,20 @@ class JobCreateRequest(BaseModel): class JobUpdateRequest(BaseModel): - title: Optional[str] = None - review_notes: Optional[str] = None - cost_tracker_project_id: Optional[str] = None + title: str | None = None + review_notes: str | None = None + cost_tracker_project_id: str | None = None class ApproveEnglishRequest(BaseModel): - notes: Optional[str] = None + notes: str | None = None class ApproveSourceRequest(BaseModel): """Request to approve source language content (works for any language)""" - notes: Optional[str] = None - tts_preferences: Optional[TTSPreferences] = None # Override TTS voice settings - accessible_video_method: Optional[AccessibleVideoMethod] = None # User-selected method for accessible video + notes: str | None = None + tts_preferences: TTSPreferences | None = None # Override TTS voice settings + accessible_video_method: AccessibleVideoMethod | None = None # User-selected method for accessible video class UpdateTTSPreferencesRequest(BaseModel): @@ -68,14 +68,14 @@ class RejectJobRequest(BaseModel): class CompleteJobRequest(BaseModel): - notes: Optional[str] = None + notes: str | None = None class VttUpdateRequest(BaseModel): - captions_vtt: Optional[str] = None - audio_description_vtt: Optional[str] = None - language: Optional[str] = None # If None, defaults to source language - if_match: Optional[str] = None # Optimistic locking — 
SHA1 of expected current content + captions_vtt: str | None = None + audio_description_vtt: str | None = None + language: str | None = None # If None, defaults to source language + if_match: str | None = None # Optimistic locking — SHA1 of expected current content class VttTimingAdjustRequest(BaseModel): @@ -86,14 +86,14 @@ class VttTimingAdjustRequest(BaseModel): class JobDownloadsResponse(BaseModel): - downloads: dict[str, Union[dict[str, str], str]] # language -> {file_type: signed_url} OR source_video -> signed_url + downloads: dict[str, dict[str, str] | str] # language -> {file_type: signed_url} OR source_video -> signed_url class VttContentResponse(BaseModel): - captions_vtt: Optional[str] = None - audio_description_vtt: Optional[str] = None - retimed_captions_vtt: Optional[str] = None # Re-timed captions for accessible videos - etag: Optional[str] = None # SHA1 hash for optimistic locking (If-Match on PATCH) + captions_vtt: str | None = None + audio_description_vtt: str | None = None + retimed_captions_vtt: str | None = None # Re-timed captions for accessible videos + etag: str | None = None # SHA1 hash for optimistic locking (If-Match on PATCH) class AssetValidationResponse(BaseModel): @@ -119,9 +119,9 @@ class BulkDeleteResponse(BaseModel): class BulkApproveRequest(BaseModel): """Request to bulk approve multiple jobs with optional settings""" job_ids: list[str] - notes: Optional[str] = None - accessible_video_method: Optional[AccessibleVideoMethod] = None # Method for accessible video - tts_preferences: Optional[TTSPreferences] = None + notes: str | None = None + accessible_video_method: AccessibleVideoMethod | None = None # Method for accessible video + tts_preferences: TTSPreferences | None = None class BulkApproveResponse(BaseModel): diff --git a/backend/app/schemas/review_note.py b/backend/app/schemas/review_note.py index ecd922c..e524894 100644 --- a/backend/app/schemas/review_note.py +++ b/backend/app/schemas/review_note.py @@ -1,7 +1,6 @@ 
"""Pydantic schemas for Review Note API requests and responses.""" from datetime import datetime -from typing import Optional from pydantic import BaseModel, Field @@ -31,7 +30,7 @@ class ReviewNoteResponse(BaseModel): user_id: str user_name: str created_at: str # ISO format - updated_at: Optional[str] = None # ISO format + updated_at: str | None = None # ISO format @classmethod def from_model(cls, note: dict) -> "ReviewNoteResponse": diff --git a/backend/app/services/audit_logger.py b/backend/app/services/audit_logger.py index 39261b4..81378de 100644 --- a/backend/app/services/audit_logger.py +++ b/backend/app/services/audit_logger.py @@ -2,19 +2,19 @@ import uuid from datetime import datetime, timedelta -from typing import Any, Dict, List, Optional +from typing import Any + from fastapi import Request from motor.motor_asyncio import AsyncIOMotorCollection -from app.core.database import get_database from app.core.config import get_settings +from app.core.database import get_database from app.models.audit_log import ( - AuditLog, - AuditLogCreate, - AuditLogQuery, + AuditAction, + AuditLog, + AuditLogQuery, AuditLogResponse, - AuditAction, - AuditLogSeverity + AuditLogSeverity, ) from app.models.user import User from app.telemetry.tracing import trace_async_operation @@ -22,32 +22,32 @@ from app.telemetry.tracing import trace_async_operation class AuditLogger: """Service for managing audit logs.""" - + def __init__(self): self.settings = get_settings() - self.collection: Optional[AsyncIOMotorCollection] = None - + self.collection: AsyncIOMotorCollection | None = None + async def _get_collection(self) -> AsyncIOMotorCollection: """Get the audit logs collection.""" if self.collection is None: db = await get_database() self.collection = db.audit_logs return self.collection - + @trace_async_operation("audit_logger.log_action") async def log_action( self, action: AuditAction, description: str, - user: Optional[User] = None, - request: Optional[Request] = None, - 
resource_type: Optional[str] = None, - resource_id: Optional[str] = None, - resource_name: Optional[str] = None, - details: Optional[Dict[str, Any]] = None, + user: User | None = None, + request: Request | None = None, + resource_type: str | None = None, + resource_id: str | None = None, + resource_name: str | None = None, + details: dict[str, Any] | None = None, severity: AuditLogSeverity = AuditLogSeverity.INFO, success: bool = True, - error_message: Optional[str] = None + error_message: str | None = None ) -> str: """ Log an audit event. @@ -55,12 +55,12 @@ class AuditLogger: Returns: The ID of the created audit log entry. """ - + # Extract request context ip_address = None user_agent = None request_id = None - + if request: # Get IP address (handle forwarded headers) forwarded_for = request.headers.get("X-Forwarded-For") @@ -68,10 +68,10 @@ class AuditLogger: ip_address = forwarded_for.split(',')[0].strip() elif request.client: ip_address = request.client.host - + user_agent = request.headers.get("User-Agent") request_id = request.headers.get("X-Request-ID", str(uuid.uuid4())) - + # Create audit log entry audit_log = AuditLog( action=action, @@ -93,22 +93,22 @@ class AuditLogger: service_name="accessible-video-api", api_version="v1" ) - + # Save to database collection = await self._get_collection() result = await collection.insert_one(audit_log.dict(by_alias=True)) - + return str(result.inserted_id) - + @trace_async_operation("audit_logger.query_logs") async def query_logs(self, query: AuditLogQuery) -> AuditLogResponse: """Query audit logs with filtering and pagination.""" - + collection = await self._get_collection() - + # Build MongoDB query mongo_query = {} - + # Time range filter if query.start_date or query.end_date: timestamp_filter = {} @@ -117,7 +117,7 @@ class AuditLogger: if query.end_date: timestamp_filter["$lte"] = query.end_date mongo_query["timestamp"] = timestamp_filter - + # Exact match filters if query.action: mongo_query["action"] = 
query.action @@ -136,7 +136,7 @@ class AuditLogger: mongo_query["resource_id"] = query.resource_id if query.success is not None: mongo_query["success"] = query.success - + # Text search if query.search: mongo_query["$or"] = [ @@ -144,23 +144,23 @@ class AuditLogger: {"details": {"$regex": query.search, "$options": "i"}}, {"error_message": {"$regex": query.search, "$options": "i"}} ] - + # Get total count total_count = await collection.count_documents(mongo_query) - + # Execute query with pagination and sorting cursor = collection.find(mongo_query) - + # Apply sorting sort_direction = query.sort_order cursor = cursor.sort(query.sort_by, sort_direction) - + # Apply pagination cursor = cursor.skip(query.skip).limit(query.limit) - + # Execute query documents = await cursor.to_list(length=query.limit) - + # Convert to Pydantic models logs = [] for doc in documents: @@ -170,11 +170,11 @@ class AuditLogger: # Log conversion error but continue print(f"Error converting audit log document: {e}") continue - + # Calculate pagination info page = (query.skip // query.limit) + 1 has_more = (query.skip + len(logs)) < total_count - + return AuditLogResponse( logs=logs, total_count=total_count, @@ -182,14 +182,14 @@ class AuditLogger: page_size=len(logs), has_more=has_more ) - - async def get_user_activity(self, user_id: str, days: int = 30) -> List[AuditLog]: + + async def get_user_activity(self, user_id: str, days: int = 30) -> list[AuditLog]: """Get recent activity for a specific user.""" - + from_date = datetime.utcnow().replace( hour=0, minute=0, second=0, microsecond=0 ) - timedelta(days=days) - + query = AuditLogQuery( user_id=user_id, start_date=from_date, @@ -197,15 +197,15 @@ class AuditLogger: sort_by="timestamp", sort_order=-1 ) - + response = await self.query_logs(query) return response.logs - - async def get_security_events(self, hours: int = 24) -> List[AuditLog]: + + async def get_security_events(self, hours: int = 24) -> list[AuditLog]: """Get recent 
security-related events.""" - + from_date = datetime.utcnow() - timedelta(hours=hours) - + security_actions = [ AuditAction.LOGIN_FAILURE, AuditAction.RATE_LIMIT_EXCEEDED, @@ -213,38 +213,38 @@ class AuditLogger: AuditAction.UNAUTHORIZED_ACCESS, AuditAction.SUSPICIOUS_ACTIVITY ] - + collection = await self._get_collection() - + query = { "timestamp": {"$gte": from_date}, "action": {"$in": security_actions} } - + cursor = collection.find(query).sort("timestamp", -1).limit(1000) documents = await cursor.to_list(length=1000) - + logs = [] for doc in documents: try: logs.append(AuditLog(**doc)) except Exception: continue - + return logs - + async def cleanup_old_logs(self, retention_days: int = 365) -> int: """Clean up audit logs older than retention period.""" - + cutoff_date = datetime.utcnow().replace( hour=0, minute=0, second=0, microsecond=0 ) - timedelta(days=retention_days) - + collection = await self._get_collection() result = await collection.delete_many({ "timestamp": {"$lt": cutoff_date} }) - + return result.deleted_count @@ -277,16 +277,16 @@ async def log_auth_failure(email: str, request: Request, reason: str): ) -async def log_job_action(action: AuditAction, job_id: str, user: User, request: Request, details: Optional[Dict] = None): +async def log_job_action(action: AuditAction, job_id: str, user: User, request: Request, details: dict | None = None): """Log job-related actions.""" action_descriptions = { AuditAction.JOB_CREATE: "Job created", - AuditAction.JOB_APPROVE: "Job approved", + AuditAction.JOB_APPROVE: "Job approved", AuditAction.JOB_REJECT: "Job rejected", AuditAction.JOB_CANCEL: "Job cancelled", AuditAction.JOB_UPDATE: "Job updated" } - + await audit_logger.log_action( action=action, description=f"{action_descriptions.get(action, str(action))} by {user.email}", @@ -298,7 +298,7 @@ async def log_job_action(action: AuditAction, job_id: str, user: User, request: ) -async def log_user_management(action: AuditAction, target_user_id: str, admin_user: 
User, request: Request, details: Optional[Dict] = None): +async def log_user_management(action: AuditAction, target_user_id: str, admin_user: User, request: Request, details: dict | None = None): """Log user management actions.""" action_descriptions = { AuditAction.USER_CREATE: "User created", @@ -308,7 +308,7 @@ async def log_user_management(action: AuditAction, target_user_id: str, admin_us AuditAction.USER_ACTIVATE: "User activated", AuditAction.USER_DEACTIVATE: "User deactivated" } - + await audit_logger.log_action( action=action, description=f"{action_descriptions.get(action, str(action))} by admin {admin_user.email}", @@ -321,7 +321,7 @@ async def log_user_management(action: AuditAction, target_user_id: str, admin_us ) -async def log_security_event(action: AuditAction, description: str, request: Request, user: Optional[User] = None, details: Optional[Dict] = None): +async def log_security_event(action: AuditAction, description: str, request: Request, user: User | None = None, details: dict | None = None): """Log security-related events.""" await audit_logger.log_action( action=action, @@ -331,4 +331,4 @@ async def log_security_event(action: AuditAction, description: str, request: Req severity=AuditLogSeverity.WARNING if action != AuditAction.SUSPICIOUS_ACTIVITY else AuditLogSeverity.CRITICAL, success=False, details=details - ) \ No newline at end of file + ) diff --git a/backend/app/services/cloud_run_dispatch.py b/backend/app/services/cloud_run_dispatch.py index b22f21d..a9f9bc2 100644 --- a/backend/app/services/cloud_run_dispatch.py +++ b/backend/app/services/cloud_run_dispatch.py @@ -43,9 +43,10 @@ async def dispatch(task: str, job_id: str, **extra_args: str | list) -> str: if _USE_CELERY: return _celery_fallback(task, job_id, **extra_args) - from ..core.config import settings from google.cloud import run_v2 # type: ignore[import] + from ..core.config import settings + args = ["--task", task, "--job-id", job_id] for key, val in extra_args.items(): cli_key 
= f"--{key.replace('_', '-')}" diff --git a/backend/app/services/cost_tracker.py b/backend/app/services/cost_tracker.py index a7c014c..7986ee3 100644 --- a/backend/app/services/cost_tracker.py +++ b/backend/app/services/cost_tracker.py @@ -1,7 +1,6 @@ """Thin HTTP client for the centralized Oliver AI Cost Tracker.""" import asyncio -from typing import Optional import httpx @@ -19,7 +18,7 @@ def preflight( *, model: str, user_external_id: str, - project_id: Optional[str] = None, + project_id: str | None = None, ) -> None: if not settings.cost_tracker_base_url or not settings.cost_tracker_enabled: return @@ -51,7 +50,7 @@ async def aio_preflight( *, model: str, user_external_id: str, - project_id: Optional[str] = None, + project_id: str | None = None, ) -> None: await asyncio.to_thread(preflight, model=model, user_external_id=user_external_id, project_id=project_id) @@ -61,11 +60,11 @@ def record( model: str, provider: str, user_external_id: str, - project_id: Optional[str] = None, + project_id: str | None = None, job_external_id: str = "", input_tokens: int = 0, output_tokens: int = 0, - chars: Optional[int] = None, + chars: int | None = None, latency_ms: int = 0, status: str = "success", ) -> None: diff --git a/backend/app/services/descriptive_transcript.py b/backend/app/services/descriptive_transcript.py index 6b07e78..aef3426 100644 --- a/backend/app/services/descriptive_transcript.py +++ b/backend/app/services/descriptive_transcript.py @@ -16,8 +16,8 @@ Format: Reference: WCAG 2.1 Success Criterion 1.2.1 """ -from ..lib.vtt import VTTCue, VTTParser from ..core.logging import get_logger +from ..lib.vtt import VTTCue, VTTParser logger = get_logger(__name__) diff --git a/backend/app/services/elevenlabs_voices.py b/backend/app/services/elevenlabs_voices.py index 873821c..dde106c 100644 --- a/backend/app/services/elevenlabs_voices.py +++ b/backend/app/services/elevenlabs_voices.py @@ -6,7 +6,6 @@ Fetches and caches available voices from the ElevenLabs API. 
import time from dataclasses import dataclass, field -from typing import Optional import aiohttp @@ -90,7 +89,7 @@ class ElevenLabsVoiceService: return voices - async def get_voice_by_id(self, voice_id: str) -> Optional[ElevenLabsVoice]: + async def get_voice_by_id(self, voice_id: str) -> ElevenLabsVoice | None: """Look up a specific voice by ID.""" voices = await self.get_voices() for v in voices: diff --git a/backend/app/services/ffmpeg_http_service.py b/backend/app/services/ffmpeg_http_service.py index afed87f..ffde5bf 100644 --- a/backend/app/services/ffmpeg_http_service.py +++ b/backend/app/services/ffmpeg_http_service.py @@ -13,8 +13,6 @@ import logging import os import subprocess import tempfile -import uuid -from typing import Any, Optional from fastapi import FastAPI, HTTPException from google.cloud import storage diff --git a/backend/app/services/gemini.py b/backend/app/services/gemini.py index b281175..fbb42a1 100644 --- a/backend/app/services/gemini.py +++ b/backend/app/services/gemini.py @@ -1,8 +1,8 @@ -import json import asyncio +import json import time from pathlib import Path -from typing import Any, Optional +from typing import Any import google.genai as genai @@ -21,7 +21,7 @@ async def _record_gemini_usage( model: str, user_id: str, job_id: str, - project_id: Optional[str], + project_id: str | None, elapsed_ms: int, ) -> None: try: @@ -61,31 +61,31 @@ class GeminiService: """Wait for uploaded file to become ACTIVE state""" wait_time = 1 # Start with 1 second total_waited = 0 - + while total_waited < max_wait_seconds: try: # Get file status - use asyncio.to_thread to avoid blocking event loop file_info = await asyncio.to_thread(client.files.get, name=file_name) logger.info(f"File {file_name} status: {file_info.state} (waited {total_waited}s)") - + if file_info.state == "ACTIVE": logger.info(f"File {file_name} is now ACTIVE!") return True elif file_info.state == "FAILED": logger.error(f"File {file_name} processing FAILED") return False - + # Wait 
with exponential backoff (max 30s) logger.info(f"File not ready, waiting {wait_time}s...") await asyncio.sleep(wait_time) total_waited += wait_time wait_time = min(wait_time * 1.5, 30) # Exponential backoff, max 30s - + except Exception as e: logger.error(f"Error checking file status: {e}") await asyncio.sleep(5) # Wait 5s on error total_waited += 5 - + logger.error(f"File {file_name} did not become ACTIVE within {max_wait_seconds}s") return False @@ -107,13 +107,13 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w - Maintain the same timestamp format as captions_vtt (HH:MM:SS.mmm --> HH:MM:SS.mmm) - Only add sound effect cues where they add meaningful context; do not annotate every minor sound""" - def _build_glossary_block(self, glossary_block: Optional[str]) -> str: + def _build_glossary_block(self, glossary_block: str | None) -> str: """Return the pre-built glossary block (from glossary_service.build_glossary_prompt_block), or empty string.""" if glossary_block and glossary_block.strip(): return glossary_block.strip() return "" - def _build_brand_context_block(self, brand_context: Optional[str]) -> str: + def _build_brand_context_block(self, brand_context: str | None) -> str: """Build the brand context instruction block for injection into prompts.""" if brand_context and brand_context.strip(): brands = [b.strip() for b in brand_context.split(",") if b.strip()] @@ -125,7 +125,7 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w ) return "No specific brand names have been provided for this video." 
- async def extract_accessibility(self, video_file_path: str, brand_context: Optional[str] = None, sdh_requested: bool = False, glossary_block: Optional[str] = None, _cost_ctx: Optional[dict] = None) -> dict[str, Any]: + async def extract_accessibility(self, video_file_path: str, brand_context: str | None = None, sdh_requested: bool = False, glossary_block: str | None = None, _cost_ctx: dict | None = None) -> dict[str, Any]: """ Extract captions and audio descriptions from video using Gemini 2.0 Returns structured JSON with transcript, captions VTT, and audio description VTT @@ -142,7 +142,7 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w try: logger.info(f"Starting Gemini processing for video: {video_file_path}") - + # Upload video file to Gemini using new API - use asyncio.to_thread to avoid blocking logger.info("Uploading video file to Gemini API...") uploaded_file = await asyncio.to_thread( @@ -154,13 +154,13 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w } ) logger.info(f"Successfully uploaded file: {uploaded_file.name} (URI: {uploaded_file.uri})") - + # Wait for file to become ACTIVE before using it logger.info("Waiting for file to become ACTIVE...") file_ready = await self._wait_for_file_active(uploaded_file.name) if not file_ready: raise Exception("File failed to become ACTIVE within timeout") - + # Generate content using new API - use asyncio.to_thread to avoid blocking logger.info("Generating content with Gemini model...") _t0 = time.monotonic() @@ -191,10 +191,10 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w if response_text.startswith("```json"): response_text = response_text.replace("```json", "").replace("```", "").strip() logger.info("Cleaned markdown formatting from response") - + # Additional cleanup for common JSON issues response_text = response_text.strip() - + logger.info("Parsing JSON response...") try: result = 
json.loads(response_text) @@ -253,7 +253,7 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w async def _self_heal_response(self, video_file_path: str, invalid_response: str) -> dict[str, Any]: """Attempt to self-heal invalid JSON response from Gemini""" logger.info("Attempting to self-heal JSON response without re-uploading video") - + # Try to fix common JSON issues first try: fixed_response = self._attempt_json_fix(invalid_response) @@ -262,7 +262,7 @@ Generate sdh_captions_vtt using the same cue timings as captions_vtt, enriched w return fixed_response except Exception as e: logger.warning(f"JSON fix attempt failed: {e}") - + # If simple fixes don't work, try a text-only self-heal prompt with more context self_heal_prompt = f""" SYSTEM: You are a JSON repair service. Fix the malformed JSON below and return ONLY the corrected JSON. @@ -289,19 +289,19 @@ Fix the JSON and return it: ) response_text = response.text.strip() - + # Handle potential markdown formatting if response_text.startswith("```json"): response_text = response_text.replace("```json", "").replace("```", "").strip() result = json.loads(response_text) - + # Validate that all required fields are present after healing required_fields = [ "language", "confidence", "summary", "transcript_plaintext", "captions_vtt", "audio_description_vtt" ] - + missing_fields = [field for field in required_fields if field not in result] if missing_fields: logger.error(f"Self-heal lost required fields: {missing_fields}") @@ -309,12 +309,12 @@ Fix the JSON and return it: if "audio_description_vtt" in missing_fields: logger.info("Creating fallback audio_description_vtt") result["audio_description_vtt"] = "WEBVTT\n\n00:00:00.000 --> 00:00:05.000\nVideo content with visual elements described." 
- + # If other critical fields are missing, raise an error remaining_missing = [f for f in missing_fields if f != "audio_description_vtt"] if remaining_missing: raise ValueError(f"Self-heal failed to preserve required fields: {remaining_missing}") - + logger.info("Successfully self-healed Gemini response with all required fields") return result @@ -326,10 +326,10 @@ Fix the JSON and return it: self, video_file_path: str, target_language: str, - brand_context: Optional[str] = None, + brand_context: str | None = None, sdh_requested: bool = False, - glossary_block: Optional[str] = None, - _cost_ctx: Optional[dict] = None, + glossary_block: str | None = None, + _cost_ctx: dict | None = None, ) -> dict[str, Any]: """ Extract captions and audio descriptions from video using Gemini, @@ -541,17 +541,17 @@ Fix the JSON and return it: fixes_tried = [] fixed_text = json_text import re - + # Fix 1: Remove trailing commas fixed_text = re.sub(r',(\s*[}\]])', r'\1', fixed_text) fixes_tried.append("removed trailing commas") - + # Fix 2: Try to fix unterminated strings by adding closing quote and brace if fixed_text.count('"') % 2 != 0: # Odd number of quotes suggests unterminated string # Find the last quote and see if we need to close the JSON last_quote_pos = fixed_text.rfind('"') remainder = fixed_text[last_quote_pos + 1:].strip() - + # If there's no closing brace after the last quote, try to fix it if remainder and not remainder.endswith('}'): # Try to intelligently close the JSON @@ -562,27 +562,27 @@ Fix the JSON and return it: else: fixed_text += '"' fixes_tried.append("closed unterminated string") - + # Fix 3: Ensure JSON ends with closing brace if not fixed_text.rstrip().endswith('}'): fixed_text = fixed_text.rstrip() + '\n}' fixes_tried.append("added closing brace") - + try: result = json.loads(fixed_text) logger.info(f"JSON fixed with: {', '.join(fixes_tried)}") - + # Validate that we have the required fields required_fields = [ "language", "confidence", "summary", 
"transcript_plaintext", "captions_vtt", "audio_description_vtt" ] - + missing_fields = [field for field in required_fields if field not in result] if missing_fields: logger.warning(f"Fixed JSON is missing required fields: {missing_fields}") return None # Let the more advanced self-healing handle this - + return result except json.JSONDecodeError as e: logger.debug(f"JSON fix attempt failed: {e}") @@ -765,9 +765,9 @@ Fix the JSON and return it: captions_vtt: str, ad_vtt: str, target_language: str, - brief: Optional[str] = None, - glossary_block: Optional[str] = None, - _cost_ctx: Optional[dict] = None, + brief: str | None = None, + glossary_block: str | None = None, + _cost_ctx: dict | None = None, ) -> dict[str, str]: """ Transcreate English VTT content to target language with cultural adaptation @@ -829,8 +829,8 @@ JSON: vtt_content: str, target_language: str, source_language: str = "en", - glossary_block: Optional[str] = None, - _cost_ctx: Optional[dict] = None, + glossary_block: str | None = None, + _cost_ctx: dict | None = None, ) -> str: """ Translate VTT content using Gemini, preserving timing programmatically. @@ -841,7 +841,7 @@ JSON: This avoids any possibility of Gemini drifting or altering timestamps. """ - from ..lib.vtt import VTTParser, VTTEditor + from ..lib.vtt import VTTEditor, VTTParser source_cues = VTTParser.parse(vtt_content) if not source_cues: @@ -939,7 +939,7 @@ Segments to translate: self, original_text: str, language: str = "en", - _cost_ctx: Optional[dict] = None, + _cost_ctx: dict | None = None, ) -> str: """ Rewrite an audio description cue to be TTS-friendly. 
diff --git a/backend/app/services/glossary_service.py b/backend/app/services/glossary_service.py index 18d7b39..53fb23b 100644 --- a/backend/app/services/glossary_service.py +++ b/backend/app/services/glossary_service.py @@ -26,7 +26,6 @@ from ..models.glossary import ( EmbeddingStatus, Glossary, GlossaryStatus, - GlossaryTerm, GlossaryVersion, MatchedTerm, glossary_from_doc, diff --git a/backend/app/services/language_qc.py b/backend/app/services/language_qc.py index b6b8634..55e3e81 100644 --- a/backend/app/services/language_qc.py +++ b/backend/app/services/language_qc.py @@ -2,7 +2,7 @@ import asyncio from datetime import datetime -from typing import Any, Optional +from typing import Any from uuid import uuid4 from fastapi import HTTPException @@ -98,13 +98,13 @@ def _rebuild_qc_assignments(language_qc: dict) -> list[dict]: def _qc_recipients( job_doc: dict, lang_state: dict, - exclude_user_id: Optional[str], + exclude_user_id: str | None, ) -> list[tuple[str, str]]: """Return [(email, full_name)] for linguist + reviewer assigned to a language, minus the actor.""" seen: set[str] = set() result: list[tuple[str, str]] = [] - def _add(email: Optional[str], name: Optional[str]) -> None: + def _add(email: str | None, name: str | None) -> None: if email and email not in seen and email != exclude_user_id: seen.add(email) result.append((email, name or email.split("@")[0])) @@ -183,7 +183,7 @@ async def auto_assign_defaults(db: AsyncIOMotorDatabase, job_id: str) -> int: # ── Core mutations ──────────────────────────────────────────────────────────── -async def get_state(db: AsyncIOMotorDatabase, job_id: str, lang: str) -> Optional[LanguageQCState]: +async def get_state(db: AsyncIOMotorDatabase, job_id: str, lang: str) -> LanguageQCState | None: job_doc = await db[_JOBS].find_one({"_id": job_id}, {f"language_qc.{lang}": 1}) if not job_doc: return None @@ -213,8 +213,8 @@ async def assign_linguist( actor: User, *, http_request=None, - notes: Optional[str] = None, - 
deadline: Optional[datetime] = None, + notes: str | None = None, + deadline: datetime | None = None, ) -> LanguageQCState: """PM/PROD/ADMIN assigns a linguist to a language. Creates per-lang state if missing.""" job_doc = await db[_JOBS].find_one({"_id": job_id}) @@ -310,8 +310,8 @@ async def reassign_linguist( actor: User, *, http_request=None, - notes: Optional[str] = None, - deadline: Optional[datetime] = None, + notes: str | None = None, + deadline: datetime | None = None, ) -> LanguageQCState: """Currently-assigned linguist OR PM/PROD/ADMIN hands off to a colleague.""" job_doc = await db[_JOBS].find_one({"_id": job_id}) @@ -340,8 +340,8 @@ async def assign_reviewer( actor: User, *, http_request=None, - notes: Optional[str] = None, - deadline: Optional[datetime] = None, + notes: str | None = None, + deadline: datetime | None = None, ) -> LanguageQCState: """PM/PROD/ADMIN assigns a reviewer to a language.""" job_doc = await db[_JOBS].find_one({"_id": job_id}) @@ -426,8 +426,8 @@ async def reassign_reviewer( actor: User, *, http_request=None, - notes: Optional[str] = None, - deadline: Optional[datetime] = None, + notes: str | None = None, + deadline: datetime | None = None, ) -> LanguageQCState: if actor.role not in (UserRole.PRODUCTION, UserRole.ADMIN, UserRole.PROJECT_MANAGER): raise HTTPException(status_code=403, detail="Only PM/PROD/ADMIN can reassign reviewer") @@ -627,7 +627,7 @@ async def approve_language( actor: User, *, http_request=None, - notes: Optional[str] = None, + notes: str | None = None, ) -> LanguageQCState: job_doc = await db[_JOBS].find_one({"_id": job_id}) if not job_doc: @@ -801,7 +801,7 @@ async def reopen_language( actor: User, *, http_request=None, - notes: Optional[str] = None, + notes: str | None = None, ) -> LanguageQCState: """PROD/ADMIN only — resets an approved language back to pending for re-review.""" if actor.role not in (UserRole.PRODUCTION, UserRole.ADMIN): @@ -961,7 +961,7 @@ async def list_for_linguist( linguist_id: str, *, 
accessible_org_ids: list[str] | None = None, - status_filter: Optional[str] = None, + status_filter: str | None = None, skip: int = 0, limit: int = 50, ) -> list[dict]: @@ -987,7 +987,7 @@ async def list_for_reviewer( reviewer_id: str, *, accessible_org_ids: list[str] | None = None, - status_filter: Optional[str] = None, + status_filter: str | None = None, skip: int = 0, limit: int = 50, ) -> list[dict]: diff --git a/backend/app/services/membership_service.py b/backend/app/services/membership_service.py index 1650dc8..fce4b02 100644 --- a/backend/app/services/membership_service.py +++ b/backend/app/services/membership_service.py @@ -1,16 +1,15 @@ """Membership service — queries the memberships collection.""" -from datetime import datetime, timezone -from typing import Optional +from datetime import UTC, datetime from motor.motor_asyncio import AsyncIOMotorDatabase -from ..models.membership import Membership, MemberDetail +from ..models.membership import MemberDetail, Membership from ..models.organization import OrgRole def _now() -> datetime: - return datetime.now(timezone.utc) + return datetime.now(UTC) def _membership_from_doc(doc: dict) -> Membership: @@ -36,7 +35,7 @@ async def get_membership( user_id: str, organization_id: str, db: AsyncIOMotorDatabase, -) -> Optional[Membership]: +) -> Membership | None: doc = await db.memberships.find_one( {"user_id": user_id, "organization_id": organization_id} ) @@ -59,7 +58,7 @@ async def upsert_membership( user_id: str, organization_id: str, role_in_org: OrgRole, - created_by: Optional[str], + created_by: str | None, db: AsyncIOMotorDatabase, ) -> Membership: now = _now() diff --git a/backend/app/services/microsoft_auth.py b/backend/app/services/microsoft_auth.py index 77cd114..78139f8 100644 --- a/backend/app/services/microsoft_auth.py +++ b/backend/app/services/microsoft_auth.py @@ -3,7 +3,6 @@ Validates Microsoft ID tokens and extracts user information. 
""" import time -from typing import Dict, Optional import httpx from jose import JWTError, jwt @@ -50,11 +49,11 @@ class MicrosoftAuthService: self.openid_config_url = f"{self.authority}/v2.0/.well-known/openid-configuration" # Cache for JWKS (public keys) - self._jwks_cache: Optional[Dict] = None + self._jwks_cache: dict | None = None self._jwks_cache_time: float = 0 self._jwks_cache_ttl: int = 3600 # Cache for 1 hour - async def _get_openid_config(self) -> Dict: + async def _get_openid_config(self) -> dict: """Fetch OpenID Connect configuration from Microsoft.""" try: async with httpx.AsyncClient(timeout=10) as client: @@ -65,7 +64,7 @@ class MicrosoftAuthService: logger.error(f"Failed to fetch OpenID configuration: {e}") raise MicrosoftAuthError("Failed to fetch Microsoft authentication configuration") - async def _get_jwks(self, force_refresh: bool = False) -> Dict: + async def _get_jwks(self, force_refresh: bool = False) -> dict: """Fetch JSON Web Key Set (JWKS) from Microsoft. Args: @@ -121,7 +120,7 @@ class MicrosoftAuthService: if not kid: raise MicrosoftTokenValidationError("Token header missing 'kid' claim") - def _find_key(keys: list) -> Optional[Dict]: + def _find_key(keys: list) -> dict | None: for key in keys: if key.get('kid') == kid: return {'kty': key['kty'], 'kid': key['kid'], 'use': key.get('use'), diff --git a/backend/app/services/secrets_manager.py b/backend/app/services/secrets_manager.py index c337f88..f05e125 100644 --- a/backend/app/services/secrets_manager.py +++ b/backend/app/services/secrets_manager.py @@ -1,11 +1,10 @@ """Google Cloud Secret Manager integration service.""" -import os import asyncio -from typing import Dict, List, Optional, Any -from functools import lru_cache -from google.cloud import secretmanager +import os + from google.api_core import exceptions as gcp_exceptions +from google.cloud import secretmanager from app.core.config import get_settings from app.core.logging import get_logger @@ -21,14 +20,14 @@ class 
SecretManagerError(Exception): class SecretsManager: """Service for managing secrets via Google Cloud Secret Manager.""" - + def __init__(self): self.settings = get_settings() - self.client: Optional[secretmanager.SecretManagerServiceClient] = None + self.client: secretmanager.SecretManagerServiceClient | None = None self.project_id = self.settings.google_cloud_project - self._cache: Dict[str, str] = {} + self._cache: dict[str, str] = {} self._cache_ttl = 300 # 5 minutes cache - + def _get_client(self) -> secretmanager.SecretManagerServiceClient: """Get or create Secret Manager client.""" if not self.client: @@ -38,9 +37,9 @@ class SecretsManager: except Exception as e: logger.error(f"Failed to initialize Secret Manager client: {e}") raise SecretManagerError(f"Failed to initialize Secret Manager: {e}") - + return self.client - + @trace_async_operation("secrets_manager.get_secret") async def get_secret(self, secret_name: str, version: str = "latest") -> str: """ @@ -56,54 +55,54 @@ class SecretsManager: Raises: SecretManagerError: If secret cannot be retrieved """ - + cache_key = f"{secret_name}:{version}" - + # Check cache first if cache_key in self._cache: logger.debug(f"Secret {secret_name} retrieved from cache") return self._cache[cache_key] - + try: # Build the secret name name = f"projects/{self.project_id}/secrets/{secret_name}/versions/{version}" - + # Get the secret client = self._get_client() - + # Run in thread pool since Secret Manager client is synchronous loop = asyncio.get_event_loop() response = await loop.run_in_executor( - None, + None, client.access_secret_version, {"name": name} ) - + secret_value = response.payload.data.decode("UTF-8") - + # Cache the secret (with TTL handled by application restart) self._cache[cache_key] = secret_value - + logger.info(f"Successfully retrieved secret: {secret_name}") return secret_value - + except gcp_exceptions.NotFound: error_msg = f"Secret not found: {secret_name}" logger.error(error_msg) raise 
SecretManagerError(error_msg) - + except gcp_exceptions.PermissionDenied: error_msg = f"Permission denied accessing secret: {secret_name}" logger.error(error_msg) raise SecretManagerError(error_msg) - + except Exception as e: error_msg = f"Failed to retrieve secret {secret_name}: {e}" logger.error(error_msg) raise SecretManagerError(error_msg) - + @trace_async_operation("secrets_manager.get_secrets_batch") - async def get_secrets_batch(self, secret_names: List[str]) -> Dict[str, str]: + async def get_secrets_batch(self, secret_names: list[str]) -> dict[str, str]: """ Retrieve multiple secrets efficiently. @@ -113,17 +112,17 @@ class SecretsManager: Returns: Dictionary mapping secret names to their values """ - + secrets = {} tasks = [] - + for secret_name in secret_names: task = asyncio.create_task( self.get_secret(secret_name), name=f"get_secret_{secret_name}" ) tasks.append((secret_name, task)) - + # Wait for all tasks to complete for secret_name, task in tasks: try: @@ -132,10 +131,10 @@ class SecretsManager: logger.warning(f"Failed to retrieve secret {secret_name}: {e}") # Continue with other secrets continue - + return secrets - - async def create_secret(self, secret_name: str, secret_value: str, labels: Optional[Dict[str, str]] = None) -> str: + + async def create_secret(self, secret_name: str, secret_value: str, labels: dict[str, str] | None = None) -> str: """ Create a new secret in Secret Manager. 
@@ -147,19 +146,19 @@ class SecretsManager: Returns: The full secret resource name """ - + try: client = self._get_client() parent = f"projects/{self.project_id}" - + # Create the secret secret = { "labels": labels or {}, "replication": {"automatic": {}} } - + loop = asyncio.get_event_loop() - + # Create secret resource create_response = await loop.run_in_executor( None, @@ -170,7 +169,7 @@ class SecretsManager: "secret": secret } ) - + # Add secret version with the actual value version_response = await loop.run_in_executor( None, @@ -180,20 +179,20 @@ class SecretsManager: "payload": {"data": secret_value.encode("UTF-8")} } ) - + logger.info(f"Successfully created secret: {secret_name}") return version_response.name - + except gcp_exceptions.AlreadyExists: error_msg = f"Secret already exists: {secret_name}" logger.error(error_msg) raise SecretManagerError(error_msg) - + except Exception as e: error_msg = f"Failed to create secret {secret_name}: {e}" logger.error(error_msg) raise SecretManagerError(error_msg) - + def clear_cache(self) -> None: """Clear the secrets cache.""" self._cache.clear() @@ -234,7 +233,7 @@ async def get_redis_url() -> str: return url -async def get_jwt_secrets() -> Dict[str, str]: +async def get_jwt_secrets() -> dict[str, str]: """Get JWT secrets from Secret Manager.""" try: return await secrets_manager.get_secrets_batch([ @@ -249,22 +248,22 @@ async def get_jwt_secrets() -> Dict[str, str]: } -async def get_api_keys() -> Dict[str, str]: +async def get_api_keys() -> dict[str, str]: """Get all API keys from Secret Manager.""" api_keys = {} - + secret_names = [ "gemini-api-key", - "sendgrid-api-key", + "sendgrid-api-key", "elevenlabs-api-key", "sentry-dsn" ] - + try: api_keys = await secrets_manager.get_secrets_batch(secret_names) except SecretManagerError: logger.warning("Failed to retrieve some API keys from Secret Manager, using environment fallback") - + # Fallback to environment variables for missing keys env_mapping = { "gemini-api-key": 
"GEMINI_API_KEY", @@ -272,7 +271,7 @@ async def get_api_keys() -> Dict[str, str]: "elevenlabs-api-key": "ELEVENLABS_API_KEY", "sentry-dsn": "SENTRY_DSN" } - + for secret_name, env_var in env_mapping.items(): if secret_name not in api_keys: env_value = os.getenv(env_var) @@ -280,5 +279,5 @@ async def get_api_keys() -> Dict[str, str]: api_keys[secret_name] = env_value else: logger.warning(f"API key {secret_name} not available in secrets or environment") - - return api_keys \ No newline at end of file + + return api_keys diff --git a/backend/app/services/validation.py b/backend/app/services/validation.py index 453ff33..50ec49a 100644 --- a/backend/app/services/validation.py +++ b/backend/app/services/validation.py @@ -1,4 +1,4 @@ -from typing import Dict, List, Any +from typing import Any from ..core.logging import get_logger from ..lib.vtt import VTTEditor @@ -11,7 +11,7 @@ class AssetValidationService: """Service for validating job assets before completion""" @staticmethod - async def validate_job_assets(job_doc: Dict[str, Any]) -> tuple[bool, List[str]]: + async def validate_job_assets(job_doc: dict[str, Any]) -> tuple[bool, list[str]]: """ Validate all assets for a job before allowing completion Returns (is_valid, list_of_errors) @@ -19,7 +19,7 @@ class AssetValidationService: errors = [] outputs = job_doc.get("outputs", {}) requested_outputs = job_doc.get("requested_outputs", {}) - + if not outputs: errors.append("No outputs generated for this job") return False, errors @@ -88,13 +88,13 @@ class AssetValidationService: # Download and validate VTT content blob_path = gcs_uri.replace(f"gs://{gcs_service.bucket.name}/", "") blob = gcs_service.bucket.blob(blob_path) - + if not blob.exists(): return f"{asset_name} file not found in storage" vtt_content = blob.download_as_text() is_valid, vtt_errors = VTTEditor.validate_vtt(vtt_content) - + if not is_valid: return f"{asset_name} validation failed: {'; '.join(vtt_errors[:3])}" @@ -118,13 +118,13 @@ class 
AssetValidationService: try: blob_path = gcs_uri.replace(f"gs://{gcs_service.bucket.name}/", "") blob = gcs_service.bucket.blob(blob_path) - + if not blob.exists(): return f"{asset_name} file not found in storage" # Reload blob to get metadata (including size) blob.reload() - + # Check file size (should be reasonable for audio) size_mb = blob.size / (1024 * 1024) if blob.size else 0 if size_mb < 0.01: # Less than 10KB @@ -169,4 +169,4 @@ class AssetValidationService: # Global service instance -asset_validation_service = AssetValidationService() \ No newline at end of file +asset_validation_service = AssetValidationService() diff --git a/backend/app/services/video_renderer.py b/backend/app/services/video_renderer.py index 744fc2a..b031673 100644 --- a/backend/app/services/video_renderer.py +++ b/backend/app/services/video_renderer.py @@ -23,7 +23,6 @@ from google.oauth2 import id_token from ..core.config import settings from ..core.logging import get_logger from ..models.job import PausePointData, VideoSegmentMetadata -from ..schemas.accessible_video import AccessibleVideoMethod, GeminiAccessibleVideoAnalysis logger = get_logger(__name__) @@ -249,6 +248,7 @@ class VideoRendererService: FFmpegExecutionError: If the command fails """ from celery.result import allow_join_result + from ..tasks.ffmpeg_operations import run_ffmpeg_command # Dispatch to ffmpeg queue @@ -292,6 +292,7 @@ class VideoRendererService: FFmpegExecutionError: If the command fails """ from celery.result import allow_join_result + from ..tasks.ffmpeg_operations import run_ffprobe_command # Dispatch to ffmpeg queue @@ -478,7 +479,7 @@ class VideoRendererService: output_path ]) - logger.info(f"Running ffmpeg overlay command...") + logger.info("Running ffmpeg overlay command...") await self._run_ffmpeg(cmd) logger.info(f"Overlay render complete: {output_path}") diff --git a/backend/app/services/vtt_versioning.py b/backend/app/services/vtt_versioning.py index 389a99e..ebfaada 100644 --- 
a/backend/app/services/vtt_versioning.py +++ b/backend/app/services/vtt_versioning.py @@ -2,7 +2,6 @@ import difflib from datetime import datetime -from typing import Optional from motor.motor_asyncio import AsyncIOMotorDatabase @@ -54,8 +53,8 @@ async def create_version( kind: VttKind, content: str, user: User, - note: Optional[str] = None, - parent_version: Optional[int] = None, + note: str | None = None, + parent_version: int | None = None, ) -> VttVersionSummary: """Snapshot VTT content as a new immutable version. Returns summary (no content field).""" version_num = await _next_version(db, job_id, lang, kind) @@ -119,7 +118,7 @@ async def get_version( lang: str, kind: VttKind, version: int, -) -> Optional[VttVersion]: +) -> VttVersion | None: doc = await db[_VERSION_COLLECTION].find_one( {"job_id": job_id, "lang": lang, "kind": kind, "version": version} ) @@ -136,7 +135,7 @@ async def restore_version( kind: VttKind, version: int, user: User, -) -> Optional[VttVersionSummary]: +) -> VttVersionSummary | None: """Create a new version whose content is a copy of an older version (non-destructive).""" src = await get_version(db, job_id, lang, kind, version) if not src: diff --git a/backend/app/services/websocket.py b/backend/app/services/websocket.py index 8d4ba80..0f4130d 100644 --- a/backend/app/services/websocket.py +++ b/backend/app/services/websocket.py @@ -9,7 +9,7 @@ import asyncio import json import logging from datetime import datetime -from typing import Any, Optional +from typing import Any import redis.asyncio as redis from fastapi import WebSocket @@ -25,11 +25,11 @@ class JobStatusUpdate(BaseModel): job_id: str status: str updated_at: datetime - job_title: Optional[str] = None # Job title for better user experience - message: Optional[str] = None - progress: Optional[int] = None # 0-100 percentage - metadata: Optional[dict[str, Any]] = None - eligible_users: Optional[set[str]] = None # Pre-computed eligible users + job_title: str | None = None # Job 
title for better user experience + message: str | None = None + progress: int | None = None # 0-100 percentage + metadata: dict[str, Any] | None = None + eligible_users: set[str] | None = None # Pre-computed eligible users class ConnectionManager: @@ -45,9 +45,9 @@ class ConnectionManager: # Lock for thread safety self.lock = asyncio.Lock() # Redis client for pub/sub - self.redis_client: Optional[redis.Redis] = None - self.pubsub: Optional[redis.client.PubSub] = None - self.subscriber_task: Optional[asyncio.Task] = None + self.redis_client: redis.Redis | None = None + self.pubsub: redis.client.PubSub | None = None + self.subscriber_task: asyncio.Task | None = None async def start(self): """Initialize Redis pub/sub subscriber""" @@ -178,10 +178,10 @@ class ConnectionManager: self, job_id: str, status: str, - job_title: Optional[str] = None, - message: Optional[str] = None, - progress: Optional[int] = None, - metadata: Optional[dict[str, Any]] = None + job_title: str | None = None, + message: str | None = None, + progress: int | None = None, + metadata: dict[str, Any] | None = None ): """ Async wrapper for broadcasting job status updates from API routes @@ -405,7 +405,7 @@ class ConnectionManager: connection_manager = ConnectionManager() -async def authenticate_websocket(websocket: WebSocket, token: Optional[str]) -> Optional[str]: +async def authenticate_websocket(websocket: WebSocket, token: str | None) -> str | None: """ Authenticate a WebSocket connection using a JWT token Returns user_id if valid, None if invalid diff --git a/backend/app/services/websocket_publisher.py b/backend/app/services/websocket_publisher.py index 9a82b6c..690873e 100644 --- a/backend/app/services/websocket_publisher.py +++ b/backend/app/services/websocket_publisher.py @@ -8,7 +8,7 @@ to avoid connection overhead per publish operation. 
import logging import threading from datetime import datetime -from typing import Any, Optional +from typing import Any import redis from pydantic import BaseModel @@ -27,18 +27,18 @@ class JobStatusUpdate(BaseModel): job_id: str status: str updated_at: datetime - job_title: Optional[str] = None - message: Optional[str] = None - progress: Optional[int] = None - metadata: Optional[dict[str, Any]] = None - eligible_users: Optional[set[str]] = None # Pre-computed eligible users + job_title: str | None = None + message: str | None = None + progress: int | None = None + metadata: dict[str, Any] | None = None + eligible_users: set[str] | None = None # Pre-computed eligible users class WebSocketPublisher: """Synchronous Redis publisher for WebSocket updates from Celery workers""" def __init__(self): - self._redis_client: Optional[redis.Redis] = None + self._redis_client: redis.Redis | None = None self._lock = threading.Lock() def _get_client(self) -> redis.Redis: @@ -57,11 +57,11 @@ class WebSocketPublisher: self, job_id: str, status: str, - job_title: Optional[str] = None, - message: Optional[str] = None, - progress: Optional[int] = None, - metadata: Optional[dict[str, Any]] = None, - eligible_users: Optional[set[str]] = None + job_title: str | None = None, + message: str | None = None, + progress: int | None = None, + metadata: dict[str, Any] | None = None, + eligible_users: set[str] | None = None ) -> bool: """ Publish job status update to Redis pub/sub channels @@ -115,11 +115,11 @@ _publisher = WebSocketPublisher() def publish_job_status_update( job_id: str, status: str, - job_title: Optional[str] = None, - message: Optional[str] = None, - progress: Optional[int] = None, - metadata: Optional[dict[str, Any]] = None, - eligible_users: Optional[set[str]] = None + job_title: str | None = None, + message: str | None = None, + progress: int | None = None, + metadata: dict[str, Any] | None = None, + eligible_users: set[str] | None = None ) -> bool: """ Convenience function 
to publish job status update @@ -193,10 +193,10 @@ def get_job_eligible_users(job_id: str) -> set[str]: def publish_job_update_with_eligibility( job_id: str, status: str, - job_title: Optional[str] = None, - message: Optional[str] = None, - progress: Optional[int] = None, - metadata: Optional[dict[str, Any]] = None + job_title: str | None = None, + message: str | None = None, + progress: int | None = None, + metadata: dict[str, Any] | None = None ) -> bool: """ Convenience function that computes eligible users and publishes diff --git a/backend/app/services/whisper_http_service.py b/backend/app/services/whisper_http_service.py index 815dcd4..354c899 100644 --- a/backend/app/services/whisper_http_service.py +++ b/backend/app/services/whisper_http_service.py @@ -11,7 +11,6 @@ This module uses minimal configuration to avoid importing the full app Settings. import logging import os import tempfile -from typing import Optional from fastapi import FastAPI, HTTPException from google.cloud import storage diff --git a/backend/app/tasks/__init__.py b/backend/app/tasks/__init__.py index 14e0287..577f353 100644 --- a/backend/app/tasks/__init__.py +++ b/backend/app/tasks/__init__.py @@ -1,5 +1,5 @@ from celery import Celery -from celery.signals import task_failure, task_success, task_retry +from celery.signals import task_failure, task_retry, task_success from ..core.config import settings from ..core.logging import get_logger @@ -36,7 +36,7 @@ celery_app.conf.update( }, task_default_queue="default", task_create_missing_queues=True, - # Task-specific timeout overrides + # Task-specific timeout overrides task_annotations={}, ) @@ -50,19 +50,21 @@ def test_task(message="test"): # Add task received handler for debugging -from celery.signals import task_received, task_prerun, worker_ready import threading import time +from celery.signals import task_prerun, task_received, worker_ready + + @worker_ready.connect def worker_ready_handler(sender=None, **kwargs): """Log when worker is 
ready and start heartbeat""" logger.info(f"🟢 WORKER READY: {sender}") print(f"🟢 WORKER READY: {sender} - Worker is online and listening!") - + # Change stream monitoring has been removed - workflow triggering now handled directly by API endpoints logger.info("Workflow triggering handled directly by API endpoints - no change stream monitoring needed") - + @task_received.connect def task_received_handler(sender=None, task_id=None, task=None, args=None, kwargs=None, retries=None, eta=None, **kwds): @@ -82,7 +84,7 @@ def task_failure_handler(sender=None, task_id=None, exception=None, traceback=No """Log task failures to centralized logging""" exception_type = exception.__class__.__name__ if exception else "Unknown" exception_msg = str(exception) if exception else "No details" - + # Log comprehensive error details error_details = f""" === CELERY TASK FAILURE === @@ -96,14 +98,14 @@ Additional Info: {einfo} ============================= """ logger.error(error_details) - + # Also log to stdout for immediate visibility print(f"🚨 TASK FAILURE: {sender} [{task_id}] - {exception_type}: {exception_msg}") if traceback: print(f"Full traceback:\n{traceback}") -@task_success.connect +@task_success.connect def task_success_handler(sender=None, result=None, **kwargs): """Log task success""" result_str = str(result)[:100] if result else "No result" @@ -120,15 +122,17 @@ def task_retry_handler(sender=None, task_id=None, reason=None, einfo=None, **kwa def import_task_modules(): """Import all task modules to register them with Celery""" try: - from . import ingest_and_ai # noqa: E402, F401 - from . import translate_and_synthesize # noqa: E402, F401 - from . import tts_synthesis # noqa: E402, F401 - from . import render_accessible_video # noqa: E402, F401 - from . import rerender_accessible_video # noqa: E402, F401 - from . import notify # noqa: E402, F401 - from . import ffmpeg_operations # noqa: E402, F401 - from . import whisper_transcribe # noqa: E402, F401 - from . 
import embed_glossary # noqa: E402, F401 + from . import ( + embed_glossary, # noqa: E402, F401 + ffmpeg_operations, # noqa: E402, F401 + ingest_and_ai, # noqa: E402, F401 + notify, # noqa: E402, F401 + render_accessible_video, # noqa: E402, F401 + rerender_accessible_video, # noqa: E402, F401 + translate_and_synthesize, # noqa: E402, F401 + tts_synthesis, # noqa: E402, F401 + whisper_transcribe, # noqa: E402, F401 + ) logger.info("Successfully imported all task modules") except Exception as e: logger.error(f"Error importing task modules: {e}") diff --git a/backend/app/tasks/_websocket_bridge.py b/backend/app/tasks/_websocket_bridge.py index 7c11daf..6590fa3 100644 --- a/backend/app/tasks/_websocket_bridge.py +++ b/backend/app/tasks/_websocket_bridge.py @@ -5,7 +5,6 @@ sync Redis publish so tasks can notify connected clients without asyncio. """ import traceback from datetime import datetime -from typing import Optional import redis as sync_redis @@ -19,9 +18,9 @@ logger = get_logger(__name__) def broadcast_status_update( job_id: str, status: str, - job_title: Optional[str] = None, - message: Optional[str] = None, - progress: Optional[int] = None, + job_title: str | None = None, + message: str | None = None, + progress: int | None = None, ) -> None: """Publish a job-status event to Redis so WebSocket subscribers receive it. 
diff --git a/backend/app/tasks/embed_glossary.py b/backend/app/tasks/embed_glossary.py index c6b64c2..ba9941f 100644 --- a/backend/app/tasks/embed_glossary.py +++ b/backend/app/tasks/embed_glossary.py @@ -42,6 +42,7 @@ async def _embed_batch( total: int, ) -> None: from pymongo import UpdateOne + from ..services.embedding_service import embedding_service async with sem: diff --git a/backend/app/tasks/ffmpeg_operations.py b/backend/app/tasks/ffmpeg_operations.py index f1c36b7..94c0e65 100644 --- a/backend/app/tasks/ffmpeg_operations.py +++ b/backend/app/tasks/ffmpeg_operations.py @@ -49,7 +49,7 @@ def run_ffmpeg_command(self, cmd: list[str], timeout: int = 3600) -> dict[str, A 'returncode': result.returncode } - logger.info(f"[FFmpeg Queue] Command completed successfully") + logger.info("[FFmpeg Queue] Command completed successfully") return { 'success': True, 'stdout': result.stdout, diff --git a/backend/app/tasks/ingest_and_ai.py b/backend/app/tasks/ingest_and_ai.py index 83f0136..1a78715 100644 --- a/backend/app/tasks/ingest_and_ai.py +++ b/backend/app/tasks/ingest_and_ai.py @@ -11,10 +11,8 @@ from ..core.config import settings from ..core.logging import get_logger from ..models.job import JobStatus from ..services import cost_tracker -from ..services.cost_tracker import BudgetExceeded -from ..services.gcs import gcs_service, gcs_path, upload_vtt_to_gcs +from ..services.gcs import gcs_path, gcs_service, upload_vtt_to_gcs from ..services.gemini import gemini_service -from ..services.websocket import connection_manager from . 
import celery_app from ._websocket_bridge import broadcast_status_update @@ -71,7 +69,7 @@ async def ingest_and_ai_task_impl(job_id: str): if not job_doc: logger.error(f"Job {job_id} not found in database") return - + job_title = job_doc.get("title", "Untitled Job") logger.info(f"Processing job: {job_title}") @@ -226,7 +224,9 @@ async def ingest_and_ai_task_impl(job_id: str): # Generate descriptive transcript (WCAG 2.1 1.2.1) transcript_gcs_uri = None try: - from ..services.descriptive_transcript import generate_descriptive_transcript + from ..services.descriptive_transcript import ( + generate_descriptive_transcript, + ) transcript_text = generate_descriptive_transcript( ai_result["captions_vtt"], ai_result["audio_description_vtt"] diff --git a/backend/app/tasks/notify.py b/backend/app/tasks/notify.py index 3ebca33..a5c5c41 100644 --- a/backend/app/tasks/notify.py +++ b/backend/app/tasks/notify.py @@ -3,12 +3,11 @@ from datetime import datetime from bson import ObjectId from celery import Task -from celery.exceptions import Retry from motor.motor_asyncio import AsyncIOMotorClient from ..core.config import settings from ..core.logging import get_logger -from ..models.audit_log import AuditLogCreate, AuditAction +from ..models.audit_log import AuditAction, AuditLogCreate from ..services.emailer import email_service from ..services.gcs import get_signed_download_url from . 
import celery_app @@ -51,7 +50,7 @@ class NotifyClientTask(Task): # Get client ID and ensure proper ObjectId format client_id = job_doc["client_id"] logger.info(f"Looking up client {client_id} for job {job_id}") - + # Try looking up client by string ID first client_doc = await db.users.find_one({"_id": client_id}) if not client_doc: @@ -60,7 +59,7 @@ class NotifyClientTask(Task): client_doc = await db.users.find_one({"_id": ObjectId(client_id)}) except: pass # Invalid ObjectId format - + if not client_doc: logger.error(f"Client {client_id} not found in database for job {job_id}") # Don't retry for missing users - this is likely a data issue @@ -107,7 +106,7 @@ class NotifyClientTask(Task): download_links[language] = lang_downloads email_enabled = True - + if email_enabled: try: success = await email_service.send_completion_email( @@ -120,11 +119,11 @@ class NotifyClientTask(Task): logger.info(f"Successfully sent completion email to {client_doc['email']} for job {job_id}") else: logger.warning(f"Email service returned failure for job {job_id} - treating as non-retryable") - + except Exception as email_error: error_msg = str(email_error) logger.error(f"Email sending exception for job {job_id}: {error_msg}") - + # Check if this is an authentication error (non-retryable) if "401" in error_msg or "Unauthorized" in error_msg or "authentication" in error_msg.lower(): logger.warning(f"Email authentication failed for job {job_id} - treating as non-retryable configuration error") @@ -177,15 +176,15 @@ class NotifyClientTask(Task): # Only retry for transient errors, not configuration or data errors non_retryable_patterns = [ - "not found", - "401", - "unauthorized", + "not found", + "401", + "unauthorized", "authentication", "failed to send completion email" ] - + should_not_retry = any(pattern in error_msg.lower() for pattern in non_retryable_patterns) - + if should_not_retry: logger.info(f"Skipping retry for job {job_id} due to non-retryable error: {error_msg}") return diff 
--git a/backend/app/tasks/render_accessible_video.py b/backend/app/tasks/render_accessible_video.py index f825d4c..6289830 100644 --- a/backend/app/tasks/render_accessible_video.py +++ b/backend/app/tasks/render_accessible_video.py @@ -11,9 +11,12 @@ from motor.motor_asyncio import AsyncIOMotorClient from ..core.config import settings from ..core.logging import get_logger from ..lib.vtt import VTTParser -from ..models.job import AccessibleVideoEditState, JobStatus, PausePointData, VideoSegmentMetadata +from ..models.job import ( + AccessibleVideoEditState, + JobStatus, +) from ..schemas.whisper import CachedWhisperTranscript, CachedWordTimestamp -from ..services.gcs import gcs_service, gcs_path +from ..services.gcs import gcs_path, gcs_service from ..services.video_renderer import video_renderer_service from ..services.vtt_retimer import vtt_retimer_service from ..services.whisper_service import WordTimestamp, whisper_service diff --git a/backend/app/tasks/rerender_accessible_video.py b/backend/app/tasks/rerender_accessible_video.py index fe81f19..e19d53e 100644 --- a/backend/app/tasks/rerender_accessible_video.py +++ b/backend/app/tasks/rerender_accessible_video.py @@ -1,7 +1,6 @@ """Celery task for re-rendering accessible video with QC changes.""" import asyncio -import io import os import tempfile from datetime import datetime @@ -13,15 +12,25 @@ from pydub import AudioSegment from ..core.config import settings from ..core.logging import get_logger from ..lib.vtt import VTTParser -from ..models.job import AccessibleVideoEditState, JobStatus, PausePointData, VideoSegmentMetadata -from ..services.gcs import gcs_service, gcs_path +from ..models.job import ( + AccessibleVideoEditState, + JobStatus, +) +from ..services.gcs import gcs_path, gcs_service from ..services.video_renderer import video_renderer_service from ..services.vtt_retimer import vtt_retimer_service -from ..services.whisper_service import WordTimestamp, whisper_service +from ..services.whisper_service 
import whisper_service from . import celery_app -from .render_accessible_video import _extract_audio_for_whisper, _dispatch_whisper_transcription from ._websocket_bridge import broadcast_status_update -from .tts_synthesis import dispatch_language_tts, parse_ad_cues, parse_cue_index_from_blob_name, synthesize_cue_task +from .render_accessible_video import ( + _dispatch_whisper_transcription, + _extract_audio_for_whisper, +) +from .tts_synthesis import ( + parse_ad_cues, + parse_cue_index_from_blob_name, + synthesize_cue_task, +) logger = get_logger(__name__) diff --git a/backend/app/tasks/runner.py b/backend/app/tasks/runner.py index 9271354..f534c0d 100644 --- a/backend/app/tasks/runner.py +++ b/backend/app/tasks/runner.py @@ -46,7 +46,9 @@ def main() -> None: asyncio.run(ingest_and_ai_task_impl(job_id)) elif task == "translate": - from app.tasks.translate_and_synthesize import _async_translate_and_synthesize + from app.tasks.translate_and_synthesize import ( + _async_translate_and_synthesize, + ) asyncio.run(_async_translate_and_synthesize(job_id)) elif task == "render": @@ -65,7 +67,9 @@ def main() -> None: if args.regenerate_cues else [] ) - from app.tasks.rerender_accessible_video import _async_rerender_accessible_video + from app.tasks.rerender_accessible_video import ( + _async_rerender_accessible_video, + ) asyncio.run( _async_rerender_accessible_video( job_id, diff --git a/backend/app/tasks/translate_and_synthesize.py b/backend/app/tasks/translate_and_synthesize.py index f6afe54..02a1665 100644 --- a/backend/app/tasks/translate_and_synthesize.py +++ b/backend/app/tasks/translate_and_synthesize.py @@ -1,12 +1,10 @@ import asyncio import os +import random import tempfile from datetime import datetime from typing import Any -import time -import random -from celery import Task from celery.exceptions import SoftTimeLimitExceeded from motor.motor_asyncio import AsyncIOMotorClient @@ -14,10 +12,9 @@ from ..core.config import settings from ..core.logging import 
get_logger from ..models.job import JobStatus from ..services import cost_tracker -from ..services.gcs import gcs_service, gcs_path, upload_vtt_to_gcs +from ..services.gcs import gcs_path, gcs_service, upload_vtt_to_gcs from ..services.gemini import gemini_service from ..services.gemini_tts import TTSSynthesisError -from ..services.websocket import connection_manager from . import celery_app from ._websocket_bridge import broadcast_status_update @@ -30,7 +27,7 @@ MAX_CONCURRENT_VIDEO_NATIVE = 3 async def retry_with_backoff(func, max_retries=3, base_delay=1): """Retry a function with exponential backoff""" last_exception = None - + for attempt in range(max_retries): try: return await func() @@ -38,12 +35,12 @@ async def retry_with_backoff(func, max_retries=3, base_delay=1): last_exception = e if attempt == max_retries - 1: break - + # Exponential backoff with jitter delay = base_delay * (2 ** attempt) + random.uniform(0, 1) logger.warning(f"Attempt {attempt + 1} failed, retrying in {delay:.2f}s: {e}") await asyncio.sleep(delay) - + raise last_exception @@ -86,7 +83,7 @@ def translate_and_synthesize_task(self, job_id: str): Triggered when job status changes to 'approved_english' """ logger.info(f"🚀 CELERY TASK STARTED: translate_and_synthesize_task for job {job_id}") - + try: logger.info(f"📝 About to call asyncio.run for job {job_id}") result = asyncio.run(_async_translate_and_synthesize(job_id)) @@ -109,7 +106,7 @@ def translate_and_synthesize_task(self, job_id: str): async def _async_translate_and_synthesize(job_id: str): """Async implementation of translation and synthesis""" logger.info(f"🔄 ASYNC FUNCTION STARTED: _async_translate_and_synthesize for job {job_id}") - + # Connect to MongoDB logger.info(f"📡 Connecting to MongoDB for job {job_id}") client = AsyncIOMotorClient(settings.mongodb_uri) @@ -285,7 +282,9 @@ async def _async_translate_and_synthesize(job_id: str): # Generate descriptive transcript (WCAG 2.1 1.2.1) transcript_gcs_uri = None try: - from 
..services.descriptive_transcript import generate_descriptive_transcript + from ..services.descriptive_transcript import ( + generate_descriptive_transcript, + ) transcript_text = generate_descriptive_transcript(translated_captions, translated_ad) if transcript_text: transcript_gcs_uri = await upload_vtt_to_gcs( @@ -427,7 +426,9 @@ async def _async_translate_and_synthesize(job_id: str): # Generate descriptive transcript (WCAG 2.1 1.2.1) try: - from ..services.descriptive_transcript import generate_descriptive_transcript + from ..services.descriptive_transcript import ( + generate_descriptive_transcript, + ) transcript_text = generate_descriptive_transcript(translated_captions, translated_ad) if transcript_text: transcript_gcs_uri = await upload_vtt_to_gcs( @@ -681,9 +682,16 @@ async def _generate_language_tts(job_id: str, language: str, lang_output: dict, then assembles the results into a combined MP3. """ import io + from celery.result import allow_join_result from pydub import AudioSegment - from .tts_synthesis import dispatch_language_tts, parse_ad_cues, synthesize_cue_task, update_vtt_in_gcs + + from .tts_synthesis import ( + dispatch_language_tts, + parse_ad_cues, + synthesize_cue_task, + update_vtt_in_gcs, + ) if tts_preferences is None: tts_preferences = {} @@ -708,7 +716,7 @@ async def _generate_language_tts(job_id: str, language: str, lang_output: dict, # Preflight budget check before dispatching TTS tts_provider = tts_preferences.get("provider", "gemini") - from .tts_synthesis import _TTS_MODEL_STRINGS, _TTS_PROVIDER_MODEL_MAP + from .tts_synthesis import _TTS_MODEL_STRINGS tts_model_key = tts_preferences.get("model", "flash") await cost_tracker.aio_preflight( model=_TTS_MODEL_STRINGS.get(tts_model_key, tts_model_key), @@ -981,7 +989,6 @@ async def _generate_language_tts(job_id: str, language: str, lang_output: dict, # Trigger accessible video rendering if requested if accessible_video_requested: - from .render_accessible_video import 
render_accessible_video_task # Initialize progress tracking for this language await db.jobs.update_one( @@ -1017,4 +1024,4 @@ async def _generate_language_tts(job_id: str, language: str, lang_output: dict, } } ) - raise \ No newline at end of file + raise diff --git a/backend/app/tasks/tts_synthesis.py b/backend/app/tasks/tts_synthesis.py index 1fb7370..6888ce7 100644 --- a/backend/app/tasks/tts_synthesis.py +++ b/backend/app/tasks/tts_synthesis.py @@ -9,7 +9,6 @@ import asyncio import hashlib import io import time -from typing import Any, Optional from celery import group from celery.result import AsyncResult @@ -18,7 +17,7 @@ from pydub import AudioSegment from ..core.config import settings from ..core.logging import get_logger from ..services.gcs import gcs_service -from ..services.gemini_tts import gemini_tts_service, TTSSynthesisError +from ..services.gemini_tts import gemini_tts_service from ..services.tts import tts_service from . import celery_app @@ -47,7 +46,7 @@ def _record_tts_cost( text: str, user_id: str, job_id: str, - project_id: Optional[str], + project_id: str | None, latency_ms: int, ) -> None: try: @@ -81,15 +80,15 @@ def synthesize_cue_task( text: str, start_time: float, end_time: float, - voice_name: Optional[str], + voice_name: str | None, provider: str, model: str, speed: float, style_prompt: str, stability: float = 0.5, similarity_boost: float = 0.5, - user_id: Optional[str] = None, - cost_project_id: Optional[str] = None, + user_id: str | None = None, + cost_project_id: str | None = None, ) -> dict: """ Synthesize a single AD cue and upload to GCS immediately. 
@@ -200,7 +199,7 @@ def synthesize_cue_task( async def _synthesize_single_cue( text: str, - voice_name: Optional[str], + voice_name: str | None, language: str, provider: str, model: str, @@ -233,8 +232,8 @@ async def _synthesize_single_cue( else: raise ValueError(f"Unknown TTS provider: {provider}") - audio_bytes: Optional[bytes] = None - last_error: Optional[Exception] = None + audio_bytes: bytes | None = None + last_error: Exception | None = None for attempt_provider in providers_to_try: try: @@ -300,7 +299,7 @@ def _upload_cue_to_gcs(job_id: str, language: str, cue_index: int, audio_bytes: return gcs_uri, content_hash -def parse_cue_index_from_blob_name(blob_name: str) -> Optional[int]: +def parse_cue_index_from_blob_name(blob_name: str) -> int | None: """ Parse cue index from GCS blob name, supporting both filename formats: - Legacy: ...ad_cues/cue_0.mp3 → 0 @@ -337,8 +336,8 @@ def dispatch_language_tts( language: str, cues: list[dict], tts_preferences: dict, - user_id: Optional[str] = None, - cost_project_id: Optional[str] = None, + user_id: str | None = None, + cost_project_id: str | None = None, ) -> AsyncResult: """ Dispatch a group of cue synthesis tasks for a language. 
diff --git a/backend/app/telemetry/__init__.py b/backend/app/telemetry/__init__.py index 1a7ca5f..5661d43 100644 --- a/backend/app/telemetry/__init__.py +++ b/backend/app/telemetry/__init__.py @@ -1,28 +1,34 @@ """Telemetry package for OpenTelemetry tracing and metrics collection""" -from .metrics import app_metrics, time_ai_request, time_job_processing, time_storage_operation, time_celery_task +from .metrics import ( + app_metrics, + time_ai_request, + time_celery_task, + time_job_processing, + time_storage_operation, +) from .tracing import ( + TracingContext, get_tracer, instrument_dependencies, instrument_fastapi_app, setup_tracing, trace_ai_operation, - trace_job_pipeline, - trace_storage_operation, - TracingContext, trace_api_request, trace_celery_task, + trace_job_pipeline, + trace_storage_operation, ) __all__ = [ "app_metrics", - "time_ai_request", + "time_ai_request", "time_job_processing", "time_storage_operation", "time_celery_task", "get_tracer", "instrument_dependencies", - "instrument_fastapi_app", + "instrument_fastapi_app", "setup_tracing", "trace_ai_operation", "trace_job_pipeline", @@ -30,4 +36,4 @@ __all__ = [ "TracingContext", "trace_api_request", "trace_celery_task", -] \ No newline at end of file +] diff --git a/backend/app/telemetry/metrics.py b/backend/app/telemetry/metrics.py index 566a0b7..a15458d 100644 --- a/backend/app/telemetry/metrics.py +++ b/backend/app/telemetry/metrics.py @@ -1,7 +1,7 @@ import time -from typing import Optional from opentelemetry import metrics + # from opentelemetry.exporter.prometheus import PrometheusMetricReader # Disabled for local dev from opentelemetry.sdk.metrics import MeterProvider from opentelemetry.sdk.resources import Resource @@ -15,101 +15,101 @@ logger = get_logger(__name__) class ApplicationMetrics: """Central metrics collection for the accessible video platform""" - + def __init__(self): self.setup_metrics() - + # Job processing metrics self.job_total_counter = self.meter.create_counter( 
name="jobs_total", description="Total number of jobs created", unit="1" ) - + self.job_status_gauge = self.meter.create_up_down_counter( name="jobs_by_status", description="Current number of jobs by status", unit="1" ) - + self.job_processing_duration = self.meter.create_histogram( name="job_processing_duration_seconds", description="Time taken to process jobs through each stage", unit="s" ) - + # AI service metrics self.ai_requests_counter = self.meter.create_counter( name="ai_requests_total", description="Total AI service requests", unit="1" ) - + self.ai_request_duration = self.meter.create_histogram( - name="ai_request_duration_seconds", + name="ai_request_duration_seconds", description="Duration of AI service requests", unit="s" ) - + self.ai_confidence_histogram = self.meter.create_histogram( name="ai_confidence_score", description="AI confidence scores distribution", unit="1" ) - + # Storage metrics self.storage_operations_counter = self.meter.create_counter( name="storage_operations_total", description="Total storage operations", unit="1" ) - + self.storage_operation_duration = self.meter.create_histogram( name="storage_operation_duration_seconds", description="Duration of storage operations", unit="s" ) - + # Queue metrics self.queue_tasks_counter = self.meter.create_counter( name="celery_tasks_total", description="Total Celery tasks processed", unit="1" ) - + self.queue_task_duration = self.meter.create_histogram( name="celery_task_duration_seconds", description="Duration of Celery task execution", unit="s" ) - + # User activity metrics self.auth_attempts_counter = self.meter.create_counter( name="auth_attempts_total", description="Total authentication attempts", unit="1" ) - + self.active_users_gauge = self.meter.create_up_down_counter( name="active_users", description="Number of currently active users", unit="1" ) - + # Rate limiting metrics self.rate_limit_counter = self.meter.create_counter( name="rate_limit_checks_total", description="Total rate 
limit checks performed", unit="1" ) - + # Request validation metrics self.validation_counter = self.meter.create_counter( name="request_validation_total", - description="Total request validations performed", + description="Total request validations performed", unit="1" ) - + self.validation_duration = self.meter.create_histogram( name="request_validation_duration_seconds", description="Duration of request validation", @@ -123,20 +123,20 @@ class ApplicationMetrics: "service.version": "1.0.0", "deployment.environment": settings.app_env, }) - + # Set up Prometheus metrics reader (disabled for local dev) # prometheus_reader = PrometheusMetricReader() - + # Create metrics provider provider = MeterProvider( resource=resource, # metric_readers=[prometheus_reader] # Disabled for local dev ) metrics.set_meter_provider(provider) - + # Get meter for this service self.meter = metrics.get_meter("accessible-video-api") - + logger.info("Metrics provider initialized with Prometheus exporter") def start_prometheus_server(self, port: int = 8001): @@ -166,7 +166,7 @@ class ApplicationMetrics: -1, attributes={"status": old_status} ) - + # Increment new status count self.job_status_gauge.add( 1, @@ -184,7 +184,7 @@ class ApplicationMetrics: ) # AI service metrics methods - def record_ai_request(self, service: str, operation: str, language: Optional[str] = None): + def record_ai_request(self, service: str, operation: str, language: str | None = None): """Record AI service request""" attributes = { "service": service, @@ -192,7 +192,7 @@ class ApplicationMetrics: } if language: attributes["language"] = language - + self.ai_requests_counter.add(1, attributes=attributes) def record_ai_request_duration(self, service: str, operation: str, duration_seconds: float): @@ -251,12 +251,12 @@ class ApplicationMetrics: ) # Auth metrics methods - def record_auth_attempt(self, result: str, user_role: Optional[str] = None): + def record_auth_attempt(self, result: str, user_role: str | None = None): 
"""Record authentication attempt""" attributes = {"result": result} if user_role: attributes["user_role"] = user_role - + self.auth_attempts_counter.add(1, attributes=attributes) def update_active_users(self, count_change: int, user_role: str): @@ -273,17 +273,17 @@ app_metrics = ApplicationMetrics() class MetricsTimer: """Context manager for timing operations""" - + def __init__(self, metric_recorder, *args, **kwargs): self.metric_recorder = metric_recorder self.args = args self.kwargs = kwargs self.start_time = None - + def __enter__(self): self.start_time = time.time() return self - + def __exit__(self, exc_type, exc_val, exc_tb): if self.start_time: duration = time.time() - self.start_time @@ -348,7 +348,7 @@ def track_validation_metrics(endpoint: str, method: str, is_valid: bool, validat "error_types": ",".join(error_types) if error_types else "none" } ) - + if hasattr(app_metrics, 'validation_duration'): app_metrics.validation_duration.record( validation_time, @@ -356,4 +356,4 @@ def track_validation_metrics(endpoint: str, method: str, is_valid: bool, validat "endpoint": endpoint, "method": method } - ) \ No newline at end of file + ) diff --git a/backend/app/telemetry/tracing.py b/backend/app/telemetry/tracing.py index 78a911c..a734028 100644 --- a/backend/app/telemetry/tracing.py +++ b/backend/app/telemetry/tracing.py @@ -1,7 +1,7 @@ import logging -from typing import Optional from opentelemetry import trace + # from opentelemetry.exporter.gcp.trace import CloudTraceSpanExporter # Disabled for local dev from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor from opentelemetry.instrumentation.pymongo import PymongoInstrumentor @@ -17,7 +17,7 @@ logger = logging.getLogger(__name__) def setup_tracing(app_name: str = "accessible-video-api", service_version: str = "1.0.0"): """Initialize OpenTelemetry tracing for the application""" - + # Create resource with service information resource = Resource.create({ "service.name": app_name, @@ -25,11 
+25,11 @@ def setup_tracing(app_name: str = "accessible-video-api", service_version: str = "service.namespace": "accessible-video", "deployment.environment": settings.app_env, }) - + # Set up tracer provider tracer_provider = TracerProvider(resource=resource) trace.set_tracer_provider(tracer_provider) - + # Configure span processor and exporter based on environment if settings.app_env == "prod" and settings.gcp_project_id: # Use Google Cloud Trace in production (disabled for local dev) @@ -39,11 +39,13 @@ def setup_tracing(app_name: str = "accessible-video-api", service_version: str = # span_processor = BatchSpanProcessor(cloud_trace_exporter) # tracer_provider.add_span_processor(span_processor) logger.info("Google Cloud Trace disabled for local dev") - + elif settings.otel_exporter_otlp_endpoint: # Use OTLP exporter for other observability platforms - from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter - + from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( + OTLPSpanExporter, + ) + otlp_exporter = OTLPSpanExporter( endpoint=settings.otel_exporter_otlp_endpoint, headers={}, @@ -51,18 +53,18 @@ def setup_tracing(app_name: str = "accessible-video-api", service_version: str = span_processor = BatchSpanProcessor(otlp_exporter) tracer_provider.add_span_processor(span_processor) logger.info(f"Configured OTLP trace exporter: {settings.otel_exporter_otlp_endpoint}") - + else: # Development mode - use console exporter from opentelemetry.sdk.trace.export import ConsoleSpanExporter - + console_exporter = ConsoleSpanExporter() span_processor = BatchSpanProcessor(console_exporter) tracer_provider.add_span_processor(span_processor) logger.info("Configured console trace exporter for development") - + logger.info(f"OpenTelemetry tracing initialized for {app_name}") - + return tracer_provider @@ -83,7 +85,7 @@ def instrument_dependencies(): tracer_provider=trace.get_tracer_provider() ) logger.info("MongoDB instrumentation enabled") - 
+ # Instrument Redis RedisInstrumentor().instrument( tracer_provider=trace.get_tracer_provider() @@ -101,7 +103,7 @@ def trace_async_operation(operation_name: str, **attributes): def decorator(func): async def wrapper(*args, **kwargs): tracer = get_tracer() - + with tracer.start_as_current_span( operation_name, attributes=attributes @@ -115,7 +117,7 @@ def trace_async_operation(operation_name: str, **attributes): span.set_attribute("operation.error_message", str(e)) span.record_exception(e) raise - + return wrapper return decorator @@ -125,7 +127,7 @@ def trace_job_pipeline(job_id: str, pipeline_stage: str): def decorator(func): async def wrapper(*args, **kwargs): tracer = get_tracer() - + with tracer.start_as_current_span( f"job_pipeline.{pipeline_stage}", attributes={ @@ -142,39 +144,39 @@ def trace_job_pipeline(job_id: str, pipeline_stage: str): span.set_attribute("job.error_message", str(e)) span.record_exception(e) raise - + return wrapper return decorator -def trace_ai_operation(operation_type: str, language: Optional[str] = None): +def trace_ai_operation(operation_type: str, language: str | None = None): """Decorator for tracing AI service operations""" def decorator(func): async def wrapper(*args, **kwargs): tracer = get_tracer() - + span_attributes = { "ai.operation_type": operation_type, "ai.provider": "gemini" if "gemini" in operation_type else "google_translate" } - + if language: span_attributes["ai.language"] = language - + with tracer.start_as_current_span( f"ai.{operation_type}", attributes=span_attributes ) as span: try: result = await func(*args, **kwargs) - + # Add result attributes if available if isinstance(result, dict): if "confidence" in result: span.set_attribute("ai.confidence", result["confidence"]) if "language" in result: span.set_attribute("ai.detected_language", result["language"]) - + span.set_attribute("ai.result", "success") return result except Exception as e: @@ -182,7 +184,7 @@ def trace_ai_operation(operation_type: str, 
language: Optional[str] = None): span.set_attribute("ai.error_message", str(e)) span.record_exception(e) raise - + return wrapper return decorator @@ -192,7 +194,7 @@ def trace_storage_operation(operation_type: str, file_path: str): def decorator(func): async def wrapper(*args, **kwargs): tracer = get_tracer() - + with tracer.start_as_current_span( f"storage.{operation_type}", attributes={ @@ -204,65 +206,65 @@ def trace_storage_operation(operation_type: str, file_path: str): try: result = await func(*args, **kwargs) span.set_attribute("storage.result", "success") - + if isinstance(result, str) and result.startswith("gs://"): span.set_attribute("storage.result_uri", result) - + return result except Exception as e: span.set_attribute("storage.result", "error") span.set_attribute("storage.error_message", str(e)) span.record_exception(e) raise - + return wrapper return decorator class TracingContext: """Context manager for manual span creation with attributes""" - - def __init__(self, span_name: str, attributes: Optional[dict] = None): + + def __init__(self, span_name: str, attributes: dict | None = None): self.span_name = span_name self.attributes = attributes or {} self.tracer = get_tracer() self.span = None - + def __enter__(self): self.span = self.tracer.start_span(self.span_name, attributes=self.attributes) return self.span - + def __exit__(self, exc_type, exc_val, exc_tb): if exc_type: self.span.set_attribute("error", True) self.span.set_attribute("error_message", str(exc_val)) self.span.record_exception(exc_val) - + self.span.end() # Convenience functions for common tracing patterns -def trace_api_request(endpoint: str, user_id: Optional[str] = None): +def trace_api_request(endpoint: str, user_id: str | None = None): """Create span for API request with common attributes""" attributes = { "http.route": endpoint, "component": "api" } - + if user_id: attributes["user.id"] = user_id - + return TracingContext(f"api.{endpoint.replace('/', '_')}", attributes) -def 
trace_celery_task(task_name: str, job_id: Optional[str] = None): +def trace_celery_task(task_name: str, job_id: str | None = None): """Create span for Celery task execution""" attributes = { "celery.task_name": task_name, "component": "worker" } - + if job_id: attributes["job.id"] = job_id - - return TracingContext(f"celery.{task_name}", attributes) \ No newline at end of file + + return TracingContext(f"celery.{task_name}", attributes) diff --git a/backend/poetry.lock b/backend/poetry.lock index 9586a29..7e7d2c6 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -205,7 +205,7 @@ description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version == \"3.11\" and python_full_version < \"3.11.3\"" +markers = "python_full_version < \"3.11.3\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -3528,18 +3528,6 @@ files = [ [package.extras] cli = ["click (>=5.0)"] -[[package]] -name = "python-http-client" -version = "3.3.7" -description = "HTTP REST client, simplified for Python" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -groups = ["main"] -files = [ - {file = "python_http_client-3.3.7-py3-none-any.whl", hash = "sha256:ad371d2bbedc6ea15c26179c6222a78bc9308d272435ddf1d5c84f068f249a36"}, - {file = "python_http_client-3.3.7.tar.gz", hash = "sha256:bf841ee45262747e00dec7ee9971dfb8c7d83083f5713596488d67739170cea0"}, -] - [[package]] name = "python-jose" version = "3.5.0" @@ -3737,26 +3725,6 @@ files = [ {file = "ruff-0.1.15.tar.gz", hash = "sha256:f6dfa8c1b21c913c326919056c390966648b680966febcb796cc9d1aaab8564e"}, ] -[[package]] -name = "sendgrid" -version = "6.12.4" -description = "Twilio SendGrid library for Python" -optional = false 
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" -groups = ["main"] -files = [ - {file = "sendgrid-6.12.4-py3-none-any.whl", hash = "sha256:9a211b96241e63bd5b9ed9afcc8608f4bcac426e4a319b3920ab877c8426e92c"}, - {file = "sendgrid-6.12.4.tar.gz", hash = "sha256:9e88b849daf0fa4bdf256c3b5da9f5a3272402c0c2fd6b1928c9de440db0a03d"}, -] - -[package.dependencies] -ecdsa = ">=0.19.1,<1" -python-http-client = ">=3.2.1" -werkzeug = [ - {version = ">=2.3.5", markers = "python_version >= \"3.12\""}, - {version = ">=2.2.0", markers = "python_version == \"3.11\""}, -] - [[package]] name = "sentry-sdk" version = "1.45.1" @@ -4368,24 +4336,6 @@ files = [ {file = "websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee"}, ] -[[package]] -name = "werkzeug" -version = "3.1.3" -description = "The comprehensive WSGI web application library." -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e"}, - {file = "werkzeug-3.1.3.tar.gz", hash = "sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746"}, -] - -[package.dependencies] -MarkupSafe = ">=2.1.1" - -[package.extras] -watchdog = ["watchdog (>=2.3)"] - [[package]] name = "wrapt" version = "1.17.3" @@ -4619,4 +4569,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "3cc20f655353315183d22b53815323c5d6255aeb07a275ebf886b82f77f2a27b" +content-hash = "e87a481b67a73be2b0127090a2f22c71a37adc378fff8bd08fa9a30bd3c6031e" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index bda03bf..490bf76 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -21,7 +21,6 @@ google-cloud-translate = "^3.12.1" google-cloud-texttospeech = "^2.16.3" google-cloud-secret-manager = "^2.18.1" google-genai = "^1.56.0" -sendgrid = "^6.11.0" python-jose = {extras = 
["cryptography"], version = "^3.3.0"} libpass = {extras = ["bcrypt"], version = "^1.9.1"} python-multipart = "^0.0.6" @@ -67,6 +66,8 @@ include = '\.pyi?$' [tool.ruff] target-version = "py311" line-length = 88 + +[tool.ruff.lint] select = [ "E", # pycodestyle errors "W", # pycodestyle warnings @@ -82,7 +83,7 @@ ignore = [ "C901", # too complex ] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401"] [tool.mypy] diff --git a/deploy/apache-video-accessibility.conf b/deploy/apache-video-accessibility.conf index 15ad713..6bed63c 100644 --- a/deploy/apache-video-accessibility.conf +++ b/deploy/apache-video-accessibility.conf @@ -19,20 +19,21 @@ # ── WebSocket proxy (MUST be before /api/ HTTP proxy) ──────── -# mod_proxy_wstunnel handles the Upgrade: websocket header. +# ProxyPassMatch uses regex — takes precedence over Alias even when the +# physical directory /var/www/html/video-accessibility exists on disk. # disablereuse=on keeps long-lived WS connections from blocking pool. -ProxyPass /video-accessibility/api/v1/ws/ ws://127.0.0.1:8012/api/v1/ws/ disablereuse=on +ProxyPassMatch ^/video-accessibility/api/v1/ws/(.*)$ ws://127.0.0.1:8012/api/v1/ws/$1 disablereuse=on ProxyPassReverse /video-accessibility/api/v1/ws/ ws://127.0.0.1:8012/api/v1/ws/ # ── API proxy ───────────────────────────────────────────────── -# Strips /video-accessibility prefix so FastAPI sees /api/v1/... -ProxyPass /video-accessibility/api/ http://127.0.0.1:8012/api/ +# ProxyPassMatch strips /video-accessibility prefix so FastAPI sees /api/v1/... 
+ProxyPassMatch ^/video-accessibility/api/(.*)$ http://127.0.0.1:8012/api/$1 ProxyPassReverse /video-accessibility/api/ http://127.0.0.1:8012/api/ # Swagger / OpenAPI -ProxyPass /video-accessibility/docs http://127.0.0.1:8012/docs -ProxyPassReverse /video-accessibility/docs http://127.0.0.1:8012/docs -ProxyPass /video-accessibility/openapi.json http://127.0.0.1:8012/openapi.json +ProxyPassMatch ^/video-accessibility/docs(/.*)?$ http://127.0.0.1:8012/docs$1 +ProxyPassReverse /video-accessibility/docs http://127.0.0.1:8012/docs +ProxyPassMatch ^/video-accessibility/openapi\.json$ http://127.0.0.1:8012/openapi.json ProxyPassReverse /video-accessibility/openapi.json http://127.0.0.1:8012/openapi.json # ── SPA static files ───────────────────────────────────────── diff --git a/docker-compose.yml.old b/docker-compose.yml.old deleted file mode 100644 index 4513526..0000000 --- a/docker-compose.yml.old +++ /dev/null @@ -1,132 +0,0 @@ -version: '3.8' - -services: - # MongoDB with Replica Set - mongodb: - image: mongo:7.0 - container_name: accessible-video-mongo - restart: unless-stopped - environment: - MONGO_INITDB_ROOT_USERNAME: admin - MONGO_INITDB_ROOT_PASSWORD: password123 - MONGO_INITDB_DATABASE: accessible_video - ports: - - "27017:27017" - volumes: - - mongodb_data:/data/db - - ./mongo-init.js:/docker-entrypoint-initdb.d/init.js:ro - - ./mongo-keyfile:/data/keyfile:ro - command: ["mongod", "--replSet", "rs0", "--bind_ip_all", "--keyFile", "/data/keyfile"] - networks: - - app-network - - # Redis - redis: - image: redis:7.2-alpine - container_name: accessible-video-redis - restart: unless-stopped - ports: - - "6379:6379" - volumes: - - redis_data:/data - networks: - - app-network - - # Backend API - api: - build: - context: ./backend - dockerfile: Dockerfile - target: development - container_name: accessible-video-api - restart: unless-stopped - environment: - - APP_ENV=dev - - 
MONGODB_URL=mongodb://admin:password123@mongodb:27017/accessible_video?authSource=admin&replicaSet=rs0 - - REDIS_URL=redis://redis:6379/0 - - JWT_SECRET_KEY=dev-secret-key-change-in-production - - JWT_REFRESH_SECRET_KEY=dev-refresh-secret-key-change-in-production - - GEMINI_API_KEY=${GEMINI_API_KEY} - - SENDGRID_API_KEY=${SENDGRID_API_KEY} - - ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY} - - GCS_BUCKET_NAME=accessible-video-dev - - GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT} - - OTEL_SERVICE_NAME=accessible-video-api-dev - - OTEL_TRACES_EXPORTER=console - - OTEL_METRICS_EXPORTER=prometheus - - SENTRY_DSN=${SENTRY_DSN} - - SENTRY_ENVIRONMENT=development - ports: - - "8000:8000" - volumes: - - ./backend:/app - - /app/.venv # Keep venv in container - depends_on: - - mongodb - - redis - networks: - - app-network - - # Celery Worker - worker: - build: - context: ./backend - dockerfile: Dockerfile - target: development - container_name: accessible-video-worker - restart: unless-stopped - environment: - - APP_ENV=dev - - MONGODB_URL=mongodb://admin:password123@mongodb:27017/accessible_video?authSource=admin&replicaSet=rs0 - - REDIS_URL=redis://redis:6379/0 - - CELERY_BROKER_URL=redis://redis:6379/0 - - CELERY_RESULT_BACKEND=redis://redis:6379/0 - - GEMINI_API_KEY=${GEMINI_API_KEY} - - ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY} - - GCS_BUCKET_NAME=accessible-video-dev - - GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT} - - OTEL_SERVICE_NAME=accessible-video-worker-dev - - OTEL_TRACES_EXPORTER=console - - OTEL_METRICS_EXPORTER=prometheus - - SENTRY_DSN=${SENTRY_DSN} - - SENTRY_ENVIRONMENT=development - - C_FORCE_ROOT=1 - volumes: - - ./backend:/app - - /app/.venv # Keep venv in container - depends_on: - - mongodb - - redis - command: ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=1"] - networks: - - app-network - - - # Frontend (for local development) - frontend: - build: - context: ./frontend - dockerfile: Dockerfile - container_name: 
accessible-video-frontend - restart: unless-stopped - environment: - - VITE_API_URL=http://localhost:8000 - - VITE_SENTRY_DSN=${VITE_SENTRY_DSN} - - VITE_ENVIRONMENT=development - ports: - - "5173:5173" - volumes: - - ./frontend:/app - - /app/node_modules # Keep node_modules in container - depends_on: - - api - networks: - - app-network - -volumes: - mongodb_data: - redis_data: - -networks: - app-network: - driver: bridge \ No newline at end of file diff --git a/frontend/eslint.config.js b/frontend/eslint.config.js index d94e7de..9a3168a 100644 --- a/frontend/eslint.config.js +++ b/frontend/eslint.config.js @@ -19,5 +19,8 @@ export default tseslint.config([ ecmaVersion: 2020, globals: globals.browser, }, + rules: { + 'react-refresh/only-export-components': ['warn', { allowConstantExport: true }], + }, }, ]) diff --git a/frontend/src/components/Auth/__tests__/RequireAuth.test.tsx b/frontend/src/components/Auth/__tests__/RequireAuth.test.tsx index 950f283..220beb4 100644 --- a/frontend/src/components/Auth/__tests__/RequireAuth.test.tsx +++ b/frontend/src/components/Auth/__tests__/RequireAuth.test.tsx @@ -2,7 +2,7 @@ import { describe, it, expect, vi, beforeEach } from 'vitest' import { render, screen, waitFor } from '../../../test/utils' import { RequireAuth } from '../RequireAuth' import { useAuthStore } from '../../../lib/auth' -import { createMockUser } from '../../../test/utils' + // Mock the auth store vi.mock('../../../lib/auth', () => ({ diff --git a/frontend/src/components/Layout/Sidebar.tsx b/frontend/src/components/Layout/Sidebar.tsx index 373b221..529cc4a 100644 --- a/frontend/src/components/Layout/Sidebar.tsx +++ b/frontend/src/components/Layout/Sidebar.tsx @@ -48,7 +48,7 @@ export function Sidebar({ onMobileClose }: SidebarProps) { // Determine current org from route params or first membership const currentOrgSlug = params.orgSlug || - (memberships.length === 1 ? (memberships[0] as any).organization_slug : null); + (memberships.length === 1 ? 
memberships[0].organization_slug : null); const sidebarItems: SidebarItem[] = [ { @@ -152,8 +152,8 @@ export function Sidebar({ onMobileClose }: SidebarProps) {
{memberships.length === 1 ? (
- {(memberships[0] as any).organization_name} - · {(memberships[0] as any).role_in_org} + {memberships[0].organization_name} + · {memberships[0].role_in_org}
) : (