diff --git a/.DS_Store b/.DS_Store
index bcd98cd..b248104 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/.env.production b/.env.production
new file mode 100644
index 0000000..9f8f20f
--- /dev/null
+++ b/.env.production
@@ -0,0 +1,99 @@
+# =============================================================================
+# Production Environment Variables for Accessible Video Platform
+# =============================================================================
+# IMPORTANT: This file contains sensitive information. Set permissions: chmod 600
+# Location on server: /opt/accessible-video/.env.production
+# =============================================================================
+
+# -----------------------------------------------------------------------------
+# App Configuration
+# -----------------------------------------------------------------------------
+APP_ENV=prod
+API_BASE_URL=https://ai-sandbox.oliver.solutions/video-accessibility-back
+
+# -----------------------------------------------------------------------------
+# Authentication & Security
+# -----------------------------------------------------------------------------
+# IMPORTANT: Generate a secure random secret for JWT_SECRET
+# Example: openssl rand -hex 32
+JWT_SECRET=CHANGE_ME_TO_SECURE_RANDOM_64_CHAR_STRING
+JWT_ALG=HS256
+JWT_ACCESS_TTL_MIN=240
+JWT_REFRESH_TTL_DAYS=7
+COOKIE_DOMAIN=ai-sandbox.oliver.solutions
+COOKIE_SECURE=true
+COOKIE_SAMESITE=Lax
+
+# -----------------------------------------------------------------------------
+# MongoDB Configuration
+# -----------------------------------------------------------------------------
+# MongoDB runs without authentication in the internal Docker network
+MONGODB_DB=accessible_video
+
+# Note: MongoDB connection string is auto-constructed in docker-compose.yml
+# Format: mongodb://mongodb:27017/${MONGODB_DB}
+
+# -----------------------------------------------------------------------------
+# Redis Configuration
+# -----------------------------------------------------------------------------
+# Redis runs without authentication in the internal Docker network
+# No configuration needed - connection strings in docker-compose.yml
+# REDIS_URL=redis://redis:6379/0
+
+# -----------------------------------------------------------------------------
+# Google Cloud Platform (GCP)
+# -----------------------------------------------------------------------------
+GCP_PROJECT_ID=optical-414516
+GCS_BUCKET=accessible-video
+
+# GCP credentials file will be mounted as a volume
+# Location inside container: /secrets/gcp-credentials.json
+# Source file on server: /opt/accessible-video/secrets/gcp-credentials.json
+
+# -----------------------------------------------------------------------------
+# AI Services
+# -----------------------------------------------------------------------------
+# Gemini API (Required) - never commit a real key; rotate any key that was ever committed
+GEMINI_API_KEY=CHANGE_ME_TO_VALID_GEMINI_API_KEY
+
+# Google Cloud Translate (Optional - for translation features)
+TRANSLATE_API_KEY=
+
+# ElevenLabs TTS (Optional - for text-to-speech)
+ELEVENLABS_API_KEY=
+
+# -----------------------------------------------------------------------------
+# Email Configuration (SendGrid)
+# -----------------------------------------------------------------------------
+# IMPORTANT: Get SendGrid API key from https://app.sendgrid.com/settings/api_keys
+SENDGRID_API_KEY=
+
+# Email sender address (must be verified in SendGrid)
+EMAIL_FROM=noreply@ai-sandbox.oliver.solutions
+
+# Client-facing URL (used in emails)
+CLIENT_BASE_URL=https://ai-sandbox.oliver.solutions/video-accessibility
+
+# -----------------------------------------------------------------------------
+# Observability & Monitoring (Optional)
+# -----------------------------------------------------------------------------
+# Sentry for error tracking (leave empty to disable)
+SENTRY_DSN=
+
+# OpenTelemetry endpoint (leave empty to disable)
+OTEL_EXPORTER_OTLP_ENDPOINT=
+
+# =============================================================================
+# DEPLOYMENT CHECKLIST
+# =============================================================================
+# [ ] Generate secure JWT_SECRET (64 characters): openssl rand -hex 32
+# [ ] Verify GCP_PROJECT_ID and GCS_BUCKET
+# [ ] Copy GCP credentials JSON to /opt/accessible-video/secrets/
+# [ ] Update GEMINI_API_KEY with valid key
+# [ ] (Optional) Configure SENDGRID_API_KEY for email notifications
+# [ ] (Optional) Configure ELEVENLABS_API_KEY for TTS
+# [ ] (Optional) Configure SENTRY_DSN for error tracking
+# [ ] Set file permissions: chmod 600 /opt/accessible-video/.env.production
+# [ ] Verify COOKIE_DOMAIN matches your domain
+# [ ] Verify API_BASE_URL and CLIENT_BASE_URL are correct
+# =============================================================================
diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md
new file mode 100644
index 0000000..9d275ff
Binary files /dev/null and b/DEPLOYMENT.md differ
diff --git a/apache-config-snippet.conf b/apache-config-snippet.conf
new file mode 100644
index 0000000..a538f2b
--- /dev/null
+++ b/apache-config-snippet.conf
@@ -0,0 +1,172 @@
+# =============================================================================
+# Apache Configuration for Accessible Video Platform
+# =============================================================================
+# Add this configuration to your existing VirtualHost for ai-sandbox.oliver.solutions
+# Location: /etc/apache2/sites-available/ai-sandbox.oliver.solutions-ssl.conf
+# =============================================================================
+
+# -----------------------------------------------------------------------------
+# Frontend - Static React SPA served from subdirectory
+# -----------------------------------------------------------------------------
+
+# Serve frontend from /video-accessibility subdirectory
+Alias /video-accessibility /var/www/html/video-accessibility
+
+<Directory /var/www/html/video-accessibility>
+    # Basic options
+    Options -Indexes +FollowSymLinks
+    AllowOverride All
+    Require all granted
+
+    # React SPA routing - rewrite all requests to index.html
+    RewriteEngine On
+    RewriteBase /video-accessibility
+
+    # Don't rewrite files or directories that exist
+    RewriteCond %{REQUEST_FILENAME} !-f
+    RewriteCond %{REQUEST_FILENAME} !-d
+
+    # Rewrite everything else to index.html
+    RewriteRule ^ /video-accessibility/index.html [L]
+
+    # Security headers
+    Header always set X-Frame-Options "SAMEORIGIN"
+    Header always set X-Content-Type-Options "nosniff"
+    Header always set X-XSS-Protection "1; mode=block"
+    Header always set Referrer-Policy "strict-origin-when-cross-origin"
+
+    # Cache control for static assets
+    <FilesMatch "\.(js|css|png|jpe?g|gif|svg|ico|woff2?|ttf|eot)$">
+        Header set Cache-Control "public, max-age=31536000, immutable"
+    </FilesMatch>
+
+    # No cache for HTML files
+    <FilesMatch "\.html$">
+        Header set Cache-Control "no-cache, no-store, must-revalidate"
+        Header set Pragma "no-cache"
+        Header set Expires "0"
+    </FilesMatch>
+</Directory>
+
+# -----------------------------------------------------------------------------
+# Backend API - Reverse proxy to Docker container
+# -----------------------------------------------------------------------------
+
+# Proxy backend API to Docker container on port 8000
+<Location /video-accessibility-back>
+    # Preserve original host header
+    ProxyPreserveHost On
+
+    # Proxy HTTP requests
+    ProxyPass http://localhost:8000
+    ProxyPassReverse http://localhost:8000
+
+    # Proxy timeout for long-running video processing. NOTE(review): ProxyTimeout is documented for server/VirtualHost context only - confirm it is accepted here, else move it to the VirtualHost or use "ProxyPass ... timeout=300"
+    ProxyTimeout 300
+
+    # WebSocket support (CRITICAL for real-time job updates)
+    RewriteEngine On
+    RewriteCond %{HTTP:Upgrade} =websocket [NC]
+    RewriteRule /video-accessibility-back/(.*) ws://localhost:8000/$1 [P,L]
+    RewriteCond %{HTTP:Upgrade} !=websocket [NC]
+    RewriteRule /video-accessibility-back/(.*) http://localhost:8000/$1 [P,L]
+
+    # Security headers
+    Header always set X-Frame-Options "SAMEORIGIN"
+    Header always set X-Content-Type-Options "nosniff"
+
+    # CORS is handled by the backend, don't add headers here
+</Location>
+
+# -----------------------------------------------------------------------------
+# Required Apache Modules
+# -----------------------------------------------------------------------------
+
+# Enable these modules with:
+# sudo a2enmod rewrite
+# sudo a2enmod proxy
+# sudo a2enmod proxy_http
+# sudo a2enmod proxy_wstunnel
+# sudo a2enmod headers
+# sudo systemctl restart apache2
+
+# Verify modules are enabled:
+# apache2ctl -M | grep -E '(rewrite|proxy|headers)'
+
+# =============================================================================
+# Full VirtualHost Example
+# =============================================================================
+
+# Example of complete VirtualHost configuration:
+#
+# <VirtualHost *:443>
+#     ServerName ai-sandbox.oliver.solutions
+#     ServerAdmin admin@oliver.solutions
+#
+#     DocumentRoot /var/www/html
+#
+#     # SSL Configuration (with wildcard cert)
+#     SSLEngine on
+#     SSLCertificateFile /path/to/wildcard-ai-sandbox.oliver.solutions.crt
+#     SSLCertificateKeyFile /path/to/wildcard-ai-sandbox.oliver.solutions.key
+#     SSLCertificateChainFile /path/to/chain.crt # If needed
+#
+#     # SSL Protocol and Cipher settings
+#     SSLProtocol all -SSLv2 -SSLv3 -TLSv1 -TLSv1.1
+#     SSLCipherSuite HIGH:!aNULL:!MD5
+#
+#     # Frontend configuration (from above)
+#     Alias /video-accessibility /var/www/html/video-accessibility
+#     <Directory /var/www/html/video-accessibility>
+#         ...
+#     </Directory>
+#
+#     # Backend API configuration (from above)
+#     <Location /video-accessibility-back>
+#         ...
+#     </Location>
+#
+#     # Logging
+#     ErrorLog ${APACHE_LOG_DIR}/ai-sandbox-error.log
+#     CustomLog ${APACHE_LOG_DIR}/ai-sandbox-access.log combined
+# </VirtualHost>
+
+# =============================================================================
+# Testing & Verification
+# =============================================================================
+
+# Test Apache configuration:
+# sudo apache2ctl configtest
+#
+# Restart Apache:
+# sudo systemctl restart apache2
+#
+# Test frontend:
+# curl -I https://ai-sandbox.oliver.solutions/video-accessibility
+#
+# Test backend:
+# curl https://ai-sandbox.oliver.solutions/video-accessibility-back/health
+#
+# Test WebSocket (requires wscat):
+# wscat -c wss://ai-sandbox.oliver.solutions/video-accessibility-back/api/v1/ws/job-list
+
+# =============================================================================
+# Troubleshooting
+# =============================================================================
+
+# Check Apache logs:
+# sudo tail -f /var/log/apache2/ai-sandbox-error.log
+# sudo tail -f /var/log/apache2/ai-sandbox-access.log
+#
+# Check if backend is running:
+# curl http://localhost:8000/health
+#
+# Check Docker containers:
+# cd /opt/accessible-video
+# docker-compose ps
+#
+# Common issues:
+# - 502 Bad Gateway: Backend container not running
+# - 404 Not Found: Frontend not deployed or Apache alias incorrect
+# - WebSocket fails: mod_proxy_wstunnel not enabled
+# - CORS errors: Check backend CORS configuration, not Apache
diff --git a/backend/.dockerignore b/backend/.dockerignore
index 53317a1..c1aebe5 100644
--- a/backend/.dockerignore
+++ b/backend/.dockerignore
@@ -1,9 +1,25 @@
-# Python
+# =============================================================================
+# .dockerignore for Accessible Video Backend
+# Excludes unnecessary files from Docker build context
+# =============================================================================
+
+# Python cache and compiled files
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
+
+# Virtual environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Build artifacts
build/
develop-eggs/
dist/
@@ -21,33 +37,13 @@ wheels/
*.egg
MANIFEST
-# Poetry (keep poetry.lock for reproducible builds)
-# poetry.lock
-
-# Virtual environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# IDE
+# IDE and editor files
.vscode/
.idea/
*.swp
*.swo
*~
-
-# OS
.DS_Store
-.DS_Store?
-._*
-.Spotlight-V100
-.Trashes
-ehthumbs.db
-Thumbs.db
# Testing
.coverage
@@ -58,6 +54,18 @@ htmlcov/
coverage.xml
*.cover
.hypothesis/
+tests/
+test_*.py
+
+# Git
+.git/
+.gitignore
+.github/
+
+# Docker files (don't copy Docker files into the image)
+Dockerfile*
+.dockerignore
+docker-compose*.yml
# Documentation
docs/
@@ -68,25 +76,33 @@ README*
*.log
logs/
-# Git
-.git/
-.gitignore
-
-# Docker
-Dockerfile*
-.dockerignore
-docker-compose*
-
-# CI/CD
-.github/
-
-# Local development
-.env.local
-.env.development
-.env.test
-
# Temporary files
tmp/
temp/
*.tmp
-*.bak
\ No newline at end of file
+*.bak
+
+# CI/CD
+.github/
+
+# Environment files (will be mounted or set via docker-compose)
+.env*
+
+# Old/backup files
+*.old
+
+# Development scripts (not needed in production)
+debug_*.py
+test_*.py
+simple_*.py
+create_test_users.py
+setup_secrets.py
+
+# Keep these important files:
+# - pyproject.toml (dependencies)
+# - poetry.lock (locked versions)
+# - gunicorn_conf.py (API server config)
+# - celery_worker.py (worker entry point)
+# - migrate.py (database migrations)
+# - app/ directory (application code)
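+# - optical-414516-80e2475f6412.json (GCP service-account key: mount it as a volume only. NOTE(review): no pattern above appears to exclude it, so it may be copied into the image - confirm and ignore it)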
diff --git a/backend/.dockerignore.old b/backend/.dockerignore.old
new file mode 100644
index 0000000..53317a1
--- /dev/null
+++ b/backend/.dockerignore.old
@@ -0,0 +1,92 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Poetry (keep poetry.lock for reproducible builds)
+# poetry.lock
+
+# Virtual environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Testing
+.coverage
+.pytest_cache/
+.mypy_cache/
+.tox/
+htmlcov/
+coverage.xml
+*.cover
+.hypothesis/
+
+# Documentation
+docs/
+*.md
+README*
+
+# Logs
+*.log
+logs/
+
+# Git
+.git/
+.gitignore
+
+# Docker
+Dockerfile*
+.dockerignore
+docker-compose*
+
+# CI/CD
+.github/
+
+# Local development
+.env.local
+.env.development
+.env.test
+
+# Temporary files
+tmp/
+temp/
+*.tmp
+*.bak
\ No newline at end of file
diff --git a/backend/Dockerfile b/backend/Dockerfile
index 2fa68fb..7750671 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,18 +1,29 @@
-# Build stage - Install dependencies and build wheels
+# =============================================================================
+# Multi-stage Dockerfile for Accessible Video Processing Platform
+# =============================================================================
+# Stage 1: Builder - Install dependencies
+# Stage 2: Base - Common runtime for API and Worker
+# Stage 3: API - FastAPI + Gunicorn (no ffmpeg)
+# Stage 4: Worker - Celery worker (with ffmpeg for video processing)
+# =============================================================================
+
+# -----------------------------------------------------------------------------
+# Stage 1: Builder - Install Python dependencies using Poetry
+# -----------------------------------------------------------------------------
FROM python:3.11-slim AS builder
# Install build dependencies
-RUN apt-get update && apt-get install -y \
+RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
&& rm -rf /var/lib/apt/lists/*
# Install Poetry
-RUN pip install poetry==1.8.2
+RUN pip install --no-cache-dir poetry==1.8.2
-# Set Poetry configuration
+# Configure Poetry to not create virtual environment (we're in a container)
ENV POETRY_NO_INTERACTION=1 \
- POETRY_VENV_IN_PROJECT=1 \
+ POETRY_VIRTUALENVS_CREATE=false \
POETRY_CACHE_DIR=/tmp/poetry_cache
WORKDIR /app
@@ -20,108 +31,99 @@ WORKDIR /app
# Copy dependency files
COPY pyproject.toml poetry.lock ./
-# Install dependencies into venv
-RUN poetry config virtualenvs.in-project true && \
- poetry lock --no-update || true && \
- poetry install --only=main --no-root && \
- rm -rf $POETRY_CACHE_DIR
+# Install dependencies (using pip for simpler container)
+# Export to requirements.txt and install with pip for smaller image
+RUN poetry export -f requirements.txt --output requirements.txt --without-hashes \
+ && pip install --no-cache-dir --user -r requirements.txt \
+ && rm -rf $POETRY_CACHE_DIR
-# Base runtime stage
+# -----------------------------------------------------------------------------
+# Stage 2: Base - Common runtime environment
+# -----------------------------------------------------------------------------
FROM python:3.11-slim AS base
-# Install runtime system dependencies
-RUN apt-get update && apt-get install -y \
- ffmpeg \
+# Install common runtime dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ libmagic1 \
curl \
tini \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
-# Create non-root user
+# Create non-root user for security
RUN groupadd --gid 1000 app \
&& useradd --uid 1000 --gid app --shell /bin/bash --create-home app
-# Set working directory
+# Copy Python packages from builder
+COPY --from=builder /root/.local /home/app/.local
+
+# Set PATH to include user packages
+ENV PATH="/home/app/.local/bin:$PATH" \
+ PYTHONPATH=/app \
+ PYTHONUNBUFFERED=1 \
+ PYTHONDONTWRITEBYTECODE=1
+
WORKDIR /app
-# Copy virtual environment from builder stage
-COPY --from=builder --chown=app:app /app/.venv /app/.venv
-
-# Ensure venv is in PATH
-ENV PATH="/app/.venv/bin:$PATH"
-
# Copy application code
COPY --chown=app:app . .
# Switch to non-root user
USER app
-# Production API stage
-FROM base AS production
+# -----------------------------------------------------------------------------
+# Stage 3: API - FastAPI + Gunicorn (Production API Server)
+# -----------------------------------------------------------------------------
+FROM base AS api
-# Set environment variables for production
-ENV APP_ENV=prod \
- PYTHONPATH=/app \
- PYTHONUNBUFFERED=1 \
- PYTHONDONTWRITEBYTECODE=1
+# Set production environment variables
+ENV APP_ENV=prod
-# Health check
+# Health check for API
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8000/health || exit 1
-# Expose port
+# Expose API port
EXPOSE 8000
# Use tini as init system for proper signal handling
ENTRYPOINT ["tini", "--"]
-# Default command for API server
-CMD ["gunicorn", "-c", "gunicorn_conf.py"]
+# Start Gunicorn with Uvicorn workers
+CMD ["gunicorn", "-c", "gunicorn_conf.py", "app.main:app"]
-# Worker stage for Celery workers
+# -----------------------------------------------------------------------------
+# Stage 4: Worker - Celery Worker (with ffmpeg for video processing)
+# -----------------------------------------------------------------------------
FROM base AS worker
-# Set environment variables for worker
+# Switch back to root to install ffmpeg
+USER root
+
+# Install ffmpeg for video processing
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ ffmpeg \
+ && rm -rf /var/lib/apt/lists/* \
+ && apt-get clean
+
+# Switch back to non-root user
+USER app
+
+# Set production environment variables
ENV APP_ENV=prod \
- PYTHONPATH=/app \
- PYTHONUNBUFFERED=1 \
- PYTHONDONTWRITEBYTECODE=1 \
- C_FORCE_ROOT=1
+ C_FORCE_ROOT=0
# Health check for worker (check if Celery is responding)
HEALTHCHECK --interval=60s --timeout=15s --start-period=10s --retries=3 \
- CMD python -c "from celery import Celery; app=Celery('app'); print('Worker healthy')" || exit 1
+ CMD python -c "from celery import Celery; app=Celery('accessible-video-tasks', broker='redis://redis:6379/0'); app.control.inspect().ping() or exit(1)" || exit 1
# Use tini as init system for proper signal handling
ENTRYPOINT ["tini", "--"]
-# Default command for Celery worker
-CMD ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=1"]
-
-# Development stage with dev dependencies
-FROM builder AS development
-
-# Install all dependencies including dev
-RUN poetry install --no-root && rm -rf $POETRY_CACHE_DIR
-
-# Install additional dev tools
-RUN apt-get update && apt-get install -y \
- git \
- vim \
- && rm -rf /var/lib/apt/lists/*
-
-# Copy application code
-COPY --chown=app:app . .
-
-# Switch to non-root user
-USER app
-
-# Set environment for development
-ENV APP_ENV=dev \
- PYTHONPATH=/app \
- PYTHONUNBUFFERED=1
-
-EXPOSE 8000
-
-# Development command with hot reload
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
\ No newline at end of file
+# Start Celery worker listening to all queues
+# --concurrency=4 for 4 worker processes (adjust based on CPU cores available)
+CMD ["celery", "-A", "celery_worker", "worker", \
+ "-Q", "default,ingest,notify", \
+ "--loglevel=info", \
+ "--concurrency=4", \
+ "--max-tasks-per-child=100"]
diff --git a/backend/Dockerfile.old b/backend/Dockerfile.old
new file mode 100644
index 0000000..2fa68fb
--- /dev/null
+++ b/backend/Dockerfile.old
@@ -0,0 +1,127 @@
+# Build stage - Install dependencies and build wheels
+FROM python:3.11-slim AS builder
+
+# Install build dependencies
+RUN apt-get update && apt-get install -y \
+ build-essential \
+ curl \
+ && rm -rf /var/lib/apt/lists/*
+
+# Install Poetry
+RUN pip install poetry==1.8.2
+
+# Set Poetry configuration
+ENV POETRY_NO_INTERACTION=1 \
+ POETRY_VENV_IN_PROJECT=1 \
+ POETRY_CACHE_DIR=/tmp/poetry_cache
+
+WORKDIR /app
+
+# Copy dependency files
+COPY pyproject.toml poetry.lock ./
+
+# Install dependencies into venv
+RUN poetry config virtualenvs.in-project true && \
+ poetry lock --no-update || true && \
+ poetry install --only=main --no-root && \
+ rm -rf $POETRY_CACHE_DIR
+
+# Base runtime stage
+FROM python:3.11-slim AS base
+
+# Install runtime system dependencies
+RUN apt-get update && apt-get install -y \
+ ffmpeg \
+ curl \
+ tini \
+ && rm -rf /var/lib/apt/lists/* \
+ && apt-get clean
+
+# Create non-root user
+RUN groupadd --gid 1000 app \
+ && useradd --uid 1000 --gid app --shell /bin/bash --create-home app
+
+# Set working directory
+WORKDIR /app
+
+# Copy virtual environment from builder stage
+COPY --from=builder --chown=app:app /app/.venv /app/.venv
+
+# Ensure venv is in PATH
+ENV PATH="/app/.venv/bin:$PATH"
+
+# Copy application code
+COPY --chown=app:app . .
+
+# Switch to non-root user
+USER app
+
+# Production API stage
+FROM base AS production
+
+# Set environment variables for production
+ENV APP_ENV=prod \
+ PYTHONPATH=/app \
+ PYTHONUNBUFFERED=1 \
+ PYTHONDONTWRITEBYTECODE=1
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+ CMD curl -f http://localhost:8000/health || exit 1
+
+# Expose port
+EXPOSE 8000
+
+# Use tini as init system for proper signal handling
+ENTRYPOINT ["tini", "--"]
+
+# Default command for API server
+CMD ["gunicorn", "-c", "gunicorn_conf.py"]
+
+# Worker stage for Celery workers
+FROM base AS worker
+
+# Set environment variables for worker
+ENV APP_ENV=prod \
+ PYTHONPATH=/app \
+ PYTHONUNBUFFERED=1 \
+ PYTHONDONTWRITEBYTECODE=1 \
+ C_FORCE_ROOT=1
+
+# Health check for worker (check if Celery is responding)
+HEALTHCHECK --interval=60s --timeout=15s --start-period=10s --retries=3 \
+ CMD python -c "from celery import Celery; app=Celery('app'); print('Worker healthy')" || exit 1
+
+# Use tini as init system for proper signal handling
+ENTRYPOINT ["tini", "--"]
+
+# Default command for Celery worker
+CMD ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=1"]
+
+# Development stage with dev dependencies
+FROM builder AS development
+
+# Install all dependencies including dev
+RUN poetry install --no-root && rm -rf $POETRY_CACHE_DIR
+
+# Install additional dev tools
+RUN apt-get update && apt-get install -y \
+ git \
+ vim \
+ && rm -rf /var/lib/apt/lists/*
+
+# Copy application code
+COPY --chown=app:app . .
+
+# Switch to non-root user
+USER app
+
+# Set environment for development
+ENV APP_ENV=dev \
+ PYTHONPATH=/app \
+ PYTHONUNBUFFERED=1
+
+EXPOSE 8000
+
+# Development command with hot reload
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
\ No newline at end of file
diff --git a/backend/app/api/v1/__pycache__/routes_websockets.cpython-313.pyc b/backend/app/api/v1/__pycache__/routes_websockets.cpython-313.pyc
index 8bcdd45..acdb4bb 100644
Binary files a/backend/app/api/v1/__pycache__/routes_websockets.cpython-313.pyc and b/backend/app/api/v1/__pycache__/routes_websockets.cpython-313.pyc differ
diff --git a/backend/app/api/v1/routes_websockets.py b/backend/app/api/v1/routes_websockets.py
index 8b5b360..b1e3180 100644
--- a/backend/app/api/v1/routes_websockets.py
+++ b/backend/app/api/v1/routes_websockets.py
@@ -111,7 +111,7 @@ async def websocket_job_status(
except Exception as e:
logger.error(f"WebSocket job status error: {e}")
finally:
- manager.disconnect(websocket, user_id)
+ await manager.disconnect(websocket, user_id)
@router.websocket("/ws/jobs")
@@ -191,7 +191,7 @@ async def websocket_job_list(
except Exception as e:
logger.error(f"WebSocket job list error: {e}")
finally:
- manager.disconnect(websocket, user_id)
+ await manager.disconnect(websocket, user_id)
@router.get("/ws/status")
diff --git a/backend/app/core/__pycache__/config.cpython-313.pyc b/backend/app/core/__pycache__/config.cpython-313.pyc
index d7c16c0..575521b 100644
Binary files a/backend/app/core/__pycache__/config.cpython-313.pyc and b/backend/app/core/__pycache__/config.cpython-313.pyc differ
diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index fb1fc91..5f6397b 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -63,7 +63,7 @@ class Settings(BaseSettings):
otel_exporter_otlp_endpoint: str = ""
# CORS
- cors_origins: list[str] = ["http://localhost:5173", "http://localhost:3000"]
+ cors_origins: list[str] = ["http://localhost:5173", "http://localhost:5174", "http://localhost:3000"]
class Config:
env_file = ".env"
diff --git a/backend/app/services/__pycache__/gemini.cpython-313.pyc b/backend/app/services/__pycache__/gemini.cpython-313.pyc
index d960f56..1e7d0c5 100644
Binary files a/backend/app/services/__pycache__/gemini.cpython-313.pyc and b/backend/app/services/__pycache__/gemini.cpython-313.pyc differ
diff --git a/backend/app/services/__pycache__/websocket.cpython-313.pyc b/backend/app/services/__pycache__/websocket.cpython-313.pyc
index 7b256d8..9507a99 100644
Binary files a/backend/app/services/__pycache__/websocket.cpython-313.pyc and b/backend/app/services/__pycache__/websocket.cpython-313.pyc differ
diff --git a/backend/app/services/__pycache__/websocket_publisher.cpython-313.pyc b/backend/app/services/__pycache__/websocket_publisher.cpython-313.pyc
new file mode 100644
index 0000000..ddeadba
Binary files /dev/null and b/backend/app/services/__pycache__/websocket_publisher.cpython-313.pyc differ
diff --git a/backend/app/services/gemini.py b/backend/app/services/gemini.py
index a9d01e8..12d0053 100644
--- a/backend/app/services/gemini.py
+++ b/backend/app/services/gemini.py
@@ -34,8 +34,8 @@ class GeminiService:
while total_waited < max_wait_seconds:
try:
- # Get file status
- file_info = client.files.get(name=file_name)
+ # Get file status - use asyncio.to_thread to avoid blocking event loop
+ file_info = await asyncio.to_thread(client.files.get, name=file_name)
logger.info(f"File {file_name} status: {file_info.state} (waited {total_waited}s)")
if file_info.state == "ACTIVE":
@@ -65,13 +65,15 @@ class GeminiService:
Returns structured JSON with transcript, captions VTT, and audio description VTT
"""
prompt = self._load_prompt("gemini_ingestion.md")
+ uploaded_file = None
try:
logger.info(f"Starting Gemini processing for video: {video_file_path}")
- # Upload video file to Gemini using new API
+ # Upload video file to Gemini using new API - use asyncio.to_thread to avoid blocking
logger.info("Uploading video file to Gemini API...")
- uploaded_file = client.files.upload(
+ uploaded_file = await asyncio.to_thread(
+ client.files.upload,
file=video_file_path,
config={
"display_name": f"video_processing_{Path(video_file_path).name}",
@@ -86,9 +88,10 @@ class GeminiService:
if not file_ready:
raise Exception("File failed to become ACTIVE within timeout")
- # Generate content using new API
+ # Generate content using new API - use asyncio.to_thread to avoid blocking
logger.info("Generating content with Gemini model...")
- response = client.models.generate_content(
+ response = await asyncio.to_thread(
+ client.models.generate_content,
model=self.model_name,
contents=[
genai.types.Part.from_text(text=prompt),
@@ -144,12 +147,6 @@ class GeminiService:
f"Successfully extracted accessibility content with confidence: {result['confidence']}"
)
- # Clean up uploaded file
- try:
- client.files.delete(name=uploaded_file.name)
- except Exception as e:
- logger.warning(f"Failed to cleanup uploaded file: {e}")
-
return result
except json.JSONDecodeError as e:
@@ -163,6 +160,14 @@ class GeminiService:
# Print to stdout for immediate visibility
print(f"π¨ GEMINI ERROR: {type(e).__name__}: {str(e)}")
raise
+ finally:
+ # Guaranteed cleanup of uploaded file regardless of success/failure/cancellation
+ if uploaded_file:
+ try:
+ await asyncio.to_thread(client.files.delete, name=uploaded_file.name)
+ logger.info(f"Successfully cleaned up uploaded file: {uploaded_file.name}")
+ except Exception as e:
+ logger.warning(f"Failed to cleanup uploaded file {uploaded_file.name}: {e}")
async def _self_heal_response(self, video_file_path: str, invalid_response: str) -> dict[str, Any]:
"""Attempt to self-heal invalid JSON response from Gemini"""
@@ -196,7 +201,8 @@ Fix the JSON and return it:
"""
try:
- response = client.models.generate_content(
+ response = await asyncio.to_thread(
+ client.models.generate_content,
model=self.model_name,
contents=[genai.types.Part.from_text(text=self_heal_prompt)]
)
@@ -316,7 +322,8 @@ JSON:
"""
try:
- response = client.models.generate_content(
+ response = await asyncio.to_thread(
+ client.models.generate_content,
model=self.model_name,
contents=[
genai.types.Part.from_text(text=prompt + "\n\n" + user_prompt)
diff --git a/backend/app/services/websocket.py b/backend/app/services/websocket.py
index 4e055c3..c08d572 100644
--- a/backend/app/services/websocket.py
+++ b/backend/app/services/websocket.py
@@ -8,16 +8,13 @@ message broadcasting across multiple worker processes.
import asyncio
import json
import logging
-from typing import Dict, List, Set, Optional, Any
from datetime import datetime
+from typing import Any, Optional
-from fastapi import WebSocket, WebSocketDisconnect
import redis.asyncio as redis
-import redis as sync_redis
+from fastapi import WebSocket
from pydantic import BaseModel
-from ..core.redis import get_redis_client
-from ..core.security import decode_token
from ..core.config import settings
logger = logging.getLogger(__name__)
@@ -31,46 +28,49 @@ class JobStatusUpdate(BaseModel):
job_title: Optional[str] = None # Job title for better user experience
message: Optional[str] = None
progress: Optional[int] = None # 0-100 percentage
- metadata: Optional[Dict[str, Any]] = None
+ metadata: Optional[dict[str, Any]] = None
+ eligible_users: Optional[set[str]] = None # Pre-computed eligible users
class ConnectionManager:
"""Manages WebSocket connections and Redis pub/sub for job status updates"""
-
+
def __init__(self):
- # Active WebSocket connections by user_id
- self.active_connections: Dict[str, Set[WebSocket]] = {}
- # Job subscriptions: job_id -> set of user_ids
- self.job_subscriptions: Dict[str, Set[str]] = {}
- # Global job list subscriptions by user_id
- self.global_subscriptions: Set[str] = set()
+ # WebSocket connections by user_id
+ self.user_ws: dict[str, set[WebSocket]] = {}
+ # WebSocket metadata: websocket -> {user_id, jobs, scopes}
+ self.ws_meta: dict[WebSocket, dict[str, Any]] = {}
+ # Job subscriptions: job_id -> set of websockets
+ self.job_ws: dict[str, set[WebSocket]] = {}
+ # Lock serializing coroutine access to the connection maps (asyncio.Lock is not thread-safe)
+ self.lock = asyncio.Lock()
# Redis client for pub/sub
self.redis_client: Optional[redis.Redis] = None
self.pubsub: Optional[redis.client.PubSub] = None
self.subscriber_task: Optional[asyncio.Task] = None
-
+
async def start(self):
"""Initialize Redis pub/sub subscriber"""
try:
- self.redis_client = await redis.from_url(
+ self.redis_client = redis.from_url(
settings.redis_url,
encoding="utf-8",
decode_responses=True
)
self.pubsub = self.redis_client.pubsub()
-
+
# Subscribe to job status channels
await self.pubsub.subscribe("job_status_updates") # Global channel
await self.pubsub.psubscribe("job_status_updates:*") # Pattern for individual job channels
-
+
# Start background task to handle Redis messages
self.subscriber_task = asyncio.create_task(self._redis_subscriber())
logger.info("WebSocket connection manager started")
-
+
except Exception as e:
logger.error(f"Failed to start WebSocket connection manager: {e}")
raise
-
+
async def stop(self):
"""Cleanup Redis connections"""
if self.subscriber_task:
@@ -79,146 +79,170 @@ class ConnectionManager:
await self.subscriber_task
except asyncio.CancelledError:
pass
-
+
if self.pubsub:
await self.pubsub.unsubscribe()
await self.pubsub.punsubscribe()
await self.pubsub.aclose()
-
+
if self.redis_client:
await self.redis_client.aclose()
-
+
logger.info("WebSocket connection manager stopped")
-
+
async def connect_job_status(self, websocket: WebSocket, user_id: str, job_id: str):
"""Connect a WebSocket for specific job status updates"""
await websocket.accept()
-
- # Add connection to active connections
- if user_id not in self.active_connections:
- self.active_connections[user_id] = set()
- self.active_connections[user_id].add(websocket)
-
- # Add job subscription
- if job_id not in self.job_subscriptions:
- self.job_subscriptions[job_id] = set()
- self.job_subscriptions[job_id].add(user_id)
-
+
+ async with self.lock:
+ # Add to user connections
+ if user_id not in self.user_ws:
+ self.user_ws[user_id] = set()
+ self.user_ws[user_id].add(websocket)
+
+ # Initialize/update websocket metadata
+ if websocket not in self.ws_meta:
+ self.ws_meta[websocket] = {
+ "user_id": user_id,
+ "jobs": set(),
+ "scopes": set()
+ }
+ self.ws_meta[websocket]["jobs"].add(job_id)
+
+ # Add to job subscriptions
+ if job_id not in self.job_ws:
+ self.job_ws[job_id] = set()
+ self.job_ws[job_id].add(websocket)
+
logger.info(f"User {user_id} connected for job {job_id} status updates")
-
+
# Send initial connection confirmation
await self._send_to_websocket(websocket, {
"type": "connection_established",
"job_id": job_id,
"timestamp": datetime.utcnow().isoformat()
})
-
+
async def connect_job_list(self, websocket: WebSocket, user_id: str):
"""Connect a WebSocket for job list updates (all jobs for a user)"""
await websocket.accept()
-
- # Add connection to active connections
- if user_id not in self.active_connections:
- self.active_connections[user_id] = set()
- self.active_connections[user_id].add(websocket)
-
- # Add to global subscriptions
- self.global_subscriptions.add(user_id)
-
+
+ async with self.lock:
+ # Add to user connections
+ if user_id not in self.user_ws:
+ self.user_ws[user_id] = set()
+ self.user_ws[user_id].add(websocket)
+
+ # Initialize/update websocket metadata
+ if websocket not in self.ws_meta:
+ self.ws_meta[websocket] = {
+ "user_id": user_id,
+ "jobs": set(),
+ "scopes": set()
+ }
+ self.ws_meta[websocket]["scopes"].add("job_list")
+
logger.info(f"User {user_id} connected for job list updates")
-
+
# Send initial connection confirmation
await self._send_to_websocket(websocket, {
"type": "connection_established",
"scope": "job_list",
"timestamp": datetime.utcnow().isoformat()
})
-
- def disconnect(self, websocket: WebSocket, user_id: str):
+
+ async def disconnect(self, websocket: WebSocket, user_id: str):
"""Disconnect a WebSocket and clean up subscriptions"""
- # Remove from active connections
- if user_id in self.active_connections:
- self.active_connections[user_id].discard(websocket)
- if not self.active_connections[user_id]:
- del self.active_connections[user_id]
-
- # Remove from global subscriptions if no connections left
- if user_id not in self.active_connections:
- self.global_subscriptions.discard(user_id)
-
+ async with self.lock:
+ # Get websocket metadata
+ meta = self.ws_meta.pop(websocket, None)
+ if not meta:
+ return
+
# Remove from job subscriptions
- for job_id in list(self.job_subscriptions.keys()):
- self.job_subscriptions[job_id].discard(user_id)
- if not self.job_subscriptions[job_id]:
- del self.job_subscriptions[job_id]
-
+ for job_id in meta.get("jobs", set()):
+ if job_id in self.job_ws:
+ self.job_ws[job_id].discard(websocket)
+ if not self.job_ws[job_id]:
+ del self.job_ws[job_id]
+
+ # Remove from user connections
+ if user_id in self.user_ws:
+ self.user_ws[user_id].discard(websocket)
+ if not self.user_ws[user_id]:
+ del self.user_ws[user_id]
+
logger.info(f"User {user_id} disconnected from WebSocket")
-
+
async def broadcast_job_status_update(
- self,
- job_id: str,
- status: str,
+ self,
+ job_id: str,
+ status: str,
job_title: Optional[str] = None,
- user_id: Optional[str] = None,
message: Optional[str] = None,
progress: Optional[int] = None,
- metadata: Optional[Dict[str, Any]] = None
+ metadata: Optional[dict[str, Any]] = None
):
"""
- Broadcast job status update to Redis pub/sub
- This will be called from Celery workers
+ Async wrapper for broadcasting job status updates from API routes
+ For Celery workers, use websocket_publisher.publish_job_update_with_eligibility() directly
"""
- update = JobStatusUpdate(
- job_id=job_id,
- status=status,
- updated_at=datetime.utcnow(),
- job_title=job_title,
- message=message,
- progress=progress,
- metadata=metadata
- )
-
- try:
- # Create a synchronous Redis client for Celery workers
- redis_client = sync_redis.Redis.from_url(
- settings.redis_url,
- encoding="utf-8",
- decode_responses=True
+ import asyncio
+ from concurrent.futures import ThreadPoolExecutor
+
+ from .websocket_publisher import publish_job_update_with_eligibility
+
+ # Run the sync publisher in a thread pool
+ loop = asyncio.get_event_loop()
+ with ThreadPoolExecutor(max_workers=1) as executor:
+ await loop.run_in_executor(
+ executor,
+ publish_job_update_with_eligibility,
+ job_id,
+ status,
+ job_title,
+ message,
+ progress,
+ metadata
)
-
- # Publish to global channel
- redis_client.publish(
- "job_status_updates",
- update.model_dump_json()
- )
-
- # Publish to specific job channel
- redis_client.publish(
- f"job_status_updates:{job_id}",
- update.model_dump_json()
- )
-
- # Close the connection
- redis_client.close()
-
- logger.debug(f"Broadcasted status update for job {job_id}: {status}")
-
- except Exception as e:
- logger.error(f"Failed to broadcast job status update: {e}")
-
+
async def _redis_subscriber(self):
- """Background task to handle Redis pub/sub messages"""
- try:
- async for message in self.pubsub.listen():
- # Handle both regular messages and pattern messages
- if message["type"] in ("message", "pmessage"):
- await self._handle_redis_message(message)
- except asyncio.CancelledError:
- logger.info("Redis subscriber task cancelled")
- except Exception as e:
- logger.error(f"Redis subscriber error: {e}")
-
- async def _handle_redis_message(self, message: Dict[str, Any]):
+ """Background task to handle Redis pub/sub messages with reconnection logic"""
+ delay = 1 # Start with 1 second delay
+ max_delay = 30 # Maximum delay of 30 seconds
+
+ while True:
+ try:
+ # (Re)create pubsub connection
+ if self.pubsub:
+ try:
+ await self.pubsub.aclose()
+ except Exception:
+ pass
+
+ self.pubsub = self.redis_client.pubsub()
+
+ # Subscribe to channels
+ await self.pubsub.subscribe("job_status_updates")
+ await self.pubsub.psubscribe("job_status_updates:*")
+
+ logger.info("Redis subscriber connected and subscribed")
+ delay = 1 # Reset delay on successful connection
+
+ # Listen for messages
+ async for message in self.pubsub.listen():
+ if message["type"] in ("message", "pmessage"):
+ await self._handle_redis_message(message)
+
+ except asyncio.CancelledError:
+ logger.info("Redis subscriber task cancelled")
+ break
+ except Exception as e:
+ logger.error(f"Redis subscriber error, retrying in {delay}s: {e}")
+ await asyncio.sleep(delay)
+ delay = min(delay * 2, max_delay) # Exponential backoff
+
+ async def _handle_redis_message(self, message: dict[str, Any]):
"""Handle incoming Redis pub/sub message"""
try:
# For pattern messages, the channel is in the "channel" field
@@ -226,120 +250,135 @@ class ConnectionManager:
channel = message["channel"]
data = json.loads(message["data"])
update = JobStatusUpdate(**data)
-
+
logger.debug(f"Received Redis message on channel '{channel}': {data}")
-
+
# Send to specific job subscribers
if channel.startswith("job_status_updates:"):
job_id = channel.split(":", 1)[1]
logger.debug(f"Sending job status update for job {job_id} to subscribers")
await self._send_job_status_to_subscribers(job_id, update)
-
+
# Send to global subscribers (job list updates)
elif channel == "job_status_updates":
- logger.debug(f"Sending global job status update to subscribers")
+ logger.debug("Sending global job status update to subscribers")
await self._send_job_status_to_global_subscribers(update)
-
+
except Exception as e:
logger.error(f"Failed to handle Redis message: {e}")
-
+
async def _send_job_status_to_subscribers(self, job_id: str, update: JobStatusUpdate):
"""Send job status update to specific job subscribers"""
- if job_id not in self.job_subscriptions:
+ async with self.lock:
+ target_websockets = list(self.job_ws.get(job_id, set()))
+
+ if not target_websockets:
return
-
+
# Convert to JSON-serializable dict
message = {
"type": "job_status_update",
- "data": json.loads(update.model_dump_json())
+ "data": update.model_dump(mode="json")
}
-
- for user_id in list(self.job_subscriptions[job_id]):
- await self._send_to_user(user_id, message)
-
+
+ await self._send_to_websockets(target_websockets, message)
+
async def _send_job_status_to_global_subscribers(self, update: JobStatusUpdate):
"""Send job status update to global (job list) subscribers with user filtering"""
# Convert to JSON-serializable dict
+ message_data = update.model_dump(mode="json")
+ # Remove eligible_users from the client message
+ message_data.pop("eligible_users", None)
message = {
- "type": "job_list_update",
- "data": json.loads(update.model_dump_json())
+ "type": "job_list_update",
+ "data": message_data
}
-
- # Get users who should receive this notification
- eligible_users = await self._get_job_related_users(update.job_id)
-
- # Only send to users who are both subscribed and have access to this job
- for user_id in list(self.global_subscriptions):
- if user_id in eligible_users:
- await self._send_to_user(user_id, message)
-
- async def _get_job_related_users(self, job_id: str) -> Set[str]:
+
+ # Use pre-computed eligible users if available, otherwise compute them
+ eligible_users = getattr(update, 'eligible_users', None)
+ if eligible_users is None:
+ eligible_users = await self._get_job_related_users(update.job_id)
+
+ # Find websockets for eligible users that have job_list scope
+ target_websockets = []
+ async with self.lock:
+ for user_id in eligible_users:
+ for websocket in self.user_ws.get(user_id, set()):
+ meta = self.ws_meta.get(websocket, {})
+ if "job_list" in meta.get("scopes", set()):
+ target_websockets.append(websocket)
+
+ await self._send_to_websockets(target_websockets, message)
+
+ async def _get_job_related_users(self, job_id: str) -> set[str]:
"""
Get all users who should receive notifications for a specific job.
Returns set of user IDs for:
- Job creator (client_id)
- - Reviewers who worked on the job
+ - Reviewers who worked on the job
- Admin users (see all jobs)
"""
eligible_users = set()
-
+
try:
# Import database connection
from ..core.database import get_database
db = await get_database()
-
+
# Get the job
job = await db["jobs"].find_one({"_id": job_id})
if not job:
logger.warning(f"Job {job_id} not found for notification filtering")
return eligible_users
-
+
# Add job creator
if job.get("client_id"):
eligible_users.add(job["client_id"])
-
+
# Add reviewers from review history
review = job.get("review", {})
if review.get("reviewer_id"):
eligible_users.add(review["reviewer_id"])
-
+
# Add reviewers from history
for history_item in review.get("history", []):
if history_item.get("by"):
eligible_users.add(history_item["by"])
-
+
# Add all admin users (they can see all jobs)
admin_users = db["users"].find({"role": "admin"})
async for admin_user in admin_users:
user_id = str(admin_user["_id"])
eligible_users.add(user_id)
-
+
logger.debug(f"Job {job_id} notification eligible users: {len(eligible_users)}")
-
+
except Exception as e:
logger.error(f"Error getting job related users for {job_id}: {e}")
-
+
return eligible_users
-
- async def _send_to_user(self, user_id: str, message: Dict[str, Any]):
- """Send message to all WebSocket connections for a user"""
- if user_id not in self.active_connections:
- return
-
- # Send to all connections for this user
- disconnected_connections = set()
- for websocket in list(self.active_connections[user_id]):
+
+ async def _send_to_websockets(self, websockets: list[WebSocket], message: dict[str, Any]):
+ """Send message to a list of WebSocket connections"""
+ disconnected_websockets = []
+
+ for websocket in websockets:
try:
await self._send_to_websocket(websocket, message)
except Exception as e:
- logger.warning(f"Failed to send to websocket for user {user_id}: {e}")
- disconnected_connections.add(websocket)
-
+ logger.warning(f"Failed to send to websocket: {e}")
+ disconnected_websockets.append(websocket)
+
# Clean up disconnected connections
- for websocket in disconnected_connections:
- self.disconnect(websocket, user_id)
-
- async def _send_to_websocket(self, websocket: WebSocket, message: Dict[str, Any]):
+ for websocket in disconnected_websockets:
+ # Get user_id from metadata before disconnecting
+ async with self.lock:
+ meta = self.ws_meta.get(websocket, {})
+ user_id = meta.get("user_id")
+ if user_id:
+ await self.disconnect(websocket, user_id)
+
+ async def _send_to_websocket(self, websocket: WebSocket, message: dict[str, Any]):
"""Send message to a specific WebSocket connection"""
try:
await websocket.send_json(message)
@@ -352,30 +391,6 @@ class ConnectionManager:
connection_manager = ConnectionManager()
-async def authenticate_websocket(websocket: WebSocket, token: str) -> Optional[str]:
- """
- Authenticate WebSocket connection using JWT token
- Returns user_id if valid, None if invalid
- """
- try:
- if not token:
- await websocket.close(code=4001, reason="Missing authentication token")
- return None
-
- # Decode JWT token
- payload = decode_token(token)
- if not payload or "sub" not in payload:
- await websocket.close(code=4001, reason="Invalid authentication token")
- return None
-
- return payload["sub"] # user_id
-
- except Exception as e:
- logger.warning(f"WebSocket authentication failed: {e}")
- await websocket.close(code=4001, reason="Authentication failed")
- return None
-
-
async def authenticate_websocket(websocket: WebSocket, token: Optional[str]) -> Optional[str]:
"""
Authenticate a WebSocket connection using a JWT token
@@ -386,10 +401,10 @@ async def authenticate_websocket(websocket: WebSocket, token: Optional[str]) ->
logger.warning("WebSocket authentication failed: Missing token")
await websocket.close(code=4001, reason="Missing authentication token")
return None
-
+
# Import JWT decode function
from ..core.security import decode_token
-
+
# Decode JWT token - this may raise HTTPException
try:
payload = decode_token(token)
@@ -397,7 +412,7 @@ async def authenticate_websocket(websocket: WebSocket, token: Optional[str]) ->
logger.warning("WebSocket authentication failed: Invalid token payload")
await websocket.close(code=4001, reason="Invalid authentication token")
return None
-
+
user_id = payload["sub"]
logger.info(f"WebSocket authentication successful for user: {user_id}")
return user_id
@@ -405,7 +420,7 @@ async def authenticate_websocket(websocket: WebSocket, token: Optional[str]) ->
logger.warning(f"WebSocket authentication failed: JWT decode error: {jwt_error}")
await websocket.close(code=4001, reason="Invalid authentication token")
return None
-
+
except Exception as e:
logger.error(f"WebSocket authentication failed with unexpected error: {e}")
await websocket.close(code=4001, reason="Authentication failed")
@@ -414,4 +429,4 @@ async def authenticate_websocket(websocket: WebSocket, token: Optional[str]) ->
async def get_connection_manager() -> ConnectionManager:
"""Dependency to get the connection manager"""
- return connection_manager
\ No newline at end of file
+ return connection_manager
diff --git a/backend/app/services/websocket_publisher.py b/backend/app/services/websocket_publisher.py
new file mode 100644
index 0000000..9a82b6c
--- /dev/null
+++ b/backend/app/services/websocket_publisher.py
@@ -0,0 +1,214 @@
+"""
+Synchronous WebSocket Publisher for Celery Workers
+
+This module provides a synchronous Redis publisher for broadcasting job status
+updates from Celery workers. It maintains a persistent Redis connection pool
+to avoid connection overhead per publish operation.
+"""
+import logging
+import threading
+from datetime import datetime
+from typing import Any, Optional
+
+import redis
+from pydantic import BaseModel
+
+from ..core.config import settings
+
+logger = logging.getLogger(__name__)
+
+# Channel name constants
+CHANNEL_GLOBAL = "job_status_updates"
+CHANNEL_JOB_FMT = "job_status_updates:{job_id}"
+
+
+class JobStatusUpdate(BaseModel):
+ """Schema for job status update messages published to the Redis pub/sub channels"""
+ job_id: str
+ status: str
+ updated_at: datetime # Set to datetime.utcnow() by the publisher when the update is built
+ job_title: Optional[str] = None
+ message: Optional[str] = None
+ progress: Optional[int] = None
+ metadata: Optional[dict[str, Any]] = None
+ eligible_users: Optional[set[str]] = None # Pre-computed eligible users
+
+
+class WebSocketPublisher:
+    """Synchronous Redis publisher for WebSocket updates from Celery workers"""
+
+    def __init__(self):
+        # Client is created lazily on first publish; the lock guards creation/teardown
+        self._redis_client: Optional[redis.Redis] = None
+        self._lock = threading.Lock()
+
+    def _get_client(self) -> redis.Redis:
+        """Return the shared Redis client, creating it on first use"""
+        if self._redis_client is None:
+            with self._lock:
+                # Re-check under the lock so only one thread builds the client
+                if self._redis_client is None:
+                    self._redis_client = redis.Redis.from_url(
+                        settings.redis_url, encoding="utf-8", decode_responses=True
+                    )
+        return self._redis_client
+
+    def publish_job_status_update(
+        self,
+        job_id: str,
+        status: str,
+        job_title: Optional[str] = None,
+        message: Optional[str] = None,
+        progress: Optional[int] = None,
+        metadata: Optional[dict[str, Any]] = None,
+        eligible_users: Optional[set[str]] = None
+    ) -> bool:
+        """
+        Publish a job status update to the global and the job-specific channel.
+
+        The update is stamped with datetime.utcnow(), serialized once, and sent
+        to CHANNEL_GLOBAL and CHANNEL_JOB_FMT.format(job_id=job_id).
+
+        Returns True if successful, False otherwise (errors are logged, not raised)
+        """
+        try:
+            update = JobStatusUpdate(
+                job_id=job_id, status=status, updated_at=datetime.utcnow(),
+                job_title=job_title, message=message, progress=progress,
+                metadata=metadata, eligible_users=eligible_users
+            )
+
+            # Serialize once for both channels. model_dump_json() (Pydantic v2)
+            # already emits compact JSON and takes no json.dumps-style
+            # `separators` keyword; passing it raised TypeError on every call,
+            # which the handler below turned into a silent `return False`.
+            payload = update.model_dump_json()
+
+            # Use pipeline for atomic publish
+            client = self._get_client()
+            with client.pipeline() as pipe:
+                pipe.publish(CHANNEL_GLOBAL, payload)
+                pipe.publish(CHANNEL_JOB_FMT.format(job_id=job_id), payload)
+                pipe.execute()
+
+            logger.debug(f"Published status update for job {job_id}: {status}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Failed to publish job status update for {job_id}: {e}")
+            return False
+
+    def close(self):
+        """Close Redis connection"""
+        if self._redis_client:
+            with self._lock:
+                if self._redis_client:
+                    self._redis_client.connection_pool.disconnect()
+                    self._redis_client = None
+                    logger.info("WebSocket publisher Redis connection closed")
+
+
+# Global publisher instance for Celery workers
+_publisher = WebSocketPublisher()
+
+
+def publish_job_status_update(
+    job_id: str,
+    status: str,
+    job_title: Optional[str] = None,
+    message: Optional[str] = None,
+    progress: Optional[int] = None,
+    metadata: Optional[dict[str, Any]] = None,
+    eligible_users: Optional[set[str]] = None
+) -> bool:
+    """
+    Module-level shortcut that forwards to the shared publisher instance.
+    Celery workers call this instead of handling WebSocketPublisher directly.
+    Returns the publisher's result: True on success, False otherwise.
+    """
+    update_fields = dict(
+        job_title=job_title,
+        message=message,
+        progress=progress,
+        metadata=metadata,
+        eligible_users=eligible_users,
+    )
+    return _publisher.publish_job_status_update(job_id, status, **update_fields)
+
+
+def close_publisher():
+ """Close the global publisher's Redis connection - call this on worker shutdown"""
+ _publisher.close()
+
+
+def get_job_eligible_users(job_id: str) -> set[str]:
+    """
+    Get eligible users for a job (synchronous version for Celery workers)
+    This should be called at publish time to avoid DB lookups in the hot path
+    Returns an empty set if the job is not found; errors are logged, not raised
+    """
+    eligible_users = set()
+    client = None
+    try:
+        from pymongo import MongoClient
+
+        # Synchronous MongoDB client for Celery workers; released in `finally`
+        client = MongoClient(settings.mongodb_url)
+        db = client[settings.database_name]
+
+        # Get the job
+        job = db["jobs"].find_one({"_id": job_id})
+        if not job:
+            logger.warning(f"Job {job_id} not found for eligibility check")
+            return eligible_users
+
+        # Add job creator
+        if job.get("client_id"):
+            eligible_users.add(job["client_id"])
+
+        # Add the reviewer recorded on the job's review
+        review = job.get("review", {})
+        if review.get("reviewer_id"):
+            eligible_users.add(review["reviewer_id"])
+
+        # Add reviewers from history
+        for history_item in review.get("history", []):
+            if history_item.get("by"):
+                eligible_users.add(history_item["by"])
+
+        # Add all admin users (they can see all jobs)
+        for admin_user in db["users"].find({"role": "admin"}):
+            eligible_users.add(str(admin_user["_id"]))
+
+        logger.debug(f"Job {job_id} eligible users: {len(eligible_users)}")
+    except Exception as e:
+        logger.error(f"Error getting eligible users for job {job_id}: {e}")
+    finally:
+        if client is not None:
+            client.close()
+
+    return eligible_users
+
+
+def publish_job_update_with_eligibility(
+    job_id: str,
+    status: str,
+    job_title: Optional[str] = None,
+    message: Optional[str] = None,
+    progress: Optional[int] = None,
+    metadata: Optional[dict[str, Any]] = None
+) -> bool:
+    """
+    Convenience function that computes eligible users and publishes
+    This is the recommended function for Celery workers to use
+    Returns True if the update was published, False otherwise
+    """
+    # get_job_eligible_users() yields an empty set when the job is missing or
+    # the lookup failed. Publish None in that case so the subscriber falls back
+    # to its own lookup instead of treating the update as visible to nobody.
+    eligible_users = get_job_eligible_users(job_id) or None
+    return publish_job_status_update(
+        job_id=job_id, status=status, job_title=job_title,
+        message=message, progress=progress, metadata=metadata,
+        eligible_users=eligible_users
+    )
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index 4837684..06b19d1 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -1,110 +1,64 @@
+# =============================================================================
+# Docker Compose Production Overrides
+# =============================================================================
+# Usage: docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d
+# =============================================================================
+
version: '3.8'
services:
- # MongoDB with Replica Set
+ # ---------------------------------------------------------------------------
+ # MongoDB - Production Settings
+ # ---------------------------------------------------------------------------
mongodb:
- image: mongo:7.0
- container_name: accessible-video-mongo-prod
- restart: unless-stopped
- environment:
- MONGO_INITDB_ROOT_USERNAME: ${MONGODB_ROOT_USER:-admin}
- MONGO_INITDB_ROOT_PASSWORD: ${MONGODB_ROOT_PASSWORD}
- MONGO_INITDB_DATABASE: accessible_video
- ports:
- - "27017:27017"
- volumes:
- - mongodb_data_prod:/data/db
- - ./mongo-init.js:/docker-entrypoint-initdb.d/init.js:ro
- - ./mongo-keyfile:/data/keyfile:ro
- command: ["mongod", "--replSet", "rs0", "--bind_ip_all", "--keyFile", "/data/keyfile"]
- networks:
- - app-network-prod
+ deploy:
+ resources:
+ limits:
+ memory: 4G
+ cpus: '1.0'
+ reservations:
+ memory: 2G
+ cpus: '0.5'
- # Redis
+ # ---------------------------------------------------------------------------
+ # Redis - Production Settings
+ # ---------------------------------------------------------------------------
redis:
- image: redis:7.2-alpine
- container_name: accessible-video-redis-prod
- restart: unless-stopped
- ports:
- - "6379:6379"
- volumes:
- - redis_data_prod:/data
- networks:
- - app-network-prod
+ deploy:
+ resources:
+ limits:
+ memory: 2G
+ cpus: '0.5'
+ reservations:
+ memory: 1G
+ cpus: '0.25'
- # Backend API
+ # ---------------------------------------------------------------------------
+ # API - Production Settings
+ # ---------------------------------------------------------------------------
api:
- build:
- context: ./backend
- dockerfile: Dockerfile
- target: production
- container_name: accessible-video-api-prod
- restart: unless-stopped
+ deploy:
+ resources:
+ limits:
+ memory: 4G
+ cpus: '2.0'
+ reservations:
+ memory: 2G
+ cpus: '1.0'
environment:
- - APP_ENV=production
- - MONGODB_URL=${MONGODB_URL}
- - REDIS_URL=${REDIS_URL}
- - JWT_SECRET_KEY=${JWT_SECRET_KEY}
- - JWT_REFRESH_SECRET_KEY=${JWT_REFRESH_SECRET_KEY}
- - GEMINI_API_KEY=${GEMINI_API_KEY}
- - SENDGRID_API_KEY=${SENDGRID_API_KEY}
- - ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
- - GCS_BUCKET_NAME=${GCS_BUCKET_NAME}
- - GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT}
- - OTEL_SERVICE_NAME=accessible-video-api-prod
- - SENTRY_DSN=${SENTRY_DSN}
- - SENTRY_ENVIRONMENT=production
- - CORS_ORIGINS=${CORS_ORIGINS:-https://your-domain.com,https://www.your-domain.com}
- ports:
- - "8000:8000"
- depends_on:
- - mongodb
- - redis
- networks:
- - app-network-prod
- healthcheck:
- test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
- interval: 30s
- timeout: 10s
- retries: 3
+ APP_ENV: prod
- # Celery Worker
+ # ---------------------------------------------------------------------------
+ # Worker - Production Settings
+ # ---------------------------------------------------------------------------
worker:
- build:
- context: ./backend
- dockerfile: Dockerfile
- target: production
- container_name: accessible-video-worker-prod
- restart: unless-stopped
+ deploy:
+ resources:
+ limits:
+ memory: 8G
+ cpus: '4.0'
+ reservations:
+ memory: 4G
+ cpus: '2.0'
environment:
- - APP_ENV=production
- - MONGODB_URL=${MONGODB_URL}
- - REDIS_URL=${REDIS_URL}
- - CELERY_BROKER_URL=${REDIS_URL}
- - CELERY_RESULT_BACKEND=${REDIS_URL}
- - GEMINI_API_KEY=${GEMINI_API_KEY}
- - ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
- - GCS_BUCKET_NAME=${GCS_BUCKET_NAME}
- - GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT}
- - OTEL_SERVICE_NAME=accessible-video-worker-prod
- - SENTRY_DSN=${SENTRY_DSN}
- - SENTRY_ENVIRONMENT=production
- depends_on:
- - mongodb
- - redis
- command: ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=2"]
- networks:
- - app-network-prod
-
-
- # Note: Frontend will be built separately and hosted on Apache webserver
- # Build command: cd frontend && npm run build
- # Deploy the 'dist' folder contents to your Apache document root
-
-volumes:
- mongodb_data_prod:
- redis_data_prod:
-
-networks:
- app-network-prod:
- driver: bridge
\ No newline at end of file
+ APP_ENV: prod
diff --git a/docker-compose.yml b/docker-compose.yml
index 4513526..955e017 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,132 +1,210 @@
+# =============================================================================
+# Docker Compose Configuration for Accessible Video Processing Platform
+# =============================================================================
+# Services:
+# - api: FastAPI + Gunicorn REST API
+# - worker: Celery worker for background processing
+# - mongodb: MongoDB database
+# - redis: Redis for Celery broker and cache
+# =============================================================================
+
version: '3.8'
services:
- # MongoDB with Replica Set
+ # ---------------------------------------------------------------------------
+ # MongoDB Database
+ # ---------------------------------------------------------------------------
mongodb:
image: mongo:7.0
- container_name: accessible-video-mongo
+ container_name: accessible-video-mongodb
restart: unless-stopped
environment:
- MONGO_INITDB_ROOT_USERNAME: admin
- MONGO_INITDB_ROOT_PASSWORD: password123
- MONGO_INITDB_DATABASE: accessible_video
- ports:
- - "27017:27017"
+ MONGO_INITDB_DATABASE: ${MONGODB_DB:-accessible_video}
volumes:
- - mongodb_data:/data/db
- - ./mongo-init.js:/docker-entrypoint-initdb.d/init.js:ro
- - ./mongo-keyfile:/data/keyfile:ro
- command: ["mongod", "--replSet", "rs0", "--bind_ip_all", "--keyFile", "/data/keyfile"]
+ - mongodb-data:/data/db
+ - mongodb-config:/data/configdb
networks:
- - app-network
+ - accessible-video-network
+ healthcheck:
+ test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
+ interval: 30s
+ timeout: 10s
+ retries: 3
+ start_period: 10s
+ logging:
+ driver: "json-file"
+ options:
+ max-size: "10m"
+ max-file: "3"
- # Redis
+ # ---------------------------------------------------------------------------
+ # Redis Cache and Message Broker
+ # ---------------------------------------------------------------------------
redis:
- image: redis:7.2-alpine
+ image: redis:7-alpine
container_name: accessible-video-redis
restart: unless-stopped
- ports:
- - "6379:6379"
+ command: redis-server --appendonly yes --maxmemory 2gb --maxmemory-policy allkeys-lru
volumes:
- - redis_data:/data
+ - redis-data:/data
networks:
- - app-network
+ - accessible-video-network
+ healthcheck:
+ test: ["CMD", "redis-cli", "ping"]
+ interval: 30s
+ timeout: 10s
+ retries: 3
+ start_period: 5s
+ logging:
+ driver: "json-file"
+ options:
+ max-size: "10m"
+ max-file: "3"
- # Backend API
+ # ---------------------------------------------------------------------------
+ # FastAPI Backend API
+ # ---------------------------------------------------------------------------
api:
build:
context: ./backend
dockerfile: Dockerfile
- target: development
+ target: api
container_name: accessible-video-api
restart: unless-stopped
- environment:
- - APP_ENV=dev
- - MONGODB_URL=mongodb://admin:password123@mongodb:27017/accessible_video?authSource=admin&replicaSet=rs0
- - REDIS_URL=redis://redis:6379/0
- - JWT_SECRET_KEY=dev-secret-key-change-in-production
- - JWT_REFRESH_SECRET_KEY=dev-refresh-secret-key-change-in-production
- - GEMINI_API_KEY=${GEMINI_API_KEY}
- - SENDGRID_API_KEY=${SENDGRID_API_KEY}
- - ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
- - GCS_BUCKET_NAME=accessible-video-dev
- - GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT}
- - OTEL_SERVICE_NAME=accessible-video-api-dev
- - OTEL_TRACES_EXPORTER=console
- - OTEL_METRICS_EXPORTER=prometheus
- - SENTRY_DSN=${SENTRY_DSN}
- - SENTRY_ENVIRONMENT=development
+ depends_on:
+ mongodb:
+ condition: service_healthy
+ redis:
+ condition: service_healthy
ports:
- "8000:8000"
- volumes:
- - ./backend:/app
- - /app/.venv # Keep venv in container
- depends_on:
- - mongodb
- - redis
- networks:
- - app-network
+ environment:
+ # App configuration
+ APP_ENV: ${APP_ENV:-dev}
+ API_BASE_URL: ${API_BASE_URL:-http://localhost:8000}
- # Celery Worker
+ # Auth
+ JWT_SECRET: ${JWT_SECRET}
+ JWT_ALG: ${JWT_ALG:-HS256}
+ JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240}
+ JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7}
+ COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions}
+ COOKIE_SECURE: ${COOKIE_SECURE:-true}
+ COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax}
+
+ # Database
+ MONGODB_URI: mongodb://mongodb:27017/${MONGODB_DB:-accessible_video}
+ MONGODB_DB: ${MONGODB_DB:-accessible_video}
+
+ # Redis
+ REDIS_URL: redis://redis:6379/0
+ CELERY_BROKER_URL: redis://redis:6379/0
+ CELERY_RESULT_BACKEND: redis://redis:6379/0
+
+ # GCP
+ GCP_PROJECT_ID: ${GCP_PROJECT_ID}
+ GCS_BUCKET: ${GCS_BUCKET:-accessible-video}
+ GOOGLE_APPLICATION_CREDENTIALS: /secrets/gcp-credentials.json
+
+ # AI Services
+ GEMINI_API_KEY: ${GEMINI_API_KEY}
+ TRANSLATE_API_KEY: ${TRANSLATE_API_KEY:-}
+ ELEVENLABS_API_KEY: ${ELEVENLABS_API_KEY:-}
+
+ # Email
+ SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
+ EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
+ CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
+
+ # Observability
+ SENTRY_DSN: ${SENTRY_DSN:-}
+ OTEL_EXPORTER_OTLP_ENDPOINT: ${OTEL_EXPORTER_OTLP_ENDPOINT:-}
+ volumes:
+ - ./secrets:/secrets:ro
+ - api-logs:/app/logs
+ networks:
+ - accessible-video-network
+ logging:
+ driver: "json-file"
+ options:
+ max-size: "10m"
+ max-file: "3"
+
+ # ---------------------------------------------------------------------------
+ # Celery Worker for Background Processing
+ # ---------------------------------------------------------------------------
worker:
build:
context: ./backend
dockerfile: Dockerfile
- target: development
+ target: worker
container_name: accessible-video-worker
restart: unless-stopped
- environment:
- - APP_ENV=dev
- - MONGODB_URL=mongodb://admin:password123@mongodb:27017/accessible_video?authSource=admin&replicaSet=rs0
- - REDIS_URL=redis://redis:6379/0
- - CELERY_BROKER_URL=redis://redis:6379/0
- - CELERY_RESULT_BACKEND=redis://redis:6379/0
- - GEMINI_API_KEY=${GEMINI_API_KEY}
- - ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
- - GCS_BUCKET_NAME=accessible-video-dev
- - GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT}
- - OTEL_SERVICE_NAME=accessible-video-worker-dev
- - OTEL_TRACES_EXPORTER=console
- - OTEL_METRICS_EXPORTER=prometheus
- - SENTRY_DSN=${SENTRY_DSN}
- - SENTRY_ENVIRONMENT=development
- - C_FORCE_ROOT=1
- volumes:
- - ./backend:/app
- - /app/.venv # Keep venv in container
depends_on:
- - mongodb
- - redis
- command: ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=1"]
- networks:
- - app-network
-
-
- # Frontend (for local development)
- frontend:
- build:
- context: ./frontend
- dockerfile: Dockerfile
- container_name: accessible-video-frontend
- restart: unless-stopped
+ mongodb:
+ condition: service_healthy
+ redis:
+ condition: service_healthy
environment:
- - VITE_API_URL=http://localhost:8000
- - VITE_SENTRY_DSN=${VITE_SENTRY_DSN}
- - VITE_ENVIRONMENT=development
- ports:
- - "5173:5173"
+ # App configuration
+ APP_ENV: ${APP_ENV:-dev}
+
+ # Database
+ MONGODB_URI: mongodb://mongodb:27017/${MONGODB_DB:-accessible_video}
+ MONGODB_DB: ${MONGODB_DB:-accessible_video}
+
+ # Redis
+ REDIS_URL: redis://redis:6379/0
+ CELERY_BROKER_URL: redis://redis:6379/0
+ CELERY_RESULT_BACKEND: redis://redis:6379/0
+
+ # GCP
+ GCP_PROJECT_ID: ${GCP_PROJECT_ID}
+ GCS_BUCKET: ${GCS_BUCKET:-accessible-video}
+ GOOGLE_APPLICATION_CREDENTIALS: /secrets/gcp-credentials.json
+
+ # AI Services
+ GEMINI_API_KEY: ${GEMINI_API_KEY}
+ TRANSLATE_API_KEY: ${TRANSLATE_API_KEY:-}
+ ELEVENLABS_API_KEY: ${ELEVENLABS_API_KEY:-}
+
+ # Email
+ SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
+ EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
+ CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
+
+ # Observability
+ SENTRY_DSN: ${SENTRY_DSN:-}
volumes:
- - ./frontend:/app
- - /app/node_modules # Keep node_modules in container
- depends_on:
- - api
+ - ./secrets:/secrets:ro
+ - worker-logs:/app/logs
networks:
- - app-network
-
-volumes:
- mongodb_data:
- redis_data:
+ - accessible-video-network
+ logging:
+ driver: "json-file"
+ options:
+ max-size: "10m"
+ max-file: "3"
+# =============================================================================
+# Networks
+# =============================================================================
networks:
- app-network:
- driver: bridge
\ No newline at end of file
+ accessible-video-network:
+ driver: bridge
+ name: accessible-video-network
+
+# =============================================================================
+# Volumes
+# =============================================================================
+volumes:
+ mongodb-data:
+ name: accessible-video-mongodb-data
+ mongodb-config:
+ name: accessible-video-mongodb-config
+ redis-data:
+ name: accessible-video-redis-data
+ api-logs:
+ name: accessible-video-api-logs
+ worker-logs:
+ name: accessible-video-worker-logs
diff --git a/docker-compose.yml.old b/docker-compose.yml.old
new file mode 100644
index 0000000..4513526
--- /dev/null
+++ b/docker-compose.yml.old
@@ -0,0 +1,132 @@
+version: '3.8'
+
+services:
+ # MongoDB with Replica Set
+ mongodb:
+ image: mongo:7.0
+ container_name: accessible-video-mongo
+ restart: unless-stopped
+ environment:
+ MONGO_INITDB_ROOT_USERNAME: admin
+ MONGO_INITDB_ROOT_PASSWORD: password123
+ MONGO_INITDB_DATABASE: accessible_video
+ ports:
+ - "27017:27017"
+ volumes:
+ - mongodb_data:/data/db
+ - ./mongo-init.js:/docker-entrypoint-initdb.d/init.js:ro
+ - ./mongo-keyfile:/data/keyfile:ro
+ command: ["mongod", "--replSet", "rs0", "--bind_ip_all", "--keyFile", "/data/keyfile"]
+ networks:
+ - app-network
+
+ # Redis
+ redis:
+ image: redis:7.2-alpine
+ container_name: accessible-video-redis
+ restart: unless-stopped
+ ports:
+ - "6379:6379"
+ volumes:
+ - redis_data:/data
+ networks:
+ - app-network
+
+ # Backend API
+ api:
+ build:
+ context: ./backend
+ dockerfile: Dockerfile
+ target: development
+ container_name: accessible-video-api
+ restart: unless-stopped
+ environment:
+ - APP_ENV=dev
+ - MONGODB_URL=mongodb://admin:password123@mongodb:27017/accessible_video?authSource=admin&replicaSet=rs0
+ - REDIS_URL=redis://redis:6379/0
+ - JWT_SECRET_KEY=dev-secret-key-change-in-production
+ - JWT_REFRESH_SECRET_KEY=dev-refresh-secret-key-change-in-production
+ - GEMINI_API_KEY=${GEMINI_API_KEY}
+ - SENDGRID_API_KEY=${SENDGRID_API_KEY}
+ - ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
+ - GCS_BUCKET_NAME=accessible-video-dev
+ - GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT}
+ - OTEL_SERVICE_NAME=accessible-video-api-dev
+ - OTEL_TRACES_EXPORTER=console
+ - OTEL_METRICS_EXPORTER=prometheus
+ - SENTRY_DSN=${SENTRY_DSN}
+ - SENTRY_ENVIRONMENT=development
+ ports:
+ - "8000:8000"
+ volumes:
+ - ./backend:/app
+ - /app/.venv # Keep venv in container
+ depends_on:
+ - mongodb
+ - redis
+ networks:
+ - app-network
+
+ # Celery Worker
+ worker:
+ build:
+ context: ./backend
+ dockerfile: Dockerfile
+ target: development
+ container_name: accessible-video-worker
+ restart: unless-stopped
+ environment:
+ - APP_ENV=dev
+ - MONGODB_URL=mongodb://admin:password123@mongodb:27017/accessible_video?authSource=admin&replicaSet=rs0
+ - REDIS_URL=redis://redis:6379/0
+ - CELERY_BROKER_URL=redis://redis:6379/0
+ - CELERY_RESULT_BACKEND=redis://redis:6379/0
+ - GEMINI_API_KEY=${GEMINI_API_KEY}
+ - ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
+ - GCS_BUCKET_NAME=accessible-video-dev
+ - GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT}
+ - OTEL_SERVICE_NAME=accessible-video-worker-dev
+ - OTEL_TRACES_EXPORTER=console
+ - OTEL_METRICS_EXPORTER=prometheus
+ - SENTRY_DSN=${SENTRY_DSN}
+ - SENTRY_ENVIRONMENT=development
+ - C_FORCE_ROOT=1
+ volumes:
+ - ./backend:/app
+ - /app/.venv # Keep venv in container
+ depends_on:
+ - mongodb
+ - redis
+ command: ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=1"]
+ networks:
+ - app-network
+
+
+ # Frontend (for local development)
+ frontend:
+ build:
+ context: ./frontend
+ dockerfile: Dockerfile
+ container_name: accessible-video-frontend
+ restart: unless-stopped
+ environment:
+ - VITE_API_URL=http://localhost:8000
+ - VITE_SENTRY_DSN=${VITE_SENTRY_DSN}
+ - VITE_ENVIRONMENT=development
+ ports:
+ - "5173:5173"
+ volumes:
+ - ./frontend:/app
+ - /app/node_modules # Keep node_modules in container
+ depends_on:
+ - api
+ networks:
+ - app-network
+
+volumes:
+ mongodb_data:
+ redis_data:
+
+networks:
+ app-network:
+ driver: bridge
\ No newline at end of file
diff --git a/docs/prompt_closed_captions.md b/docs/prompt_closed_captions.md
new file mode 100644
index 0000000..4096b93
--- /dev/null
+++ b/docs/prompt_closed_captions.md
@@ -0,0 +1,100 @@
+This is a comprehensive AI prompt created by converting the DCMP closed captioning guidelines into a set of actionable instructions.
+
+This prompt is designed to be given to an AI model along with a raw transcript of a video. It instructs the AI on how to format the text, add non-speech elements, and adhere to accessibility best practices.
+
+These rules significantly enhance the quality and accessibility of the captions by focusing on grammatical integrity, speaker context, and emotional tone.
+
+---
+# AI Prompt for Generating and Verifying Accessible Closed Captions (Broadcast Standard)
+
+**Your Role:** You are an expert, end-to-end AI Closed Captioning Engine. Your function is to analyze, create, and quality-control professional, accessible WEBVTT caption files to a broadcast-ready standard.
+
+**Primary Goal:** To autonomously produce a single, production-ready, and error-free WEBVTT file that is perfectly synchronized with the provided video. The final output must be so accurate and well-formatted that it requires no human intervention.
+
+---
+
+## Your Workflow: A Three-Step Process
+
+You must execute the following three steps internally for every task:
+
+### Step 1: Comprehensive Analysis
+* First, thoroughly analyze the video's audio and visual content.
+* Identify all spoken dialogue, distinguish between different speakers, and note their tone, dialect, and any regional accents.
+* Listen for and identify all non-speech audio cues essential for a deaf or hard-of-hearing viewer, including music, sound effects, and significant silences.
+
+### Step 2: Creation & Synchronization
+* Based on your analysis, generate the caption text according to the **Core Captioning Instructions & Rules** listed below.
+* Meticulously synchronize each caption cue with the audio timeline. Timestamps must be precise, marking the exact start and end of each audio event.
+
+### Step 3: Final Quality Control (QC) Verification
+* **Before finalizing your output, you must perform a rigorous self-check.** Review your generated WEBVTT file against the following critical QC checklist. If any point fails, you must correct it before presenting the final result.
+
+ * **QC Checklist:**
+ * **Format:** Is the file in valid WEBVTT format? Is the `WEBVTT` header present? Are timestamps in the exact `HH:MM:SS.mmm --> HH:MM:SS.mmm` format? Are blank lines correctly separating each cue?
+ * **Synchronization:** Do captions appear and disappear in perfect sync with the audio?
+ * **Spelling & Capitalization:** Is all spelling correct according to **Merriam-Webster Online**? Is capitalization used consistently and only for screaming (not emphasis)?
+ * **Speaker IDs:** Is the speaker ID (`NARRATOR:`) used only on the *first* caption of a continuous block of speech and correctly re-introduced after any interruption?
+ * **Language & Dialect:** Are foreign words captioned verbatim (not translated)? Are accents and dialects preserved correctly?
+ * **Music & Lyrics:** Are music descriptions objective? Is the `♪...♪` and `♪...♪♪` format used correctly for lyrics?
+ * **Completeness:** Have all meaningful audio cues been captured?
+
+---
+
+## Core Captioning Instructions & Rules (For Step 2)
+
+### 1. Output Format
+* The output must be a single, complete **WEBVTT (.vtt) file**.
+* The file must start with the header `WEBVTT` on the first line, followed by a blank line.
+* Each caption cue consists of a timestamp line followed by the caption text, separated by a blank line.
+* **Do not** include any sequential numbers (e.g., `1`, `2`) in the output.
+
+### 2. Spelling & Capitalization
+* **Primary Source:** Use **Merriam-Webster Online** for all spelling and capitalization.
+* **Consistency:** Ensure consistent spelling of all words and names throughout the file.
+* **Emphasis:** Do not use all caps for emphasis. Reserve ALL CAPS for indicating **screaming or shouting**.
+
+### 3. Language, Dialect, and Accents
+* **Foreign Language:** Caption foreign words verbatim using correct accent marks and diacriticals (e.g., résumé, piñata). If the words are unintelligible, use a description (e.g., `[speaking French]`). **Never translate foreign speech into English.**
+* **Dialect:** Keep the flavor of the speaker's language (e.g., caption "gonna," "ain't," etc., as spoken).
+* **Accents:** If a speaker has a distinct regional accent, indicate it at the beginning of their first caption (e.g., `[with a Southern accent] My goodness.`).
+
+### 4. Speaker Identification
+* **Format:** Identify speakers with a label in **ALL CAPS**, followed by a colon (e.g., `NARRATOR:`).
+* **Redundancy:** For a continuous block of speech from the same speaker, **only use the speaker ID on the first caption of that block.** Do not repeat the ID for subsequent captions by that same person. If another sound or speaker interrupts, re-introduce the ID when they resume.
+
+### 5. Sound Effects, Music, and Lyrics
+* **Sound Effects:** Describe meaningful sounds in `[lowercase letters]`.
+* **Music Mood:** Use **objective** descriptions for music (e.g., "tense," "somber," "upbeat"). Avoid subjective words like "beautiful" or "delightful."
+* **Lyrics:**
+ * Caption lyrics verbatim.
+ * Use one music icon at the **beginning and end** of each caption line within a song (e.g., `♪ I can see clearly now ♪`).
+ * Use two music icons at the end of the **last line** of a song (e.g., `♪ the rain is gone ♪♪`).
+* **Background Music:** For non-essential background music, place a single music icon (♪) in the upper right corner using VTT positioning (`line:0 position:90% align:end`).
+
+---
+
+## Example Scenario
+
+**Input:** A video clip where a character named Maria speaks continuously.
+
+**Correct WEBVTT Output:**
+
+```vtt
+WEBVTT
+
+00:00:21.500 --> 00:00:24.000
+MARIA: This is the first part
+of my statement.
+
+00:00:24.500 --> 00:00:26.100
+I will continue speaking now
+without being interrupted.
+
+00:00:26.500 --> 00:00:27.300
+[phone rings]
+
+00:00:28.100 --> 00:30.250
+MARIA: As I was saying,
+it's important to be clear.
+```
+Now, apply this entire three-step analysis, creation, and verification process to the provided video. The final output must be a single, verified WEBVTT file.
\ No newline at end of file
diff --git a/docs/video_accessibility_spec.md b/docs/video_accessibility_spec.md
new file mode 100644
index 0000000..d75b803
--- /dev/null
+++ b/docs/video_accessibility_spec.md
@@ -0,0 +1,792 @@
+# Video Accessibility Processing Platform - Software Specification
+
+## 1. Executive Summary
+
+The Video Accessibility Processing Platform is a comprehensive web application designed to automatically generate closed captions and audio descriptions for video content using artificial intelligence. The platform provides a complete workflow from video upload through AI processing, human quality control, multi-language translation, and final content delivery.
+
+**Core Capabilities:**
+- Automated generation of closed captions and audio descriptions using Google Gemini 2.5 Pro
+- Multi-language translation and transcreation services
+- Professional quality control workflow for reviewers
+- Text-to-speech generation for audio descriptions
+- Role-based access control for clients, reviewers, and administrators
+- Real-time job status updates via WebSocket connections
+- Secure file storage and signed URL download system
+
+**Target Users:**
+- **Clients**: Organizations needing video accessibility services
+- **Reviewers**: Professional accessibility specialists who review and approve content
+- **Administrators**: System administrators managing users and system operations
+
+## 2. System Architecture
+
+### 2.1 Technology Stack
+
+**Frontend:**
+- React 18 with TypeScript
+- Vite for build tooling
+- TanStack Query for state management
+- React Router for navigation
+- Tailwind CSS for styling
+
+**Backend:**
+- FastAPI (Python 3.11+) for REST API
+- Celery with Redis for background task processing
+- MongoDB Atlas for data storage
+- JWT authentication with HttpOnly refresh cookies
+
+**External Services:**
+- Google Cloud Storage for file storage
+- Google Gemini 2.5 Pro for AI processing
+- Google Cloud Translate for language translation
+- ElevenLabs for text-to-speech synthesis
+
+**Infrastructure:**
+- Docker containerization
+- Redis for caching and task queues
+- WebSocket support for real-time updates
+
+### 2.2 System Components
+
+```
+┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
+│   React SPA     │    │   FastAPI       │    │   Celery        │
+│   Frontend      │◄──►│   Backend       │◄──►│   Workers       │
+└─────────────────┘    └─────────────────┘    └─────────────────┘
+                                │                      │
+                                ▼                      ▼
+                       ┌─────────────────┐    ┌─────────────────┐
+                       │   MongoDB       │    │   Redis         │
+                       │   Database      │    │   Queue/Cache   │
+                       └─────────────────┘    └─────────────────┘
+                                │
+                                ▼
+                       ┌─────────────────┐
+                       │  Google Cloud   │
+                       │    Storage      │
+                       └─────────────────┘
+```
+
+## 3. User Roles and Access Control
+
+### 3.1 Role Definitions
+
+**Client Role:**
+- Upload videos and create processing jobs
+- View own job status and progress
+- Download completed accessibility assets
+- Limited to own content only
+
+**Reviewer Role:**
+- Access quality control dashboard
+- Review AI-generated content for accuracy
+- Edit VTT files (captions and audio descriptions)
+- Approve or reject English content
+- Perform final review of completed jobs
+- Access to all jobs in system
+
+**Admin Role:**
+- Full system access including all reviewer capabilities
+- User management (create, edit, deactivate users)
+- System monitoring and health checks
+- Bulk operations and maintenance tasks
+- Access to audit logs and system statistics
+
+### 3.2 Authentication System
+
+**JWT Token Management:**
+- Access tokens stored in memory (15-minute expiry)
+- Refresh tokens stored in HttpOnly cookies (7-day expiry)
+- Automatic token refresh for active sessions
+- Secure logout with cookie clearing
+
+**Security Features:**
+- Password hashing using bcrypt
+- CORS protection with configurable origins
+- Rate limiting on authentication endpoints
+- Session-based security with proper token rotation
+
+## 4. Job Processing Workflow
+
+### 4.1 Job Status State Machine
+
+The system implements a comprehensive state machine for tracking job progress:
+
+```
+created → ingesting → ai_processing → pending_qc → approved_english → translating → tts_generating → pending_final_review → completed
+                                          ↓
+                                      rejected → (manual intervention required)
+                                          ↓
+                                      qc_feedback → (back to pending_qc after fixes)
+```
+
+**Status Definitions:**
+
+- **created**: Job record created, video uploaded to storage
+- **ingesting**: Video being processed for metadata extraction
+- **ai_processing**: AI analyzing video content and generating captions/audio descriptions
+- **pending_qc**: Awaiting human quality control review
+- **approved_english**: English content approved, ready for translation
+- **rejected**: Content rejected, requires client revision
+- **qc_feedback**: Reviewer provided feedback, awaiting fixes
+- **translating**: Processing multi-language translations
+- **tts_generating**: Generating audio files from text descriptions
+- **pending_final_review**: All content ready, awaiting final approval
+- **completed**: Job finished, all assets available for download
+
+### 4.2 Processing Pipeline
+
+**Phase 1: Upload and Ingestion**
+1. Client uploads MP4 video file through web interface
+2. File stored in Google Cloud Storage with unique job ID path
+3. Job record created in MongoDB with metadata
+4. Background Celery task queued for processing
+
+**Phase 2: AI Content Generation**
+1. Video file sent to Google Gemini 2.5 Pro API
+2. AI generates:
+ - Plain text transcript
+ - Closed captions in WebVTT format
+ - Audio description script in WebVTT format
+ - Confidence score for generated content
+3. Generated content stored in GCS and linked to job
+4. Job status updated to `pending_qc`
+
+**Phase 3: Quality Control Review**
+1. Reviewer accesses job through QC dashboard
+2. Side-by-side video player with generated captions/audio descriptions
+3. Inline VTT editor for making corrections
+4. Timing adjustment tools for synchronization
+5. Approve or reject with reviewer notes
+6. If approved, job moves to translation phase
+
+**Phase 4: Translation and Localization**
+1. Automatic translation of approved English content
+2. Support for standard translation and cultural transcreation
+3. Available target languages: Spanish, French, German (expandable)
+4. Translated VTT files stored per language
+
+**Phase 5: Audio Generation**
+1. Text-to-speech synthesis using ElevenLabs API
+2. MP3 files generated for each audio description track
+3. Language-specific voice selection
+4. Audio files stored alongside VTT content
+
+**Phase 6: Final Review and Delivery**
+1. Final review by authorized reviewer
+2. Asset validation to ensure all requested outputs present
+3. Client notification of job completion
+4. Signed URL generation for secure downloads
+
+## 5. User Interface and Experience
+
+### 5.1 Client Workflow
+
+**Dashboard:**
+- Overview of all jobs with status indicators
+- Quick actions for creating new jobs
+- Real-time status updates via WebSocket
+- Notification system for job completion
+
+**Job Creation Process:**
+1. **Video Upload**: Drag-and-drop interface with progress tracking
+2. **Job Configuration**:
+ - Descriptive title
+ - Source language selection
+ - Output format selection (captions VTT, audio description VTT, audio MP3)
+ - Target languages for translation
+3. **Processing Initiation**: Automatic background processing begins
+4. **Confirmation**: Success page with job tracking link
+
+**Job Monitoring:**
+- Detailed status view with progress indicators
+- Processing history timeline
+- Real-time updates without page refresh
+- Error notifications with context
+
+**Content Download:**
+- Secure download links for completed assets
+- Organized by language (en/, es/, fr/, de/)
+- File format options (VTT, MP3)
+- Source video access
+
+### 5.2 Reviewer Workflow
+
+**Quality Control Dashboard:**
+- Queue view of jobs pending review
+- Priority sorting by creation date
+- Job metadata preview
+- Quick status filtering
+
+**Review Interface:**
+- **Video Player**: HTML5 player with custom controls
+- **VTT Editor**: Syntax-highlighted editor with validation
+- **Side-by-Side View**: Simultaneous video and text editing
+- **Timing Tools**: Bulk timing adjustment with offset controls
+- **Review Controls**: Approve/reject with mandatory notes
+
+**Advanced Features:**
+- Keyboard shortcuts for efficient workflow (A=Approve, R=Reject, S=Save)
+- View mode switching (side-by-side, video-only, editor-only)
+- Real-time VTT validation and error highlighting
+- Unsaved changes warnings
+
+**Final Review Process:**
+- Asset validation before completion
+- Final quality checks
+- Client notification triggering
+- Completion workflow
+
+### 5.3 Administrator Interface
+
+**User Management:**
+- Create users with role assignment
+- Password reset functionality
+- User activation/deactivation
+- Role-based permission enforcement
+
+**System Monitoring:**
+- Health check dashboard with component status
+- Job processing statistics and metrics
+- Queue monitoring for background tasks
+- Performance analytics
+
+**Audit and Security:**
+- Comprehensive audit logging
+- Security event monitoring
+- User activity tracking
+- System maintenance tools
+
+## 6. Data Models and Storage
+
+### 6.1 Job Data Structure
+
+```typescript
+interface Job {
+ id: string; // Unique job identifier
+ client_id: string; // Owner client ID
+ title: string; // Human-readable job name
+ status: JobStatus; // Current processing status
+
+ source: {
+ filename: string; // Storage path
+ original_filename: string; // User's original filename
+ gcs_uri: string; // Google Cloud Storage URI
+ duration_s: number; // Video duration in seconds
+ language: string; // Source language code
+ };
+
+ requested_outputs: {
+ captions_vtt: boolean; // Closed captions requested
+ audio_description_vtt: boolean; // Audio description script requested
+ audio_description_mp3: boolean; // Audio voiceover requested
+ languages: string[]; // Target languages
+ transcreation: string[]; // Languages requiring cultural adaptation
+ };
+
+ outputs: {
+ [language: string]: {
+ captions_vtt_gcs?: string; // VTT file location
+ ad_vtt_gcs?: string; // Audio description VTT location
+ ad_mp3_gcs?: string; // Audio MP3 file location
+ origin: "translate" | "transcreate"; // Processing method
+ qa_notes?: string; // Quality assurance notes
+ };
+ };
+
+ ai: {
+ ingestion_json: object; // Full AI response data
+ confidence: number; // AI confidence score (0-1)
+ };
+
+ review: {
+ notes: string; // Current reviewer notes
+ reviewer_id?: string; // Last reviewer ID
+ history: ReviewHistoryItem[]; // Complete review history
+ };
+
+ created_at: Date;
+ updated_at: Date;
+ error?: ErrorInfo; // Processing error details
+}
+```
+
+### 6.2 User Data Structure
+
+```typescript
+interface User {
+ id: string;
+ email: string; // Unique login identifier
+ hashed_password: string; // Bcrypt hashed password
+ full_name: string; // Display name
+ role: "client" | "reviewer" | "admin";
+ is_active: boolean; // Account status
+ created_at: Date;
+ updated_at: Date;
+}
+```
+
+### 6.3 File Storage Organization
+
+**Google Cloud Storage Bucket Structure:**
+```
+gs://accessible-video/
+├── {jobId}/
+│   ├── source.mp4            # Original video
+│   ├── en/
+│   │   ├── captions.vtt      # English captions
+│   │   ├── ad.vtt            # English audio description
+│   │   └── ad.mp3            # English audio file
+│   ├── es/
+│   │   ├── captions.vtt      # Spanish captions
+│   │   ├── ad.vtt            # Spanish audio description
+│   │   └── ad.mp3            # Spanish audio file
+│   └── [other languages]/
+└── health_check_dummy        # System health verification
+```
+
+**Security Features:**
+- Signed URLs with 24-hour expiration
+- Role-based access control
+- Automatic cleanup on job deletion
+- Secure upload with content-type validation
+
+## 7. API Design
+
+### 7.1 Authentication Endpoints
+
+```
+POST /api/v1/auth/login
+POST /api/v1/auth/refresh
+POST /api/v1/auth/logout
+```
+
+### 7.2 Job Management Endpoints
+
+```
+POST /api/v1/jobs # Create new job
+GET /api/v1/jobs # List jobs (filtered by role)
+GET /api/v1/jobs/{id} # Get job details
+DELETE /api/v1/jobs/{id} # Delete job
+DELETE /api/v1/jobs/bulk # Bulk delete (admin only)
+
+# Job Actions
+POST /api/v1/jobs/{id}/actions/approve_english
+POST /api/v1/jobs/{id}/actions/reject
+POST /api/v1/jobs/{id}/actions/complete
+POST /api/v1/jobs/{id}/actions/reject_final
+
+# Content Management
+GET /api/v1/jobs/{id}/vtt # Get VTT content
+PATCH /api/v1/jobs/{id}/vtt # Update VTT content
+POST /api/v1/jobs/{id}/vtt/adjust-timing # Adjust timing
+GET /api/v1/jobs/{id}/downloads # Get download URLs
+GET /api/v1/jobs/{id}/validate # Validate assets
+```
+
+### 7.3 Administrative Endpoints
+
+```
+# User Management
+GET /api/v1/admin/users
+POST /api/v1/admin/users
+GET /api/v1/admin/users/{id}
+PATCH /api/v1/admin/users/{id}
+DELETE /api/v1/admin/users/{id}
+
+# System Monitoring
+GET /api/v1/admin/stats
+GET /api/v1/admin/health/detailed
+GET /api/v1/admin/jobs/stats
+GET /api/v1/admin/audit-logs
+```
+
+### 7.4 File Management
+
+```
+GET /api/v1/files/signed-url/{path} # Generate signed download URL
+POST /api/v1/files/upload # Direct file upload endpoint
+```
+
+### 7.5 Real-time Updates
+
+**WebSocket Endpoints:**
+- `/ws/jobs` - General job status updates
+- `/ws/jobs/{job_id}` - Job-specific status updates
+
+**WebSocket Message Format:**
+```json
+{
+ "job_id": "string",
+ "status": "string",
+ "updated_at": "ISO8601",
+ "job_title": "string",
+ "message": "string",
+ "progress": "number"
+}
+```
+
+## 8. AI Services Integration
+
+### 8.1 Google Gemini 2.5 Pro Integration
+
+**Content Generation Capabilities:**
+- Video content analysis and understanding
+- Automatic transcript generation
+- Closed caption creation with proper timing
+- Audio description generation for visual elements
+- Content confidence scoring
+
+**Processing Flow:**
+1. Video upload to Gemini Files API
+2. Content generation using multimodal prompt
+3. Structured JSON response parsing
+4. Error handling and self-healing for invalid responses
+5. Automatic file cleanup after processing
+
+**Quality Assurance:**
+- VTT format validation
+- Timestamp accuracy verification
+- Content completeness checks
+- Fallback content generation for missing elements
+
+### 8.2 Translation Services
+
+**Google Cloud Translate:**
+- High-quality machine translation for standard content
+- Support for multiple target languages
+- VTT format preservation during translation
+- Batch processing for efficiency
+
+**Transcreation via Gemini:**
+- Cultural adaptation for marketing content
+- Context-aware translation with brand guidelines
+- Maintained timing synchronization
+- Creative adaptation while preserving meaning
+
+### 8.3 Text-to-Speech Integration
+
+**ElevenLabs TTS Service:**
+- High-quality voice synthesis
+- Language-specific voice selection
+- MP3 output format
+- Proper pronunciation for accessibility terms
+
+**Audio Processing:**
+- Per-cue synthesis for precise timing
+- Audio quality optimization
+- File format standardization
+- Integration with VTT timing
+
+## 9. Quality Control Features
+
+### 9.1 Review Workflow
+
+**Content Review Process:**
+1. **Initial Review**: AI-generated content assessment
+2. **Content Editing**: Direct VTT file modification
+3. **Synchronization Check**: Video timing validation
+4. **Quality Verification**: Accessibility standards compliance
+5. **Final Approval**: Content ready for translation
+
+**Review Tools:**
+- Integrated video player with caption overlay
+- Syntax-highlighted VTT editor
+- Real-time content validation
+- Timing adjustment utilities
+- Review history tracking
+
+### 9.2 Quality Metrics
+
+**AI Confidence Scoring:**
+- Content generation confidence (0-100%)
+- Quality indicators for reviewer guidance
+- Threshold-based workflow routing
+
+**Review Analytics:**
+- Processing time tracking
+- Reviewer performance metrics
+- Quality score trending
+- Error rate monitoring
+
+## 10. Security and Compliance
+
+### 10.1 Data Security
+
+**Authentication Security:**
+- JWT token-based authentication
+- HttpOnly cookie refresh tokens
+- Automatic token rotation
+- Secure password hashing (bcrypt)
+
+**File Security:**
+- Signed URL access control
+- Time-limited download permissions
+- Secure file upload validation
+- Automatic cleanup procedures
+
+**API Security:**
+- CORS protection
+- Rate limiting
+- Input validation and sanitization
+- NoSQL injection prevention (MongoDB queries)
+
+### 10.2 Privacy Protection
+
+**Data Handling:**
+- Client data isolation
+- Role-based access enforcement
+- Audit trail maintenance
+- Secure data deletion
+
+**Content Protection:**
+- Temporary file processing
+- Secure cloud storage
+- Access logging
+- Data retention policies
+
+### 10.3 Audit and Compliance
+
+**Audit Logging:**
+- User action tracking
+- System event logging
+- Security event monitoring
+- Performance metric collection
+
+**Compliance Features:**
+- Data export capabilities
+- User consent management
+- Access control documentation
+- Security incident tracking
+
+## 11. Performance and Scalability
+
+### 11.1 System Performance
+
+**Backend Performance:**
+- Async request handling with FastAPI
+- Background task processing via Celery
+- Database query optimization
+- Caching strategy with Redis
+
+**Frontend Performance:**
+- React Query for data caching
+- Lazy loading of components
+- Optimized bundle splitting
+- Progressive web app features
+
+### 11.2 Scalability Architecture
+
+**Horizontal Scaling:**
+- Stateless API servers
+- Independent worker processes
+- Load balancing ready
+- Database connection pooling
+
+**Resource Optimization:**
+- File compression and optimization
+- CDN integration ready
+- Memory-efficient processing
+- Garbage collection optimization
+
+### 11.3 Monitoring and Observability
+
+**Health Monitoring:**
+- Component health checks
+- Service dependency monitoring
+- Performance metric collection
+- Error rate tracking
+
+**Logging and Debugging:**
+- Structured logging with correlation IDs
+- Error tracking and alerting
+- Performance profiling
+- Debug mode capabilities
+
+## 12. Deployment and Infrastructure
+
+### 12.1 Containerization
+
+**Docker Configuration:**
+- Multi-stage builds for optimization
+- Health check integration
+- Environment-based configuration
+- Security-hardened images
+
+### 12.2 Environment Configuration
+
+**Development Environment:**
+- Local Docker Compose setup
+- Hot-reload development servers
+- Test database seeding
+- Mock external services
+
+**Production Environment:**
+- Cloud-native deployment
+- SSL/TLS termination
+- Environment variable management
+- Secret management integration
+
+### 12.3 Database Management
+
+**MongoDB Configuration:**
+- Document schema validation
+- Index optimization
+- Replica set support
+- Backup and recovery procedures
+
+**Migration System:**
+- Schema version tracking
+- Safe migration procedures
+- Rollback capabilities
+- Data integrity validation
+
+## 13. Testing Strategy
+
+### 13.1 Testing Levels
+
+**Unit Testing:**
+- Service layer testing
+- Utility function testing
+- Component testing
+- Mock external dependencies
+
+**Integration Testing:**
+- API endpoint testing
+- Database integration testing
+- File storage integration
+- Authentication flow testing
+
+**End-to-End Testing:**
+- Complete user workflow testing
+- Cross-browser compatibility
+- Mobile responsiveness
+- Performance testing
+
+### 13.2 Testing Tools
+
+**Backend Testing:**
+- PyTest for unit and integration tests
+- Factory Boy for test data generation
+- Async test support
+- Mock external services
+
+**Frontend Testing:**
+- Jest for unit testing
+- React Testing Library
+- Playwright for E2E testing
+- Visual regression testing
+
+## 14. Error Handling and Recovery
+
+### 14.1 Error Classification
+
+**User Errors:**
+- Invalid file formats
+- Insufficient permissions
+- Validation failures
+- Authentication errors
+
+**System Errors:**
+- External service failures
+- Database connection issues
+- File storage problems
+- Processing timeouts
+
+**Recovery Strategies:**
+- Automatic retry mechanisms
+- Graceful degradation
+- User-friendly error messages
+- Administrative error resolution
+
+### 14.2 Reliability Features
+
+**Fault Tolerance:**
+- Circuit breaker patterns
+- Timeout configurations
+- Retry logic with exponential backoff
+- Fallback procedures
+
+**Data Integrity:**
+- Transaction management
+- Consistent state handling
+- Backup and recovery
+- Data validation
+
+## 15. Configuration and Customization
+
+### 15.1 System Configuration
+
+**Application Settings:**
+- Environment-specific configurations
+- Feature flag support
+- Service endpoint configuration
+- Security parameter tuning
+
+**Processing Configuration:**
+- AI model parameters
+- Translation service options
+- File size limits
+- Processing timeouts
+
+### 15.2 User Customization
+
+**Client Settings:**
+- Language preferences
+- Notification preferences
+- Default job settings
+- Download preferences
+
+**Reviewer Settings:**
+- Workflow preferences
+- Editor configurations
+- Keyboard shortcuts
+- Quality thresholds
+
+## 16. Future Enhancements
+
+### 16.1 Planned Features
+
+**Enhanced AI Capabilities:**
+- Multi-modal content analysis
+- Improved accuracy metrics
+- Custom model training
+- Advanced quality scoring
+
+**Extended Language Support:**
+- Additional target languages
+- Regional dialect support
+- Custom transcreation workflows
+- Cultural adaptation tools
+
+**Advanced Workflow Features:**
+- Batch processing capabilities
+- Template-based job creation
+- Advanced approval workflows
+- Custom review stages
+
+### 16.2 Integration Opportunities
+
+**Third-Party Integrations:**
+- Content management systems
+- Video hosting platforms
+- Accessibility testing tools
+- Quality assurance services
+
+**API Extensions:**
+- Webhook support for job events
+- Advanced reporting APIs
+- Bulk operation endpoints
+- Custom integration points
+
+## 17. Conclusion
+
+The Video Accessibility Processing Platform represents a comprehensive solution for automated video accessibility content generation. Built with modern web technologies and integrated with leading AI services, the platform provides an end-to-end workflow from video upload to final content delivery.
+
+The system's architecture supports scalability, security, and reliability while maintaining a focus on user experience and content quality. The role-based access control ensures appropriate separation of concerns between content creators, quality reviewers, and system administrators.
+
+With its robust API design, real-time updates, and comprehensive error handling, the platform serves as a professional-grade solution for organizations requiring high-quality accessibility content at scale.
+
+---
+
+*This specification document serves as the comprehensive technical and functional guide for the Video Accessibility Processing Platform, detailing all implemented features, workflows, and system capabilities as of the current release.*
\ No newline at end of file
diff --git a/frontend/.env.production b/frontend/.env.production
new file mode 100644
index 0000000..c566b5d
--- /dev/null
+++ b/frontend/.env.production
@@ -0,0 +1,25 @@
+# =============================================================================
+# Frontend Production Environment Variables
+# =============================================================================
+# These variables are embedded into the built JavaScript at build time
+# They are NOT secret - they will be visible in the browser
+# =============================================================================
+
+# Backend API base URL (proxied through Apache)
+VITE_API_BASE_URL=https://ai-sandbox.oliver.solutions/video-accessibility-back
+
+# Application environment
+VITE_APP_ENV=production
+
+# Sentry DSN for error tracking (optional - leave empty to disable)
+VITE_SENTRY_DSN=
+
+# =============================================================================
+# Important Notes:
+# =============================================================================
+# 1. All VITE_ prefixed variables are embedded in the build
+# 2. Never put secrets here - they will be visible in browser dev tools
+# 3. After changing these values, rebuild the frontend:
+# npm run build
+# 4. The API URL must match your Apache reverse proxy configuration
+# =============================================================================
diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts
index 1f439e8..5fda732 100644
--- a/frontend/vite.config.ts
+++ b/frontend/vite.config.ts
@@ -5,6 +5,8 @@ import react from '@vitejs/plugin-react'
// https://vite.dev/config/
export default defineConfig({
plugins: [react()],
+ // Base path for production deployment in Apache subdirectory
+ base: '/video-accessibility/',
server: {
proxy: {
'/api': {
diff --git a/scripts/build-frontend.sh b/scripts/build-frontend.sh
new file mode 100755
index 0000000..808188b
--- /dev/null
+++ b/scripts/build-frontend.sh
@@ -0,0 +1,239 @@
+#!/bin/bash
+# =============================================================================
+# Frontend Build and Deploy Script
+# =============================================================================
+# Builds the React frontend and deploys to Apache document root
+# Run from: /opt/accessible-video/
+# Usage: ./scripts/build-frontend.sh
+# =============================================================================
+
+set -e # Exit on any error
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Configuration
+PROJECT_DIR="/opt/accessible-video"
+FRONTEND_DIR="$PROJECT_DIR/frontend"
+DEPLOY_DIR="/var/www/html/video-accessibility"
+
+# =============================================================================
+# Helper Functions
+# =============================================================================
+
+print_success() { # Print message $1 in green, then reset the colour
+ echo -e "${GREEN} $1${NC}"
+}
+
+print_error() { # Print message $1 in red, then reset the colour (does not exit)
+ echo -e "${RED} $1${NC}"
+}
+
+print_info() { # Print informational message $1 in blue, prefixed with an info glyph
+ echo -e "${BLUE}ℹ $1${NC}"
+}
+
+print_header() { # Print section title $1 in blue between two horizontal rules
+ echo -e "${BLUE}==============================================================================${NC}"
+ echo -e "${BLUE}$1${NC}"
+ echo -e "${BLUE}==============================================================================${NC}"
+}
+
+# =============================================================================
+# Pre-flight Checks
+# =============================================================================
+
+preflight_checks() { # Exit 1 unless the frontend sources, its env file, node and npm are all available
+ print_header "Pre-flight Checks"
+
+ # The frontend sources must exist at $FRONTEND_DIR
+ if [ ! -d "$FRONTEND_DIR" ]; then
+ print_error "Frontend directory not found at $FRONTEND_DIR"
+ exit 1
+ fi
+ print_success "Frontend directory found"
+
+ # package.json is needed by the npm commands run later in build_frontend
+ if [ ! -f "$FRONTEND_DIR/package.json" ]; then
+ print_error "package.json not found in frontend directory"
+ exit 1
+ fi
+ print_success "package.json found"
+
+ # .env.production is required; when missing, seed it from .env.example and stop so it can be edited first
+ if [ ! -f "$FRONTEND_DIR/.env.production" ]; then
+ print_error ".env.production not found in frontend directory"
+ print_info "Creating .env.production from template..."
+ cp "$FRONTEND_DIR/.env.example" "$FRONTEND_DIR/.env.production" || exit 1
+ print_info "Please edit $FRONTEND_DIR/.env.production and run again"
+ exit 1
+ fi
+ print_success ".env.production found"
+
+ # node must be on PATH (its version is echoed for the deployment log)
+ if ! command -v node &> /dev/null; then
+ print_error "Node.js is not installed"
+ exit 1
+ fi
+ print_success "Node.js $(node --version) is installed"
+
+ # npm must be on PATH (its version is echoed for the deployment log)
+ if ! command -v npm &> /dev/null; then
+ print_error "npm is not installed"
+ exit 1
+ fi
+ print_success "npm $(npm --version) is installed"
+
+ echo ""
+}
+
+# =============================================================================
+# Build Frontend
+# =============================================================================
+
+build_frontend() { # Install dependencies and build the frontend into $FRONTEND_DIR/dist; exits non-zero on failure
+ print_header "Building Frontend"
+
+ cd "$FRONTEND_DIR"
+
+ # Install ALL dependencies: "npm run build" needs the build tooling, which --only=production would skip (devDependencies)
+ print_info "Installing dependencies..."
+ npm ci --include=dev
+ print_success "Dependencies installed"
+
+ # Run the project's build script (set -e aborts the script if it fails)
+ print_info "Building React application (this may take a minute)..."
+ npm run build
+ print_success "Build completed"
+
+ # Guard against a build that exits 0 without producing dist/
+ if [ ! -d "dist" ]; then
+ print_error "Build failed - dist directory not found"
+ exit 1
+ fi
+ print_success "Build artifacts created in dist/"
+
+ # Report the size of the build output
+ BUILD_SIZE=$(du -sh dist | cut -f1)
+ print_info "Build size: $BUILD_SIZE"
+
+ cd "$PROJECT_DIR"
+ echo ""
+}
+
+# =============================================================================
+# Deploy to Apache
+# =============================================================================
+
+deploy_to_apache() { # Back up, clear and repopulate $DEPLOY_DIR from dist/, then fix ownership and modes
+ print_header "Deploying to Apache"
+
+ # Create deployment directory if it doesn't exist
+ print_info "Creating deployment directory..."
+ sudo mkdir -p "$DEPLOY_DIR"
+ print_success "Deployment directory ready"
+
+ # Back up the existing deployment only when the directory is non-empty (path quoted so spaces/globs are safe)
+ if [ -d "$DEPLOY_DIR" ] && [ "$(ls -A "$DEPLOY_DIR")" ]; then
+ BACKUP_DIR="$DEPLOY_DIR.backup.$(date +%Y%m%d_%H%M%S)"
+ print_info "Backing up existing deployment to $BACKUP_DIR"
+ sudo cp -r "$DEPLOY_DIR" "$BACKUP_DIR"
+ print_success "Backup created"
+ fi
+
+ # Clear deployment directory; ${VAR:?} aborts instead of expanding to "/*" if DEPLOY_DIR is ever empty
+ print_info "Clearing deployment directory..."
+ sudo rm -rf "${DEPLOY_DIR:?}"/*
+ print_success "Deployment directory cleared"
+
+ # Copy build artifacts
+ print_info "Copying build artifacts..."
+ sudo cp -r "$FRONTEND_DIR/dist"/* "$DEPLOY_DIR"/
+ print_success "Build artifacts copied"
+
+ # Hand the tree to the www-data user and group
+ print_info "Setting file ownership to www-data..."
+ sudo chown -R www-data:www-data "$DEPLOY_DIR"
+ print_success "Ownership set"
+
+ # Directories 755 (traversable), files 644 (readable, not executable)
+ print_info "Setting file permissions..."
+ sudo find "$DEPLOY_DIR" -type d -exec chmod 755 {} \;
+ sudo find "$DEPLOY_DIR" -type f -exec chmod 644 {} \;
+ print_success "Permissions set"
+
+ echo ""
+}
+
+# =============================================================================
+# Verify Deployment
+# =============================================================================
+
+verify_deployment() { # Exit 1 unless $DEPLOY_DIR holds index.html and assets/; then report file count and size
+ print_header "Verifying Deployment"
+
+ # index.html must have been copied to the deployment root
+ if [ ! -f "$DEPLOY_DIR/index.html" ]; then
+ print_error "index.html not found in deployment directory!"
+ exit 1
+ fi
+ print_success "index.html found"
+
+ # The assets/ directory must have been copied as well
+ if [ ! -d "$DEPLOY_DIR/assets" ]; then
+ print_error "assets/ directory not found in deployment!"
+ exit 1
+ fi
+ print_success "assets/ directory found"
+
+ # Count regular files under the deployment directory
+ FILE_COUNT=$(find "$DEPLOY_DIR" -type f | wc -l)
+ print_info "Total files deployed: $FILE_COUNT"
+
+ # Report the total on-disk size of the deployment
+ DEPLOY_SIZE=$(sudo du -sh "$DEPLOY_DIR" | cut -f1)
+ print_info "Deployment size: $DEPLOY_SIZE"
+
+ echo ""
+}
+
+# =============================================================================
+# Display Summary
+# =============================================================================
+
+display_summary() { # Print the deployment location, the public URL and where rollback backups live
+ print_header "Deployment Summary"
+
+ echo -e "${GREEN}Frontend successfully deployed!${NC}"
+ echo ""
+ echo "Deployment location: $DEPLOY_DIR"
+ echo "Frontend URL: https://ai-sandbox.oliver.solutions/video-accessibility"
+ echo ""
+ echo "To verify the deployment, visit the URL above in your browser."
+ echo ""
+ echo "If you need to rollback, backups are stored in:"
+ echo " $DEPLOY_DIR.backup.*"
+ echo ""
+}
+
+# =============================================================================
+# Main Function
+# =============================================================================
+
+main() { # Entry point: run checks, build, deploy, verify and summary in order (set -e stops at the first failure)
+ print_header "Frontend Build & Deploy"
+ echo ""
+
+ preflight_checks
+ build_frontend
+ deploy_to_apache
+ verify_deployment
+ display_summary
+}
+
+# Run main function
+main
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
new file mode 100755
index 0000000..3151f11
--- /dev/null
+++ b/scripts/deploy.sh
@@ -0,0 +1,287 @@
+#!/bin/bash
+# =============================================================================
+# Deployment Script for Accessible Video Platform
+# =============================================================================
+# This script handles building and deploying the application
+# Run from: /opt/accessible-video/
+# Usage: ./scripts/deploy.sh [options]
+# =============================================================================
+
+set -e # Exit on any error
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Configuration
+PROJECT_DIR="/opt/accessible-video"
+COMPOSE_FILES="-f docker-compose.yml -f docker-compose.prod.yml"
+
+# =============================================================================
+# Helper Functions
+# =============================================================================
+
+print_header() { # Print section title $1 in blue between two horizontal rules
+ echo -e "${BLUE}==============================================================================${NC}"
+ echo -e "${BLUE}$1${NC}"
+ echo -e "${BLUE}==============================================================================${NC}"
+}
+
+print_success() { # Print message $1 in green, then reset the colour
+ echo -e "${GREEN} $1${NC}"
+}
+
+print_error() { # Print message $1 in red, then reset the colour (does not exit)
+ echo -e "${RED} $1${NC}"
+}
+
+print_warning() { # Print message $1 in yellow, then reset the colour
+ echo -e "${YELLOW} $1${NC}"
+}
+
+print_info() { # Print informational message $1 in blue, prefixed with an info glyph
+ echo -e "${BLUE}ℹ $1${NC}"
+}
+
+# =============================================================================
+# Pre-flight Checks
+# =============================================================================
+
+preflight_checks() { # Exit 1 unless the working directory, config, secrets and Docker tooling are all in place
+ print_header "Running Pre-flight Checks"
+
+ # All later paths are relative, so the script must be started from the directory holding docker-compose.yml
+ if [ ! -f "docker-compose.yml" ]; then
+ print_error "docker-compose.yml not found. Please run from /opt/accessible-video/"
+ exit 1
+ fi
+ print_success "Running from correct directory"
+
+ # .env.production is read later by deploy_backend
+ if [ ! -f ".env.production" ]; then
+ print_error ".env.production not found. Please create it first."
+ exit 1
+ fi
+ print_success ".env.production found"
+
+ # The secrets/ directory must exist...
+ if [ ! -d "secrets" ]; then
+ print_error "secrets/ directory not found. Please create it and add gcp-credentials.json"
+ exit 1
+ fi
+ print_success "secrets/ directory found"
+
+ # ...and must contain the GCP credentials file
+ if [ ! -f "secrets/gcp-credentials.json" ]; then
+ print_error "secrets/gcp-credentials.json not found"
+ exit 1
+ fi
+ print_success "GCP credentials found"
+
+ # "docker info" fails when the Docker daemon cannot be reached
+ if ! docker info > /dev/null 2>&1; then
+ print_error "Docker is not running"
+ exit 1
+ fi
+ print_success "Docker is running"
+
+ # The standalone docker-compose binary is what the rest of this script invokes
+ if ! command -v docker-compose &> /dev/null; then
+ print_error "docker-compose is not installed"
+ exit 1
+ fi
+ print_success "docker-compose is available"
+
+ echo ""
+}
+
+# =============================================================================
+# Pull Latest Code
+# =============================================================================
+
+pull_code() { # Run "git pull" in backend/ and frontend/ when each is its own git checkout; warn and skip otherwise
+ print_header "Pulling Latest Code"
+
+ # Backend: only pulled when backend/.git exists
+ if [ -d "backend/.git" ]; then
+ print_info "Pulling backend repository..."
+ cd backend
+ git pull
+ cd ..
+ print_success "Backend code updated"
+ else
+ print_warning "Backend is not a git repository, skipping pull"
+ fi
+
+ # Frontend: only pulled when frontend/.git exists
+ if [ -d "frontend/.git" ]; then
+ print_info "Pulling frontend repository..."
+ cd frontend
+ git pull
+ cd ..
+ print_success "Frontend code updated"
+ else
+ print_warning "Frontend is not a git repository, skipping pull"
+ fi
+
+ echo ""
+}
+
+# =============================================================================
+# Build and Deploy Backend
+# =============================================================================
+
+deploy_backend() { # Rebuild the compose images, recreate the containers and fail if any reports "unhealthy"
+ print_header "Building and Deploying Backend Services"
+
+ # Export every non-comment line of .env.production. NOTE(review): xargs word-splits, so values with spaces or inline comments will not survive - confirm the file has none
+ export $(cat .env.production | grep -v '^#' | xargs)
+
+ # Build images from scratch (--no-cache)
+ print_info "Building Docker images (this may take a few minutes)..."
+ docker-compose $COMPOSE_FILES build --no-cache
+ print_success "Docker images built"
+
+ # Stop and remove the currently running containers
+ print_info "Stopping existing containers..."
+ docker-compose $COMPOSE_FILES down
+ print_success "Containers stopped"
+
+ # Start all services in the background
+ print_info "Starting services..."
+ docker-compose $COMPOSE_FILES up -d
+ print_success "Services started"
+
+ # Fixed 10-second grace period before the health check below (no polling)
+ print_info "Waiting for services to be healthy..."
+ sleep 10
+
+ # Fail only if "ps" already shows "unhealthy". NOTE(review): services still starting after 10s presumably pass this check - confirm that is acceptable
+ if docker-compose $COMPOSE_FILES ps | grep -q "unhealthy"; then
+ print_error "Some services are unhealthy!"
+ docker-compose $COMPOSE_FILES ps
+ exit 1
+ fi
+ print_success "All services are healthy"
+
+ echo ""
+}
+
+# =============================================================================
+# Build and Deploy Frontend
+# =============================================================================
+
+deploy_frontend() { # Build the frontend and publish dist/ to the Apache docroot subdirectory
+ print_header "Building and Deploying Frontend"
+
+ cd frontend
+
+ # Install ALL dependencies: "npm run build" needs the build tooling, which --only=production would skip (devDependencies)
+ print_info "Installing frontend dependencies..."
+ npm ci --include=dev
+ print_success "Dependencies installed"
+
+ # Run the project's build script (set -e aborts the script if it fails)
+ print_info "Building frontend..."
+ npm run build
+ print_success "Frontend built"
+
+ # Deploy to Apache
+ print_info "Deploying frontend to /var/www/html/video-accessibility/..."
+
+ # Create directory if it doesn't exist
+ sudo mkdir -p /var/www/html/video-accessibility
+
+ # Replace the previous deployment with the fresh build output
+ sudo rm -rf /var/www/html/video-accessibility/*
+ sudo cp -r dist/* /var/www/html/video-accessibility/
+
+ # Owner www-data; capital X keeps directories traversable (755) without marking static files executable
+ sudo chown -R www-data:www-data /var/www/html/video-accessibility
+ sudo chmod -R u=rwX,go=rX /var/www/html/video-accessibility
+
+ print_success "Frontend deployed to Apache"
+
+ cd ..
+ echo ""
+}
+
+# =============================================================================
+# Run Database Migrations
+# =============================================================================
+
+run_migrations() { # Run migrate.py inside the running "api" service container (-T: no TTY allocated)
+ print_header "Running Database Migrations"
+
+ print_info "Running migrations..."
+ docker-compose $COMPOSE_FILES exec -T api python migrate.py
+ print_success "Migrations completed"
+
+ echo ""
+}
+
+# =============================================================================
+# Display Status
+# =============================================================================
+
+display_status() { # Print container status, the public service URLs and handy follow-up commands
+ print_header "Deployment Status"
+
+ echo -e "${BLUE}Container Status:${NC}"
+ docker-compose $COMPOSE_FILES ps
+
+ echo ""
+ echo -e "${BLUE}Service URLs:${NC}"
+ echo "Frontend: https://ai-sandbox.oliver.solutions/video-accessibility"
+ echo "Backend API: https://ai-sandbox.oliver.solutions/video-accessibility-back"
+ echo "API Health: https://ai-sandbox.oliver.solutions/video-accessibility-back/health"
+
+ echo ""
+ echo -e "${GREEN}Deployment completed successfully!${NC}"
+ echo ""
+ echo "To view logs:"
+ echo " docker-compose $COMPOSE_FILES logs -f [service]"
+ echo ""
+ echo "To restart a service:"
+ echo " docker-compose $COMPOSE_FILES restart [service]"
+ echo ""
+}
+
+# =============================================================================
+# Main Deployment Flow
+# =============================================================================
+
+main() { # Entry point; accepts any combination of --skip-pull, --skip-frontend and --skip-migrations, in any order
+ print_header "Accessible Video Platform Deployment"
+ echo ""
+
+ # Run checks
+ preflight_checks
+
+ # Pull latest code unless --skip-pull appears anywhere in the arguments
+ if [[ " $* " != *" --skip-pull "* ]]; then
+ pull_code
+ fi
+
+ # Deploy backend (always)
+ deploy_backend
+
+ # Deploy frontend unless --skip-frontend appears anywhere in the arguments
+ if [[ " $* " != *" --skip-frontend "* ]]; then
+ deploy_frontend
+ fi
+
+ # Run migrations unless --skip-migrations appears anywhere in the arguments
+ if [[ " $* " != *" --skip-migrations "* ]]; then
+ run_migrations
+ fi
+
+ # Display status
+ display_status
+}
+
+# Run main function
+main "$@"
diff --git a/scripts/mongodb-init.js b/scripts/mongodb-init.js
new file mode 100644
index 0000000..4c1dee4
--- /dev/null
+++ b/scripts/mongodb-init.js
@@ -0,0 +1,147 @@
+// =============================================================================
+// MongoDB Initialization Script for Accessible Video Platform
+// =============================================================================
+// Run this script ONCE after starting the MongoDB container
+// Usage: docker-compose exec -T mongodb mongosh < scripts/mongodb-init.js
+// =============================================================================
+
+// Connect to the accessible_video database
+db = db.getSiblingDB('accessible_video');
+
+print('=============================================================================');
+print('MongoDB Initialization for Accessible Video Platform');
+print('=============================================================================');
+
+// -----------------------------------------------------------------------------
+// Create Collections with Validation
+// -----------------------------------------------------------------------------
+print('\n1. Creating collections with schema validation...');
+
+// Jobs collection
+db.createCollection('jobs', {
+ validator: {
+ $jsonSchema: {
+ bsonType: 'object',
+ required: ['_id', 'title', 'status', 'client_id', 'created_at', 'updated_at'],
+ properties: {
+ _id: { bsonType: 'string' },
+ title: { bsonType: 'string' },
+ status: {
+ enum: ['created', 'ingesting', 'ai_processing', 'pending_qc',
+ 'approved_english', 'rejected', 'translating', 'tts_generating',
+ 'pending_final_review', 'completed']
+ },
+ client_id: { bsonType: 'string' },
+ created_at: { bsonType: 'date' },
+ updated_at: { bsonType: 'date' }
+ }
+ }
+ }
+});
+print(' Created jobs collection');
+
+// Users collection
+db.createCollection('users', {
+ validator: {
+ $jsonSchema: {
+ bsonType: 'object',
+ required: ['_id', 'email', 'hashed_password', 'role', 'created_at'],
+ properties: {
+ _id: { bsonType: 'string' },
+ email: { bsonType: 'string' },
+ hashed_password: { bsonType: 'string' },
+ role: { enum: ['client', 'reviewer', 'admin'] },
+ is_active: { bsonType: 'bool' },
+ created_at: { bsonType: 'date' }
+ }
+ }
+ }
+});
+print(' Created users collection');
+
+// Audit logs collection
+db.createCollection('audit_logs');
+print(' Created audit_logs collection');
+
+// -----------------------------------------------------------------------------
+// Create Indexes for Performance
+// -----------------------------------------------------------------------------
+print('\n2. Creating indexes for optimized queries...');
+
+// Jobs collection indexes
+db.jobs.createIndex({ 'status': 1, 'created_at': -1 }, { name: 'idx_status_created' });
+print(' Created index: jobs.idx_status_created');
+
+db.jobs.createIndex({ 'client_id': 1 }, { name: 'idx_client_id' });
+print(' Created index: jobs.idx_client_id');
+
+db.jobs.createIndex({ 'created_at': -1 }, { name: 'idx_created_at' });
+print(' Created index: jobs.idx_created_at');
+
+// Users collection indexes
+db.users.createIndex({ 'email': 1 }, { unique: true, name: 'idx_email_unique' });
+print(' Created index: users.idx_email_unique (unique)');
+
+db.users.createIndex({ 'role': 1 }, { name: 'idx_role' });
+print(' Created index: users.idx_role');
+
+// Audit logs collection indexes
+db.audit_logs.createIndex({ 'timestamp': -1 }, { name: 'idx_timestamp' });
+print(' Created index: audit_logs.idx_timestamp');
+
+db.audit_logs.createIndex({ 'action': 1, 'timestamp': -1 }, { name: 'idx_action_timestamp' });
+print(' Created index: audit_logs.idx_action_timestamp');
+
+db.audit_logs.createIndex({ 'user_id': 1, 'timestamp': -1 }, { name: 'idx_user_timestamp' });
+print(' Created index: audit_logs.idx_user_timestamp');
+
+db.audit_logs.createIndex({ 'severity': 1, 'timestamp': -1 }, { name: 'idx_severity_timestamp' });
+print(' Created index: audit_logs.idx_severity_timestamp');
+
+db.audit_logs.createIndex({ 'resource_type': 1, 'resource_id': 1 }, { name: 'idx_resource' });
+print(' Created index: audit_logs.idx_resource');
+
+// Text search index for audit logs
+db.audit_logs.createIndex(
+ {
+ 'description': 'text',
+ 'details': 'text',
+ 'error_message': 'text'
+ },
+ {
+ name: 'idx_text_search',
+ weights: {
+ 'description': 10,
+ 'details': 5,
+ 'error_message': 8
+ }
+ }
+);
+print(' Created index: audit_logs.idx_text_search (full-text)');
+
+// -----------------------------------------------------------------------------
+// Display Database Statistics
+// -----------------------------------------------------------------------------
+print('\n3. Database initialization complete!');
+print('\nDatabase statistics:');
+print('-------------------');
+
+const stats = db.stats();
+print('Database: ' + stats.db);
+print('Collections: ' + stats.collections);
+print('Indexes: ' + stats.indexes);
+print('Data Size: ' + (stats.dataSize / 1024).toFixed(2) + ' KB');
+print('Storage Size: ' + (stats.storageSize / 1024).toFixed(2) + ' KB');
+
+print('\nCollections created:');
+db.getCollectionNames().forEach(function(collName) {
+ const collStats = db.getCollection(collName).stats();
+ const indexCount = db.getCollection(collName).getIndexes().length;
+ print(' - ' + collName + ' (indexes: ' + indexCount + ')');
+});
+
+print('\n=============================================================================');
+print('Next steps:');
+print('1. Run migrations: docker-compose exec api python migrate.py');
+print('2. Create admin user: docker-compose exec api python create_test_users.py');
+print('=============================================================================');