wrote docker files and deployment instructions
This commit is contained in:
parent
dac0fbea04
commit
1a1ed3048d
28 changed files with 3020 additions and 520 deletions
BIN
.DS_Store
vendored
BIN
.DS_Store
vendored
Binary file not shown.
99
.env.production
Normal file
99
.env.production
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
# =============================================================================
|
||||
# Production Environment Variables for Accessible Video Platform
|
||||
# =============================================================================
|
||||
# IMPORTANT: This file contains sensitive information. Set permissions: chmod 600
|
||||
# Location on server: /opt/accessible-video/.env.production
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# App Configuration
|
||||
# -----------------------------------------------------------------------------
|
||||
APP_ENV=prod
|
||||
API_BASE_URL=https://ai-sandbox.oliver.solutions/video-accessibility-back
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Authentication & Security
|
||||
# -----------------------------------------------------------------------------
|
||||
# IMPORTANT: Generate a secure random secret for JWT_SECRET
|
||||
# Example: openssl rand -hex 32
|
||||
JWT_SECRET=CHANGE_ME_TO_SECURE_RANDOM_64_CHAR_STRING
|
||||
JWT_ALG=HS256
|
||||
JWT_ACCESS_TTL_MIN=240
|
||||
JWT_REFRESH_TTL_DAYS=7
|
||||
COOKIE_DOMAIN=ai-sandbox.oliver.solutions
|
||||
COOKIE_SECURE=true
|
||||
COOKIE_SAMESITE=Lax
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# MongoDB Configuration
|
||||
# -----------------------------------------------------------------------------
|
||||
# MongoDB runs without authentication in the internal Docker network
|
||||
MONGODB_DB=accessible_video
|
||||
|
||||
# Note: MongoDB connection string is auto-constructed in docker-compose.yml
|
||||
# Format: mongodb://mongodb:27017/${MONGODB_DB}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Redis Configuration
|
||||
# -----------------------------------------------------------------------------
|
||||
# Redis runs without authentication in the internal Docker network
|
||||
# No configuration needed here - the connection strings are defined in docker-compose.yml
|
||||
# REDIS_URL=redis://redis:6379/0
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Google Cloud Platform (GCP)
|
||||
# -----------------------------------------------------------------------------
|
||||
GCP_PROJECT_ID=optical-414516
|
||||
GCS_BUCKET=accessible-video
|
||||
|
||||
# GCP credentials file will be mounted as a volume
|
||||
# Location inside container: /secrets/gcp-credentials.json
|
||||
# Source file on server: /opt/accessible-video/secrets/gcp-credentials.json
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# AI Services
|
||||
# -----------------------------------------------------------------------------
|
||||
# Gemini API (Required)
|
||||
# SECURITY: do not commit a real key to version control. Set the real value only
# in the server-side copy of this file, and rotate any key that was ever committed.
GEMINI_API_KEY=CHANGE_ME_TO_VALID_GEMINI_API_KEY
|
||||
|
||||
# Google Cloud Translate (Optional - for translation features)
|
||||
TRANSLATE_API_KEY=
|
||||
|
||||
# ElevenLabs TTS (Optional - for text-to-speech)
|
||||
ELEVENLABS_API_KEY=
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Email Configuration (SendGrid)
|
||||
# -----------------------------------------------------------------------------
|
||||
# IMPORTANT: Get SendGrid API key from https://app.sendgrid.com/settings/api_keys
|
||||
SENDGRID_API_KEY=
|
||||
|
||||
# Email sender address (must be verified in SendGrid)
|
||||
EMAIL_FROM=noreply@ai-sandbox.oliver.solutions
|
||||
|
||||
# Client-facing URL (used in emails)
|
||||
CLIENT_BASE_URL=https://ai-sandbox.oliver.solutions/video-accessibility
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Observability & Monitoring (Optional)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Sentry for error tracking (leave empty to disable)
|
||||
SENTRY_DSN=
|
||||
|
||||
# OpenTelemetry endpoint (leave empty to disable)
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=
|
||||
|
||||
# =============================================================================
|
||||
# DEPLOYMENT CHECKLIST
|
||||
# =============================================================================
|
||||
# [ ] Generate secure JWT_SECRET (64 characters): openssl rand -hex 32
|
||||
# [ ] Verify GCP_PROJECT_ID and GCS_BUCKET
|
||||
# [ ] Copy GCP credentials JSON to /opt/accessible-video/secrets/
|
||||
# [ ] Update GEMINI_API_KEY with valid key
|
||||
# [ ] (Optional) Configure SENDGRID_API_KEY for email notifications
|
||||
# [ ] (Optional) Configure ELEVENLABS_API_KEY for TTS
|
||||
# [ ] (Optional) Configure SENTRY_DSN for error tracking
|
||||
# [ ] Set file permissions: chmod 600 /opt/accessible-video/.env.production
|
||||
# [ ] Verify COOKIE_DOMAIN matches your domain
|
||||
# [ ] Verify API_BASE_URL and CLIENT_BASE_URL are correct
|
||||
# =============================================================================
|
||||
BIN
DEPLOYMENT.md
Normal file
BIN
DEPLOYMENT.md
Normal file
Binary file not shown.
172
apache-config-snippet.conf
Normal file
172
apache-config-snippet.conf
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
# =============================================================================
|
||||
# Apache Configuration for Accessible Video Platform
|
||||
# =============================================================================
|
||||
# Add this configuration to your existing VirtualHost for ai-sandbox.oliver.solutions
|
||||
# Location: /etc/apache2/sites-available/ai-sandbox.oliver.solutions-ssl.conf
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Frontend - Static React SPA served from subdirectory
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Serve frontend from /video-accessibility subdirectory
|
||||
Alias /video-accessibility /var/www/html/video-accessibility
|
||||
|
||||
<Directory /var/www/html/video-accessibility>
|
||||
# Basic options
|
||||
Options -Indexes +FollowSymLinks
|
||||
AllowOverride All
|
||||
Require all granted
|
||||
|
||||
# React SPA routing - rewrite requests that do not match an existing file or directory to index.html
|
||||
RewriteEngine On
|
||||
RewriteBase /video-accessibility
|
||||
|
||||
# Don't rewrite files or directories that exist
|
||||
RewriteCond %{REQUEST_FILENAME} !-f
|
||||
RewriteCond %{REQUEST_FILENAME} !-d
|
||||
|
||||
# Rewrite everything else to index.html
|
||||
RewriteRule ^ /video-accessibility/index.html [L]
|
||||
|
||||
# Security headers
|
||||
Header always set X-Frame-Options "SAMEORIGIN"
|
||||
Header always set X-Content-Type-Options "nosniff"
|
||||
Header always set X-XSS-Protection "1; mode=block"
|
||||
Header always set Referrer-Policy "strict-origin-when-cross-origin"
|
||||
|
||||
# Cache control for static assets
|
||||
<FilesMatch "\.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$">
|
||||
Header set Cache-Control "public, max-age=31536000, immutable"
|
||||
</FilesMatch>
|
||||
|
||||
# No cache for HTML files
|
||||
<FilesMatch "\.(html)$">
|
||||
Header set Cache-Control "no-cache, no-store, must-revalidate"
|
||||
Header set Pragma "no-cache"
|
||||
Header set Expires "0"
|
||||
</FilesMatch>
|
||||
</Directory>
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Backend API - Reverse proxy to Docker container
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Proxy backend API to Docker container on port 8000
|
||||
<Location /video-accessibility-back>
|
||||
# Preserve original host header
|
||||
ProxyPreserveHost On
|
||||
|
||||
# Proxy HTTP requests
|
||||
ProxyPass http://localhost:8000
|
||||
ProxyPassReverse http://localhost:8000
|
||||
|
||||
# Proxy timeout settings (important for long-running video processing)
|
||||
ProxyTimeout 300
|
||||
|
||||
# WebSocket support (CRITICAL for real-time job updates)
|
||||
RewriteEngine On
|
||||
RewriteCond %{HTTP:Upgrade} =websocket [NC]
|
||||
RewriteRule /video-accessibility-back/(.*) ws://localhost:8000/$1 [P,L]
|
||||
RewriteCond %{HTTP:Upgrade} !=websocket [NC]
|
||||
RewriteRule /video-accessibility-back/(.*) http://localhost:8000/$1 [P,L]
|
||||
|
||||
# Security headers
|
||||
Header always set X-Frame-Options "SAMEORIGIN"
|
||||
Header always set X-Content-Type-Options "nosniff"
|
||||
|
||||
# CORS is handled by the backend, don't add headers here
|
||||
</Location>
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Required Apache Modules
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Enable these modules with:
|
||||
# sudo a2enmod rewrite
|
||||
# sudo a2enmod proxy
|
||||
# sudo a2enmod proxy_http
|
||||
# sudo a2enmod proxy_wstunnel
|
||||
# sudo a2enmod headers
|
||||
# sudo systemctl restart apache2
|
||||
|
||||
# Verify modules are enabled:
|
||||
# apache2ctl -M | grep -E '(rewrite|proxy|headers)'
|
||||
|
||||
# =============================================================================
|
||||
# Full VirtualHost Example
|
||||
# =============================================================================
|
||||
|
||||
# Example of complete VirtualHost configuration:
|
||||
#
|
||||
# <VirtualHost *:443>
|
||||
# ServerName ai-sandbox.oliver.solutions
|
||||
# ServerAdmin admin@oliver.solutions
|
||||
#
|
||||
# DocumentRoot /var/www/html
|
||||
#
|
||||
# # SSL Configuration (with wildcard cert)
|
||||
# SSLEngine on
|
||||
# SSLCertificateFile /path/to/wildcard-ai-sandbox.oliver.solutions.crt
|
||||
# SSLCertificateKeyFile /path/to/wildcard-ai-sandbox.oliver.solutions.key
|
||||
# SSLCertificateChainFile /path/to/chain.crt # If needed
|
||||
#
|
||||
# # SSL Protocol and Cipher settings
|
||||
# SSLProtocol all -SSLv2 -SSLv3 -TLSv1 -TLSv1.1
|
||||
# SSLCipherSuite HIGH:!aNULL:!MD5
|
||||
#
|
||||
# # Frontend configuration (from above)
|
||||
# Alias /video-accessibility /var/www/html/video-accessibility
|
||||
# <Directory /var/www/html/video-accessibility>
|
||||
# ...
|
||||
# </Directory>
|
||||
#
|
||||
# # Backend API configuration (from above)
|
||||
# <Location /video-accessibility-back>
|
||||
# ...
|
||||
# </Location>
|
||||
#
|
||||
# # Logging
|
||||
# ErrorLog ${APACHE_LOG_DIR}/ai-sandbox-error.log
|
||||
# CustomLog ${APACHE_LOG_DIR}/ai-sandbox-access.log combined
|
||||
# </VirtualHost>
|
||||
|
||||
# =============================================================================
|
||||
# Testing & Verification
|
||||
# =============================================================================
|
||||
|
||||
# Test Apache configuration:
|
||||
# sudo apache2ctl configtest
|
||||
#
|
||||
# Restart Apache:
|
||||
# sudo systemctl restart apache2
|
||||
#
|
||||
# Test frontend:
|
||||
# curl -I https://ai-sandbox.oliver.solutions/video-accessibility
|
||||
#
|
||||
# Test backend:
|
||||
# curl https://ai-sandbox.oliver.solutions/video-accessibility-back/health
|
||||
#
|
||||
# Test WebSocket (requires wscat):
|
||||
# wscat -c wss://ai-sandbox.oliver.solutions/video-accessibility-back/api/v1/ws/jobs
|
||||
|
||||
# =============================================================================
|
||||
# Troubleshooting
|
||||
# =============================================================================
|
||||
|
||||
# Check Apache logs:
|
||||
# sudo tail -f /var/log/apache2/ai-sandbox-error.log
|
||||
# sudo tail -f /var/log/apache2/ai-sandbox-access.log
|
||||
#
|
||||
# Check if backend is running:
|
||||
# curl http://localhost:8000/health
|
||||
#
|
||||
# Check Docker containers:
|
||||
# cd /opt/accessible-video
|
||||
# docker-compose ps
|
||||
#
|
||||
# Common issues:
|
||||
# - 502 Bad Gateway: Backend container not running
|
||||
# - 404 Not Found: Frontend not deployed or Apache alias incorrect
|
||||
# - WebSocket fails: mod_proxy_wstunnel not enabled
|
||||
# - CORS errors: Check backend CORS configuration, not Apache
|
||||
|
|
@ -1,9 +1,25 @@
|
|||
# Python
|
||||
# =============================================================================
|
||||
# .dockerignore for Accessible Video Backend
|
||||
# Excludes unnecessary files from Docker build context
|
||||
# =============================================================================
|
||||
|
||||
# Python cache and compiled files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
|
||||
# Virtual environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Build artifacts
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
|
|
@ -21,33 +37,13 @@ wheels/
|
|||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# Poetry (keep poetry.lock for reproducible builds)
|
||||
# poetry.lock
|
||||
|
||||
# Virtual environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# IDE
|
||||
# IDE and editor files
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
|
||||
# Testing
|
||||
.coverage
|
||||
|
|
@ -58,6 +54,18 @@ htmlcov/
|
|||
coverage.xml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
tests/
|
||||
test_*.py
|
||||
|
||||
# Git
|
||||
.git/
|
||||
.gitignore
|
||||
.github/
|
||||
|
||||
# Docker files (don't copy Docker files into the image)
|
||||
Dockerfile*
|
||||
.dockerignore
|
||||
docker-compose*.yml
|
||||
|
||||
# Documentation
|
||||
docs/
|
||||
|
|
@ -68,25 +76,33 @@ README*
|
|||
*.log
|
||||
logs/
|
||||
|
||||
# Git
|
||||
.git/
|
||||
.gitignore
|
||||
|
||||
# Docker
|
||||
Dockerfile*
|
||||
.dockerignore
|
||||
docker-compose*
|
||||
|
||||
# CI/CD
|
||||
.github/
|
||||
|
||||
# Local development
|
||||
.env.local
|
||||
.env.development
|
||||
.env.test
|
||||
|
||||
# Temporary files
|
||||
tmp/
|
||||
temp/
|
||||
*.tmp
|
||||
*.bak
|
||||
|
||||
# CI/CD
|
||||
.github/
|
||||
|
||||
# Environment files (will be mounted or set via docker-compose)
|
||||
.env*
|
||||
|
||||
# Old/backup files
|
||||
*.old
|
||||
|
||||
# Development scripts (not needed in production)
|
||||
debug_*.py
|
||||
test_*.py
|
||||
simple_*.py
|
||||
create_test_users.py
|
||||
setup_secrets.py
|
||||
|
||||
# Keep these important files:
|
||||
# - pyproject.toml (dependencies)
|
||||
# - poetry.lock (locked versions)
|
||||
# - gunicorn_conf.py (API server config)
|
||||
# - celery_worker.py (worker entry point)
|
||||
# - migrate.py (database migrations)
|
||||
# - app/ directory (application code)
|
||||
# - optical-414516-80e2475f6412.json (mounted as a volume at runtime; NOTE(review): copying it bakes the credentials into the image layers - consider excluding it)
|
||||
|
|
|
|||
92
backend/.dockerignore.old
Normal file
92
backend/.dockerignore.old
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# Poetry (keep poetry.lock for reproducible builds)
|
||||
# poetry.lock
|
||||
|
||||
# Virtual environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
|
||||
# Testing
|
||||
.coverage
|
||||
.pytest_cache/
|
||||
.mypy_cache/
|
||||
.tox/
|
||||
htmlcov/
|
||||
coverage.xml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
|
||||
# Documentation
|
||||
docs/
|
||||
*.md
|
||||
README*
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
logs/
|
||||
|
||||
# Git
|
||||
.git/
|
||||
.gitignore
|
||||
|
||||
# Docker
|
||||
Dockerfile*
|
||||
.dockerignore
|
||||
docker-compose*
|
||||
|
||||
# CI/CD
|
||||
.github/
|
||||
|
||||
# Local development
|
||||
.env.local
|
||||
.env.development
|
||||
.env.test
|
||||
|
||||
# Temporary files
|
||||
tmp/
|
||||
temp/
|
||||
*.tmp
|
||||
*.bak
|
||||
|
|
@ -1,18 +1,29 @@
|
|||
# Build stage - Install dependencies and build wheels
|
||||
# =============================================================================
|
||||
# Multi-stage Dockerfile for Accessible Video Processing Platform
|
||||
# =============================================================================
|
||||
# Stage 1: Builder - Install dependencies
|
||||
# Stage 2: Base - Common runtime for API and Worker
|
||||
# Stage 3: API - FastAPI + Gunicorn (no ffmpeg)
|
||||
# Stage 4: Worker - Celery worker (with ffmpeg for video processing)
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Stage 1: Builder - Install Python dependencies using Poetry
|
||||
# -----------------------------------------------------------------------------
|
||||
FROM python:3.11-slim AS builder
|
||||
|
||||
# Install build dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Poetry
|
||||
RUN pip install poetry==1.8.2
|
||||
RUN pip install --no-cache-dir poetry==1.8.2
|
||||
|
||||
# Set Poetry configuration
|
||||
# Configure Poetry not to create a virtual environment (we're in a container)
|
||||
ENV POETRY_NO_INTERACTION=1 \
|
||||
POETRY_VENV_IN_PROJECT=1 \
|
||||
POETRY_VIRTUALENVS_CREATE=false \
|
||||
POETRY_CACHE_DIR=/tmp/poetry_cache
|
||||
|
||||
WORKDIR /app
|
||||
|
|
@ -20,95 +31,39 @@ WORKDIR /app
|
|||
# Copy dependency files
|
||||
COPY pyproject.toml poetry.lock ./
|
||||
|
||||
# Install dependencies into venv
|
||||
RUN poetry config virtualenvs.in-project true && \
|
||||
poetry lock --no-update || true && \
|
||||
poetry install --only=main --no-root && \
|
||||
rm -rf $POETRY_CACHE_DIR
|
||||
# Install dependencies (using pip for simpler container)
|
||||
# Export to requirements.txt and install with pip for smaller image
|
||||
RUN poetry export -f requirements.txt --output requirements.txt --without-hashes \
|
||||
&& pip install --no-cache-dir --user -r requirements.txt \
|
||||
&& rm -rf $POETRY_CACHE_DIR
|
||||
|
||||
# Base runtime stage
|
||||
# -----------------------------------------------------------------------------
|
||||
# Stage 2: Base - Common runtime environment
|
||||
# -----------------------------------------------------------------------------
|
||||
FROM python:3.11-slim AS base
|
||||
|
||||
# Install runtime system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
ffmpeg \
|
||||
# Install common runtime dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libmagic1 \
|
||||
curl \
|
||||
tini \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean
|
||||
|
||||
# Create non-root user
|
||||
# Create non-root user for security
|
||||
RUN groupadd --gid 1000 app \
|
||||
&& useradd --uid 1000 --gid app --shell /bin/bash --create-home app
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
# Copy Python packages from builder
|
||||
COPY --from=builder /root/.local /home/app/.local
|
||||
|
||||
# Copy virtual environment from builder stage
|
||||
COPY --from=builder --chown=app:app /app/.venv /app/.venv
|
||||
|
||||
# Ensure venv is in PATH
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
|
||||
# Copy application code
|
||||
COPY --chown=app:app . .
|
||||
|
||||
# Switch to non-root user
|
||||
USER app
|
||||
|
||||
# Production API stage
|
||||
FROM base AS production
|
||||
|
||||
# Set environment variables for production
|
||||
ENV APP_ENV=prod \
|
||||
# Set PATH to include user packages
|
||||
ENV PATH="/home/app/.local/bin:$PATH" \
|
||||
PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Use tini as init system for proper signal handling
|
||||
ENTRYPOINT ["tini", "--"]
|
||||
|
||||
# Default command for API server
|
||||
CMD ["gunicorn", "-c", "gunicorn_conf.py"]
|
||||
|
||||
# Worker stage for Celery workers
|
||||
FROM base AS worker
|
||||
|
||||
# Set environment variables for worker
|
||||
ENV APP_ENV=prod \
|
||||
PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
C_FORCE_ROOT=1
|
||||
|
||||
# Health check for worker (check if Celery is responding)
|
||||
HEALTHCHECK --interval=60s --timeout=15s --start-period=10s --retries=3 \
|
||||
CMD python -c "from celery import Celery; app=Celery('app'); print('Worker healthy')" || exit 1
|
||||
|
||||
# Use tini as init system for proper signal handling
|
||||
ENTRYPOINT ["tini", "--"]
|
||||
|
||||
# Default command for Celery worker
|
||||
CMD ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=1"]
|
||||
|
||||
# Development stage with dev dependencies
|
||||
FROM builder AS development
|
||||
|
||||
# Install all dependencies including dev
|
||||
RUN poetry install --no-root && rm -rf $POETRY_CACHE_DIR
|
||||
|
||||
# Install additional dev tools
|
||||
RUN apt-get update && apt-get install -y \
|
||||
git \
|
||||
vim \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
WORKDIR /app
|
||||
|
||||
# Copy application code
|
||||
COPY --chown=app:app . .
|
||||
|
|
@ -116,12 +71,59 @@ COPY --chown=app:app . .
|
|||
# Switch to non-root user
|
||||
USER app
|
||||
|
||||
# Set environment for development
|
||||
ENV APP_ENV=dev \
|
||||
PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1
|
||||
# -----------------------------------------------------------------------------
|
||||
# Stage 3: API - FastAPI + Gunicorn (Production API Server)
|
||||
# -----------------------------------------------------------------------------
|
||||
FROM base AS api
|
||||
|
||||
# Set production environment variables
|
||||
ENV APP_ENV=prod
|
||||
|
||||
# Health check for API
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
# Expose API port
|
||||
EXPOSE 8000
|
||||
|
||||
# Development command with hot reload
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||
# Use tini as init system for proper signal handling
|
||||
ENTRYPOINT ["tini", "--"]
|
||||
|
||||
# Start Gunicorn with Uvicorn workers
|
||||
CMD ["gunicorn", "-c", "gunicorn_conf.py", "app.main:app"]
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Stage 4: Worker - Celery Worker (with ffmpeg for video processing)
|
||||
# -----------------------------------------------------------------------------
|
||||
FROM base AS worker
|
||||
|
||||
# Switch back to root to install ffmpeg
|
||||
USER root
|
||||
|
||||
# Install ffmpeg for video processing
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ffmpeg \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean
|
||||
|
||||
# Switch back to non-root user
|
||||
USER app
|
||||
|
||||
# Set production environment variables
|
||||
ENV APP_ENV=prod \
|
||||
C_FORCE_ROOT=0
|
||||
|
||||
# Health check for worker (check if Celery is responding)
|
||||
HEALTHCHECK --interval=60s --timeout=15s --start-period=10s --retries=3 \
|
||||
CMD python -c "from celery import Celery; app=Celery('accessible-video-tasks', broker='redis://redis:6379/0'); app.control.inspect().ping() or exit(1)" || exit 1
|
||||
|
||||
# Use tini as init system for proper signal handling
|
||||
ENTRYPOINT ["tini", "--"]
|
||||
|
||||
# Start Celery worker consuming the default, ingest, and notify queues
|
||||
# --concurrency=4 for 4 worker processes (adjust based on CPU cores available)
|
||||
CMD ["celery", "-A", "celery_worker", "worker", \
|
||||
"-Q", "default,ingest,notify", \
|
||||
"--loglevel=info", \
|
||||
"--concurrency=4", \
|
||||
"--max-tasks-per-child=100"]
|
||||
|
|
|
|||
127
backend/Dockerfile.old
Normal file
127
backend/Dockerfile.old
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
# Build stage - Install dependencies and build wheels
|
||||
FROM python:3.11-slim AS builder
|
||||
|
||||
# Install build dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Poetry
|
||||
RUN pip install poetry==1.8.2
|
||||
|
||||
# Set Poetry configuration
|
||||
ENV POETRY_NO_INTERACTION=1 \
|
||||
POETRY_VENV_IN_PROJECT=1 \
|
||||
POETRY_CACHE_DIR=/tmp/poetry_cache
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy dependency files
|
||||
COPY pyproject.toml poetry.lock ./
|
||||
|
||||
# Install dependencies into venv
|
||||
RUN poetry config virtualenvs.in-project true && \
|
||||
poetry lock --no-update || true && \
|
||||
poetry install --only=main --no-root && \
|
||||
rm -rf $POETRY_CACHE_DIR
|
||||
|
||||
# Base runtime stage
|
||||
FROM python:3.11-slim AS base
|
||||
|
||||
# Install runtime system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
ffmpeg \
|
||||
curl \
|
||||
tini \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean
|
||||
|
||||
# Create non-root user
|
||||
RUN groupadd --gid 1000 app \
|
||||
&& useradd --uid 1000 --gid app --shell /bin/bash --create-home app
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy virtual environment from builder stage
|
||||
COPY --from=builder --chown=app:app /app/.venv /app/.venv
|
||||
|
||||
# Ensure venv is in PATH
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
|
||||
# Copy application code
|
||||
COPY --chown=app:app . .
|
||||
|
||||
# Switch to non-root user
|
||||
USER app
|
||||
|
||||
# Production API stage
|
||||
FROM base AS production
|
||||
|
||||
# Set environment variables for production
|
||||
ENV APP_ENV=prod \
|
||||
PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Use tini as init system for proper signal handling
|
||||
ENTRYPOINT ["tini", "--"]
|
||||
|
||||
# Default command for API server
|
||||
CMD ["gunicorn", "-c", "gunicorn_conf.py"]
|
||||
|
||||
# Worker stage for Celery workers
|
||||
FROM base AS worker
|
||||
|
||||
# Set environment variables for worker
|
||||
ENV APP_ENV=prod \
|
||||
PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
C_FORCE_ROOT=1
|
||||
|
||||
# Health check for worker (check if Celery is responding)
|
||||
HEALTHCHECK --interval=60s --timeout=15s --start-period=10s --retries=3 \
|
||||
CMD python -c "from celery import Celery; app=Celery('app'); print('Worker healthy')" || exit 1
|
||||
|
||||
# Use tini as init system for proper signal handling
|
||||
ENTRYPOINT ["tini", "--"]
|
||||
|
||||
# Default command for Celery worker
|
||||
CMD ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=1"]
|
||||
|
||||
# Development stage with dev dependencies
|
||||
FROM builder AS development
|
||||
|
||||
# Install all dependencies including dev
|
||||
RUN poetry install --no-root && rm -rf $POETRY_CACHE_DIR
|
||||
|
||||
# Install additional dev tools
|
||||
RUN apt-get update && apt-get install -y \
|
||||
git \
|
||||
vim \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy application code
|
||||
COPY --chown=app:app . .
|
||||
|
||||
# Switch to non-root user
|
||||
USER app
|
||||
|
||||
# Set environment for development
|
||||
ENV APP_ENV=dev \
|
||||
PYTHONPATH=/app \
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
# Development command with hot reload
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||
Binary file not shown.
|
|
@ -111,7 +111,7 @@ async def websocket_job_status(
|
|||
except Exception as e:
|
||||
logger.error(f"WebSocket job status error: {e}")
|
||||
finally:
|
||||
manager.disconnect(websocket, user_id)
|
||||
await manager.disconnect(websocket, user_id)
|
||||
|
||||
|
||||
@router.websocket("/ws/jobs")
|
||||
|
|
@ -191,7 +191,7 @@ async def websocket_job_list(
|
|||
except Exception as e:
|
||||
logger.error(f"WebSocket job list error: {e}")
|
||||
finally:
|
||||
manager.disconnect(websocket, user_id)
|
||||
await manager.disconnect(websocket, user_id)
|
||||
|
||||
|
||||
@router.get("/ws/status")
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -63,7 +63,7 @@ class Settings(BaseSettings):
|
|||
otel_exporter_otlp_endpoint: str = ""
|
||||
|
||||
# CORS
|
||||
cors_origins: list[str] = ["http://localhost:5173", "http://localhost:3000"]
|
||||
cors_origins: list[str] = ["http://localhost:5173", "http://localhost:5174", "http://localhost:3000"]
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -34,8 +34,8 @@ class GeminiService:
|
|||
|
||||
while total_waited < max_wait_seconds:
|
||||
try:
|
||||
# Get file status
|
||||
file_info = client.files.get(name=file_name)
|
||||
# Get file status - use asyncio.to_thread to avoid blocking event loop
|
||||
file_info = await asyncio.to_thread(client.files.get, name=file_name)
|
||||
logger.info(f"File {file_name} status: {file_info.state} (waited {total_waited}s)")
|
||||
|
||||
if file_info.state == "ACTIVE":
|
||||
|
|
@ -65,13 +65,15 @@ class GeminiService:
|
|||
Returns structured JSON with transcript, captions VTT, and audio description VTT
|
||||
"""
|
||||
prompt = self._load_prompt("gemini_ingestion.md")
|
||||
uploaded_file = None
|
||||
|
||||
try:
|
||||
logger.info(f"Starting Gemini processing for video: {video_file_path}")
|
||||
|
||||
# Upload video file to Gemini using new API
|
||||
# Upload video file to Gemini using new API - use asyncio.to_thread to avoid blocking
|
||||
logger.info("Uploading video file to Gemini API...")
|
||||
uploaded_file = client.files.upload(
|
||||
uploaded_file = await asyncio.to_thread(
|
||||
client.files.upload,
|
||||
file=video_file_path,
|
||||
config={
|
||||
"display_name": f"video_processing_{Path(video_file_path).name}",
|
||||
|
|
@ -86,9 +88,10 @@ class GeminiService:
|
|||
if not file_ready:
|
||||
raise Exception("File failed to become ACTIVE within timeout")
|
||||
|
||||
# Generate content using new API
|
||||
# Generate content using new API - use asyncio.to_thread to avoid blocking
|
||||
logger.info("Generating content with Gemini model...")
|
||||
response = client.models.generate_content(
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[
|
||||
genai.types.Part.from_text(text=prompt),
|
||||
|
|
@ -144,12 +147,6 @@ class GeminiService:
|
|||
f"Successfully extracted accessibility content with confidence: {result['confidence']}"
|
||||
)
|
||||
|
||||
# Clean up uploaded file
|
||||
try:
|
||||
client.files.delete(name=uploaded_file.name)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to cleanup uploaded file: {e}")
|
||||
|
||||
return result
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
|
|
@ -163,6 +160,14 @@ class GeminiService:
|
|||
# Print to stdout for immediate visibility
|
||||
print(f"🚨 GEMINI ERROR: {type(e).__name__}: {str(e)}")
|
||||
raise
|
||||
finally:
|
||||
# Guaranteed cleanup of uploaded file regardless of success/failure/cancellation
|
||||
if uploaded_file:
|
||||
try:
|
||||
await asyncio.to_thread(client.files.delete, name=uploaded_file.name)
|
||||
logger.info(f"Successfully cleaned up uploaded file: {uploaded_file.name}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to cleanup uploaded file {uploaded_file.name}: {e}")
|
||||
|
||||
async def _self_heal_response(self, video_file_path: str, invalid_response: str) -> dict[str, Any]:
|
||||
"""Attempt to self-heal invalid JSON response from Gemini"""
|
||||
|
|
@ -196,7 +201,8 @@ Fix the JSON and return it:
|
|||
"""
|
||||
|
||||
try:
|
||||
response = client.models.generate_content(
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[genai.types.Part.from_text(text=self_heal_prompt)]
|
||||
)
|
||||
|
|
@ -316,7 +322,8 @@ JSON:
|
|||
"""
|
||||
|
||||
try:
|
||||
response = client.models.generate_content(
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=self.model_name,
|
||||
contents=[
|
||||
genai.types.Part.from_text(text=prompt + "\n\n" + user_prompt)
|
||||
|
|
|
|||
|
|
@ -8,16 +8,13 @@ message broadcasting across multiple worker processes.
|
|||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, List, Set, Optional, Any
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import WebSocket, WebSocketDisconnect
|
||||
import redis.asyncio as redis
|
||||
import redis as sync_redis
|
||||
from fastapi import WebSocket
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ..core.redis import get_redis_client
|
||||
from ..core.security import decode_token
|
||||
from ..core.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -31,19 +28,22 @@ class JobStatusUpdate(BaseModel):
|
|||
job_title: Optional[str] = None # Job title for better user experience
|
||||
message: Optional[str] = None
|
||||
progress: Optional[int] = None # 0-100 percentage
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
metadata: Optional[dict[str, Any]] = None
|
||||
eligible_users: Optional[set[str]] = None # Pre-computed eligible users
|
||||
|
||||
|
||||
class ConnectionManager:
|
||||
"""Manages WebSocket connections and Redis pub/sub for job status updates"""
|
||||
|
||||
def __init__(self):
|
||||
# Active WebSocket connections by user_id
|
||||
self.active_connections: Dict[str, Set[WebSocket]] = {}
|
||||
# Job subscriptions: job_id -> set of user_ids
|
||||
self.job_subscriptions: Dict[str, Set[str]] = {}
|
||||
# Global job list subscriptions by user_id
|
||||
self.global_subscriptions: Set[str] = set()
|
||||
# WebSocket connections by user_id
|
||||
self.user_ws: dict[str, set[WebSocket]] = {}
|
||||
# WebSocket metadata: websocket -> {user_id, jobs, scopes}
|
||||
self.ws_meta: dict[WebSocket, dict[str, Any]] = {}
|
||||
# Job subscriptions: job_id -> set of websockets
|
||||
self.job_ws: dict[str, set[WebSocket]] = {}
|
||||
# Lock serializing access to the connection maps between coroutines (asyncio.Lock is not thread-safe)
|
||||
self.lock = asyncio.Lock()
|
||||
# Redis client for pub/sub
|
||||
self.redis_client: Optional[redis.Redis] = None
|
||||
self.pubsub: Optional[redis.client.PubSub] = None
|
||||
|
|
@ -52,7 +52,7 @@ class ConnectionManager:
|
|||
async def start(self):
|
||||
"""Initialize Redis pub/sub subscriber"""
|
||||
try:
|
||||
self.redis_client = await redis.from_url(
|
||||
self.redis_client = redis.from_url(
|
||||
settings.redis_url,
|
||||
encoding="utf-8",
|
||||
decode_responses=True
|
||||
|
|
@ -94,15 +94,25 @@ class ConnectionManager:
|
|||
"""Connect a WebSocket for specific job status updates"""
|
||||
await websocket.accept()
|
||||
|
||||
# Add connection to active connections
|
||||
if user_id not in self.active_connections:
|
||||
self.active_connections[user_id] = set()
|
||||
self.active_connections[user_id].add(websocket)
|
||||
async with self.lock:
|
||||
# Add to user connections
|
||||
if user_id not in self.user_ws:
|
||||
self.user_ws[user_id] = set()
|
||||
self.user_ws[user_id].add(websocket)
|
||||
|
||||
# Add job subscription
|
||||
if job_id not in self.job_subscriptions:
|
||||
self.job_subscriptions[job_id] = set()
|
||||
self.job_subscriptions[job_id].add(user_id)
|
||||
# Initialize/update websocket metadata
|
||||
if websocket not in self.ws_meta:
|
||||
self.ws_meta[websocket] = {
|
||||
"user_id": user_id,
|
||||
"jobs": set(),
|
||||
"scopes": set()
|
||||
}
|
||||
self.ws_meta[websocket]["jobs"].add(job_id)
|
||||
|
||||
# Add to job subscriptions
|
||||
if job_id not in self.job_ws:
|
||||
self.job_ws[job_id] = set()
|
||||
self.job_ws[job_id].add(websocket)
|
||||
|
||||
logger.info(f"User {user_id} connected for job {job_id} status updates")
|
||||
|
||||
|
|
@ -117,13 +127,20 @@ class ConnectionManager:
|
|||
"""Connect a WebSocket for job list updates (all jobs for a user)"""
|
||||
await websocket.accept()
|
||||
|
||||
# Add connection to active connections
|
||||
if user_id not in self.active_connections:
|
||||
self.active_connections[user_id] = set()
|
||||
self.active_connections[user_id].add(websocket)
|
||||
async with self.lock:
|
||||
# Add to user connections
|
||||
if user_id not in self.user_ws:
|
||||
self.user_ws[user_id] = set()
|
||||
self.user_ws[user_id].add(websocket)
|
||||
|
||||
# Add to global subscriptions
|
||||
self.global_subscriptions.add(user_id)
|
||||
# Initialize/update websocket metadata
|
||||
if websocket not in self.ws_meta:
|
||||
self.ws_meta[websocket] = {
|
||||
"user_id": user_id,
|
||||
"jobs": set(),
|
||||
"scopes": set()
|
||||
}
|
||||
self.ws_meta[websocket]["scopes"].add("job_list")
|
||||
|
||||
logger.info(f"User {user_id} connected for job list updates")
|
||||
|
||||
|
|
@ -134,23 +151,26 @@ class ConnectionManager:
|
|||
"timestamp": datetime.utcnow().isoformat()
|
||||
})
|
||||
|
||||
def disconnect(self, websocket: WebSocket, user_id: str):
|
||||
async def disconnect(self, websocket: WebSocket, user_id: str):
|
||||
"""Disconnect a WebSocket and clean up subscriptions"""
|
||||
# Remove from active connections
|
||||
if user_id in self.active_connections:
|
||||
self.active_connections[user_id].discard(websocket)
|
||||
if not self.active_connections[user_id]:
|
||||
del self.active_connections[user_id]
|
||||
|
||||
# Remove from global subscriptions if no connections left
|
||||
if user_id not in self.active_connections:
|
||||
self.global_subscriptions.discard(user_id)
|
||||
async with self.lock:
|
||||
# Get websocket metadata
|
||||
meta = self.ws_meta.pop(websocket, None)
|
||||
if not meta:
|
||||
return
|
||||
|
||||
# Remove from job subscriptions
|
||||
for job_id in list(self.job_subscriptions.keys()):
|
||||
self.job_subscriptions[job_id].discard(user_id)
|
||||
if not self.job_subscriptions[job_id]:
|
||||
del self.job_subscriptions[job_id]
|
||||
for job_id in meta.get("jobs", set()):
|
||||
if job_id in self.job_ws:
|
||||
self.job_ws[job_id].discard(websocket)
|
||||
if not self.job_ws[job_id]:
|
||||
del self.job_ws[job_id]
|
||||
|
||||
# Remove from user connections
|
||||
if user_id in self.user_ws:
|
||||
self.user_ws[user_id].discard(websocket)
|
||||
if not self.user_ws[user_id]:
|
||||
del self.user_ws[user_id]
|
||||
|
||||
logger.info(f"User {user_id} disconnected from WebSocket")
|
||||
|
||||
|
|
@ -159,66 +179,70 @@ class ConnectionManager:
|
|||
job_id: str,
|
||||
status: str,
|
||||
job_title: Optional[str] = None,
|
||||
user_id: Optional[str] = None,
|
||||
message: Optional[str] = None,
|
||||
progress: Optional[int] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
metadata: Optional[dict[str, Any]] = None
|
||||
):
|
||||
"""
|
||||
Broadcast job status update to Redis pub/sub
|
||||
This will be called from Celery workers
|
||||
Async wrapper for broadcasting job status updates from API routes
|
||||
For Celery workers, use websocket_publisher.publish_job_update_with_eligibility() directly
|
||||
"""
|
||||
update = JobStatusUpdate(
|
||||
job_id=job_id,
|
||||
status=status,
|
||||
updated_at=datetime.utcnow(),
|
||||
job_title=job_title,
|
||||
message=message,
|
||||
progress=progress,
|
||||
metadata=metadata
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from .websocket_publisher import publish_job_update_with_eligibility
|
||||
|
||||
# Run the sync publisher in a thread pool
|
||||
loop = asyncio.get_event_loop()
|
||||
with ThreadPoolExecutor(max_workers=1) as executor:
|
||||
await loop.run_in_executor(
|
||||
executor,
|
||||
publish_job_update_with_eligibility,
|
||||
job_id,
|
||||
status,
|
||||
job_title,
|
||||
message,
|
||||
progress,
|
||||
metadata
|
||||
)
|
||||
|
||||
try:
|
||||
# Create a synchronous Redis client for Celery workers
|
||||
redis_client = sync_redis.Redis.from_url(
|
||||
settings.redis_url,
|
||||
encoding="utf-8",
|
||||
decode_responses=True
|
||||
)
|
||||
|
||||
# Publish to global channel
|
||||
redis_client.publish(
|
||||
"job_status_updates",
|
||||
update.model_dump_json()
|
||||
)
|
||||
|
||||
# Publish to specific job channel
|
||||
redis_client.publish(
|
||||
f"job_status_updates:{job_id}",
|
||||
update.model_dump_json()
|
||||
)
|
||||
|
||||
# Close the connection
|
||||
redis_client.close()
|
||||
|
||||
logger.debug(f"Broadcasted status update for job {job_id}: {status}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to broadcast job status update: {e}")
|
||||
|
||||
async def _redis_subscriber(self):
|
||||
"""Background task to handle Redis pub/sub messages"""
|
||||
"""Background task to handle Redis pub/sub messages with reconnection logic"""
|
||||
delay = 1 # Start with 1 second delay
|
||||
max_delay = 30 # Maximum delay of 30 seconds
|
||||
|
||||
while True:
|
||||
try:
|
||||
# (Re)create pubsub connection
|
||||
if self.pubsub:
|
||||
try:
|
||||
await self.pubsub.aclose()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self.pubsub = self.redis_client.pubsub()
|
||||
|
||||
# Subscribe to channels
|
||||
await self.pubsub.subscribe("job_status_updates")
|
||||
await self.pubsub.psubscribe("job_status_updates:*")
|
||||
|
||||
logger.info("Redis subscriber connected and subscribed")
|
||||
delay = 1 # Reset delay on successful connection
|
||||
|
||||
# Listen for messages
|
||||
async for message in self.pubsub.listen():
|
||||
# Handle both regular messages and pattern messages
|
||||
if message["type"] in ("message", "pmessage"):
|
||||
await self._handle_redis_message(message)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
logger.info("Redis subscriber task cancelled")
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Redis subscriber error: {e}")
|
||||
logger.error(f"Redis subscriber error, retrying in {delay}s: {e}")
|
||||
await asyncio.sleep(delay)
|
||||
delay = min(delay * 2, max_delay) # Exponential backoff
|
||||
|
||||
async def _handle_redis_message(self, message: Dict[str, Any]):
|
||||
async def _handle_redis_message(self, message: dict[str, Any]):
|
||||
"""Handle incoming Redis pub/sub message"""
|
||||
try:
|
||||
# For pattern messages, the channel is in the "channel" field
|
||||
|
|
@ -237,7 +261,7 @@ class ConnectionManager:
|
|||
|
||||
# Send to global subscribers (job list updates)
|
||||
elif channel == "job_status_updates":
|
||||
logger.debug(f"Sending global job status update to subscribers")
|
||||
logger.debug("Sending global job status update to subscribers")
|
||||
await self._send_job_status_to_global_subscribers(update)
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -245,35 +269,48 @@ class ConnectionManager:
|
|||
|
||||
async def _send_job_status_to_subscribers(self, job_id: str, update: JobStatusUpdate):
|
||||
"""Send job status update to specific job subscribers"""
|
||||
if job_id not in self.job_subscriptions:
|
||||
async with self.lock:
|
||||
target_websockets = list(self.job_ws.get(job_id, set()))
|
||||
|
||||
if not target_websockets:
|
||||
return
|
||||
|
||||
# Convert to JSON-serializable dict
|
||||
message = {
|
||||
"type": "job_status_update",
|
||||
"data": json.loads(update.model_dump_json())
|
||||
"data": update.model_dump(mode="json")
|
||||
}
|
||||
|
||||
for user_id in list(self.job_subscriptions[job_id]):
|
||||
await self._send_to_user(user_id, message)
|
||||
await self._send_to_websockets(target_websockets, message)
|
||||
|
||||
async def _send_job_status_to_global_subscribers(self, update: JobStatusUpdate):
|
||||
"""Send job status update to global (job list) subscribers with user filtering"""
|
||||
# Convert to JSON-serializable dict
|
||||
message_data = update.model_dump(mode="json")
|
||||
# Remove eligible_users from the client message
|
||||
message_data.pop("eligible_users", None)
|
||||
message = {
|
||||
"type": "job_list_update",
|
||||
"data": json.loads(update.model_dump_json())
|
||||
"data": message_data
|
||||
}
|
||||
|
||||
# Get users who should receive this notification
|
||||
# Use pre-computed eligible users if available, otherwise compute them
|
||||
eligible_users = getattr(update, 'eligible_users', None)
|
||||
if eligible_users is None:
|
||||
eligible_users = await self._get_job_related_users(update.job_id)
|
||||
|
||||
# Only send to users who are both subscribed and have access to this job
|
||||
for user_id in list(self.global_subscriptions):
|
||||
if user_id in eligible_users:
|
||||
await self._send_to_user(user_id, message)
|
||||
# Find websockets for eligible users that have job_list scope
|
||||
target_websockets = []
|
||||
async with self.lock:
|
||||
for user_id in eligible_users:
|
||||
for websocket in self.user_ws.get(user_id, set()):
|
||||
meta = self.ws_meta.get(websocket, {})
|
||||
if "job_list" in meta.get("scopes", set()):
|
||||
target_websockets.append(websocket)
|
||||
|
||||
async def _get_job_related_users(self, job_id: str) -> Set[str]:
|
||||
await self._send_to_websockets(target_websockets, message)
|
||||
|
||||
async def _get_job_related_users(self, job_id: str) -> set[str]:
|
||||
"""
|
||||
Get all users who should receive notifications for a specific job.
|
||||
Returns set of user IDs for:
|
||||
|
|
@ -321,25 +358,27 @@ class ConnectionManager:
|
|||
|
||||
return eligible_users
|
||||
|
||||
async def _send_to_user(self, user_id: str, message: Dict[str, Any]):
|
||||
"""Send message to all WebSocket connections for a user"""
|
||||
if user_id not in self.active_connections:
|
||||
return
|
||||
async def _send_to_websockets(self, websockets: list[WebSocket], message: dict[str, Any]):
|
||||
"""Send message to a list of WebSocket connections"""
|
||||
disconnected_websockets = []
|
||||
|
||||
# Send to all connections for this user
|
||||
disconnected_connections = set()
|
||||
for websocket in list(self.active_connections[user_id]):
|
||||
for websocket in websockets:
|
||||
try:
|
||||
await self._send_to_websocket(websocket, message)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to send to websocket for user {user_id}: {e}")
|
||||
disconnected_connections.add(websocket)
|
||||
logger.warning(f"Failed to send to websocket: {e}")
|
||||
disconnected_websockets.append(websocket)
|
||||
|
||||
# Clean up disconnected connections
|
||||
for websocket in disconnected_connections:
|
||||
self.disconnect(websocket, user_id)
|
||||
for websocket in disconnected_websockets:
|
||||
# Get user_id from metadata before disconnecting
|
||||
async with self.lock:
|
||||
meta = self.ws_meta.get(websocket, {})
|
||||
user_id = meta.get("user_id")
|
||||
if user_id:
|
||||
await self.disconnect(websocket, user_id)
|
||||
|
||||
async def _send_to_websocket(self, websocket: WebSocket, message: Dict[str, Any]):
|
||||
async def _send_to_websocket(self, websocket: WebSocket, message: dict[str, Any]):
|
||||
"""Send message to a specific WebSocket connection"""
|
||||
try:
|
||||
await websocket.send_json(message)
|
||||
|
|
@ -352,30 +391,6 @@ class ConnectionManager:
|
|||
connection_manager = ConnectionManager()
|
||||
|
||||
|
||||
async def authenticate_websocket(websocket: WebSocket, token: str) -> Optional[str]:
|
||||
"""
|
||||
Authenticate WebSocket connection using JWT token
|
||||
Returns user_id if valid, None if invalid
|
||||
"""
|
||||
try:
|
||||
if not token:
|
||||
await websocket.close(code=4001, reason="Missing authentication token")
|
||||
return None
|
||||
|
||||
# Decode JWT token
|
||||
payload = decode_token(token)
|
||||
if not payload or "sub" not in payload:
|
||||
await websocket.close(code=4001, reason="Invalid authentication token")
|
||||
return None
|
||||
|
||||
return payload["sub"] # user_id
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"WebSocket authentication failed: {e}")
|
||||
await websocket.close(code=4001, reason="Authentication failed")
|
||||
return None
|
||||
|
||||
|
||||
async def authenticate_websocket(websocket: WebSocket, token: Optional[str]) -> Optional[str]:
|
||||
"""
|
||||
Authenticate a WebSocket connection using a JWT token
|
||||
|
|
|
|||
214
backend/app/services/websocket_publisher.py
Normal file
214
backend/app/services/websocket_publisher.py
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
"""
|
||||
Synchronous WebSocket Publisher for Celery Workers
|
||||
|
||||
This module provides a synchronous Redis publisher for broadcasting job status
|
||||
updates from Celery workers. It maintains a persistent Redis connection pool
|
||||
to avoid connection overhead per publish operation.
|
||||
"""
|
||||
import logging
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional
|
||||
|
||||
import redis
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ..core.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Channel name constants
|
||||
CHANNEL_GLOBAL = "job_status_updates"
|
||||
CHANNEL_JOB_FMT = "job_status_updates:{job_id}"
|
||||
|
||||
|
||||
class JobStatusUpdate(BaseModel):
    """Schema for job status update messages"""
    # Identifier of the job this update refers to; also used to build the
    # job-specific channel name (CHANNEL_JOB_FMT).
    job_id: str
    # New status value for the job, passed through as given by the caller.
    status: str
    # Timestamp of the update; the publisher fills this with datetime.utcnow().
    updated_at: datetime
    # Optional human-readable job title.
    job_title: Optional[str] = None
    # Optional free-form message accompanying the status change.
    message: Optional[str] = None
    # Optional progress indicator (presumably a 0-100 percentage -- confirm with consumers).
    progress: Optional[int] = None
    # Optional extra payload forwarded unchanged.
    metadata: Optional[dict[str, Any]] = None
    eligible_users: Optional[set[str]] = None  # Pre-computed eligible users
|
||||
|
||||
|
||||
class WebSocketPublisher:
    """Synchronous Redis publisher for WebSocket updates from Celery workers.

    A single Redis client is created lazily on first use and reused for every
    publish, so callers do not pay connection setup per message.
    """

    def __init__(self):
        # Created lazily by _get_client(); reset to None by close().
        self._redis_client: Optional[redis.Redis] = None
        # Guards creation and teardown of the shared client.
        self._lock = threading.Lock()

    def _get_client(self) -> redis.Redis:
        """Get or create the shared Redis client.

        Returns:
            The Redis client built from ``settings.redis_url``.
        """
        client = self._redis_client
        if client is None:
            with self._lock:
                # Re-check under the lock: another thread may have created the
                # client while we were waiting.
                client = self._redis_client
                if client is None:
                    client = redis.Redis.from_url(
                        settings.redis_url,
                        encoding="utf-8",
                        decode_responses=True
                    )
                    self._redis_client = client
        # Return the local reference so a concurrent close() cannot turn the
        # result into None between the check and the return.
        return client

    def publish_job_status_update(
        self,
        job_id: str,
        status: str,
        job_title: Optional[str] = None,
        message: Optional[str] = None,
        progress: Optional[int] = None,
        metadata: Optional[dict[str, Any]] = None,
        eligible_users: Optional[set[str]] = None
    ) -> bool:
        """
        Publish job status update to Redis pub/sub channels.

        The same serialized payload is published to the global channel
        (CHANNEL_GLOBAL) and to the job-specific channel (CHANNEL_JOB_FMT).

        Args:
            job_id: Identifier of the job being reported on.
            status: New status value.
            job_title: Optional job title.
            message: Optional message accompanying the update.
            progress: Optional progress value.
            metadata: Optional extra payload.
            eligible_users: Optional pre-computed set of user IDs allowed to
                receive the update.

        Returns:
            True if successful, False otherwise. Errors are logged, never raised.
        """
        try:
            update = JobStatusUpdate(
                job_id=job_id,
                status=status,
                updated_at=datetime.utcnow(),
                job_title=job_title,
                message=message,
                progress=progress,
                metadata=metadata,
                eligible_users=eligible_users
            )

            # Serialize once and reuse for both channels. pydantic v2's
            # model_dump_json() already emits compact JSON and accepts no
            # `separators` argument; passing one raised TypeError, which the
            # handler below swallowed, so every publish silently failed.
            payload = update.model_dump_json()

            # Send both publishes through one pipeline so they are executed
            # together in a single round trip.
            client = self._get_client()
            with client.pipeline() as pipe:
                # Publish to global channel
                pipe.publish(CHANNEL_GLOBAL, payload)
                # Publish to job-specific channel
                pipe.publish(CHANNEL_JOB_FMT.format(job_id=job_id), payload)
                pipe.execute()

            logger.debug(f"Published status update for job {job_id}: {status}")
            return True

        except Exception as e:
            # Best-effort by design: a failed notification must not fail the caller.
            logger.error(f"Failed to publish job status update for {job_id}: {e}")
            return False

    def close(self):
        """Close Redis connection and drop the shared client (no-op if none exists)."""
        if self._redis_client:
            with self._lock:
                if self._redis_client:
                    self._redis_client.connection_pool.disconnect()
                    self._redis_client = None
                    logger.info("WebSocket publisher Redis connection closed")
|
||||
|
||||
|
||||
# Global publisher instance for Celery workers
|
||||
_publisher = WebSocketPublisher()
|
||||
|
||||
|
||||
def publish_job_status_update(
    job_id: str,
    status: str,
    job_title: Optional[str] = None,
    message: Optional[str] = None,
    progress: Optional[int] = None,
    metadata: Optional[dict[str, Any]] = None,
    eligible_users: Optional[set[str]] = None
) -> bool:
    """
    Module-level shortcut that forwards to the shared publisher instance.

    Celery workers are expected to call this instead of building their own
    WebSocketPublisher. Returns whatever the publisher returns (True on
    success, False on failure).
    """
    # Collect the arguments once, then hand them to the global publisher.
    forwarded = {
        "job_id": job_id,
        "status": status,
        "job_title": job_title,
        "message": message,
        "progress": progress,
        "metadata": metadata,
        "eligible_users": eligible_users,
    }
    return _publisher.publish_job_status_update(**forwarded)
|
||||
|
||||
|
||||
def close_publisher():
    """Shut down the module-level publisher; intended for worker shutdown hooks."""
    publisher = _publisher
    publisher.close()
|
||||
|
||||
|
||||
def get_job_eligible_users(job_id: str) -> set[str]:
    """
    Get eligible users for a job (synchronous version for Celery workers).

    Intended to be called at publish time so subscribers do not need a DB
    lookup per message.

    The result contains the job's ``client_id``, the current
    ``review.reviewer_id``, every ``by`` entry in ``review.history``, and the
    stringified ``_id`` of every user with role ``admin``.

    Args:
        job_id: Value matched against the ``_id`` field of the ``jobs`` collection.

    Returns:
        Set of user IDs. Empty if the job does not exist; possibly partial if
        an error occurs midway (errors are logged, never raised).
    """
    eligible_users: set[str] = set()
    client = None

    try:
        # Import MongoDB client (synchronous) lazily so importing this module
        # does not require pymongo.
        from pymongo import MongoClient

        # Use synchronous MongoDB client for Celery workers
        client = MongoClient(settings.mongodb_url)
        db = client[settings.database_name]

        # Get the job
        job = db["jobs"].find_one({"_id": job_id})
        if not job:
            logger.warning(f"Job {job_id} not found for eligibility check")
            return eligible_users

        # Add job creator
        # NOTE(review): client_id / reviewer_id / history "by" values are added
        # as stored, while admin IDs are str()-converted -- confirm they are
        # all strings in the database.
        if job.get("client_id"):
            eligible_users.add(job["client_id"])

        # Add the currently assigned reviewer
        review = job.get("review", {})
        if review.get("reviewer_id"):
            eligible_users.add(review["reviewer_id"])

        # Add everyone who acted on the job according to the review history
        for history_item in review.get("history", []):
            if history_item.get("by"):
                eligible_users.add(history_item["by"])

        # Add all admin users (they can see all jobs)
        for admin_user in db["users"].find({"role": "admin"}):
            eligible_users.add(str(admin_user["_id"]))

        logger.debug(f"Job {job_id} eligible users: {len(eligible_users)}")

    except Exception as e:
        # Best-effort: return whatever was collected so far.
        logger.error(f"Error getting eligible users for job {job_id}: {e}")

    finally:
        # Always release the client; it previously leaked on the "job not
        # found" early return and on any exception.
        if client is not None:
            try:
                client.close()
            except Exception as e:
                logger.warning(f"Failed to close MongoDB client for job {job_id}: {e}")

    return eligible_users
|
||||
|
||||
|
||||
def publish_job_update_with_eligibility(
    job_id: str,
    status: str,
    job_title: Optional[str] = None,
    message: Optional[str] = None,
    progress: Optional[int] = None,
    metadata: Optional[dict[str, Any]] = None
) -> bool:
    """
    Look up the users eligible for the job, then publish the status update
    with that set attached.

    This is the recommended entry point for Celery workers. Returns the
    result of publish_job_status_update().
    """
    # Resolve recipients first so the published payload carries them.
    recipients = get_job_eligible_users(job_id)

    published = publish_job_status_update(
        job_id=job_id,
        status=status,
        job_title=job_title,
        message=message,
        progress=progress,
        metadata=metadata,
        eligible_users=recipients,
    )
    return published
|
||||
|
|
@ -1,110 +1,64 @@
|
|||
# =============================================================================
|
||||
# Docker Compose Production Overrides
|
||||
# =============================================================================
|
||||
# Usage: docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d
|
||||
# =============================================================================
|
||||
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
# MongoDB with Replica Set
|
||||
# ---------------------------------------------------------------------------
|
||||
# MongoDB - Production Settings
|
||||
# ---------------------------------------------------------------------------
|
||||
mongodb:
|
||||
image: mongo:7.0
|
||||
container_name: accessible-video-mongo-prod
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
MONGO_INITDB_ROOT_USERNAME: ${MONGODB_ROOT_USER:-admin}
|
||||
MONGO_INITDB_ROOT_PASSWORD: ${MONGODB_ROOT_PASSWORD}
|
||||
MONGO_INITDB_DATABASE: accessible_video
|
||||
ports:
|
||||
- "27017:27017"
|
||||
volumes:
|
||||
- mongodb_data_prod:/data/db
|
||||
- ./mongo-init.js:/docker-entrypoint-initdb.d/init.js:ro
|
||||
- ./mongo-keyfile:/data/keyfile:ro
|
||||
command: ["mongod", "--replSet", "rs0", "--bind_ip_all", "--keyFile", "/data/keyfile"]
|
||||
networks:
|
||||
- app-network-prod
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 4G
|
||||
cpus: '1.0'
|
||||
reservations:
|
||||
memory: 2G
|
||||
cpus: '0.5'
|
||||
|
||||
# Redis
|
||||
# ---------------------------------------------------------------------------
|
||||
# Redis - Production Settings
|
||||
# ---------------------------------------------------------------------------
|
||||
redis:
|
||||
image: redis:7.2-alpine
|
||||
container_name: accessible-video-redis-prod
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "6379:6379"
|
||||
volumes:
|
||||
- redis_data_prod:/data
|
||||
networks:
|
||||
- app-network-prod
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 2G
|
||||
cpus: '0.5'
|
||||
reservations:
|
||||
memory: 1G
|
||||
cpus: '0.25'
|
||||
|
||||
# Backend API
|
||||
# ---------------------------------------------------------------------------
|
||||
# API - Production Settings
|
||||
# ---------------------------------------------------------------------------
|
||||
api:
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
target: production
|
||||
container_name: accessible-video-api-prod
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 4G
|
||||
cpus: '2.0'
|
||||
reservations:
|
||||
memory: 2G
|
||||
cpus: '1.0'
|
||||
environment:
|
||||
- APP_ENV=production
|
||||
- MONGODB_URL=${MONGODB_URL}
|
||||
- REDIS_URL=${REDIS_URL}
|
||||
- JWT_SECRET_KEY=${JWT_SECRET_KEY}
|
||||
- JWT_REFRESH_SECRET_KEY=${JWT_REFRESH_SECRET_KEY}
|
||||
- GEMINI_API_KEY=${GEMINI_API_KEY}
|
||||
- SENDGRID_API_KEY=${SENDGRID_API_KEY}
|
||||
- ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
|
||||
- GCS_BUCKET_NAME=${GCS_BUCKET_NAME}
|
||||
- GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT}
|
||||
- OTEL_SERVICE_NAME=accessible-video-api-prod
|
||||
- SENTRY_DSN=${SENTRY_DSN}
|
||||
- SENTRY_ENVIRONMENT=production
|
||||
- CORS_ORIGINS=${CORS_ORIGINS:-https://your-domain.com,https://www.your-domain.com}
|
||||
ports:
|
||||
- "8000:8000"
|
||||
depends_on:
|
||||
- mongodb
|
||||
- redis
|
||||
networks:
|
||||
- app-network-prod
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
APP_ENV: prod
|
||||
|
||||
# Celery Worker
|
||||
# ---------------------------------------------------------------------------
|
||||
# Worker - Production Settings
|
||||
# ---------------------------------------------------------------------------
|
||||
worker:
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
target: production
|
||||
container_name: accessible-video-worker-prod
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 8G
|
||||
cpus: '4.0'
|
||||
reservations:
|
||||
memory: 4G
|
||||
cpus: '2.0'
|
||||
environment:
|
||||
- APP_ENV=production
|
||||
- MONGODB_URL=${MONGODB_URL}
|
||||
- REDIS_URL=${REDIS_URL}
|
||||
- CELERY_BROKER_URL=${REDIS_URL}
|
||||
- CELERY_RESULT_BACKEND=${REDIS_URL}
|
||||
- GEMINI_API_KEY=${GEMINI_API_KEY}
|
||||
- ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
|
||||
- GCS_BUCKET_NAME=${GCS_BUCKET_NAME}
|
||||
- GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT}
|
||||
- OTEL_SERVICE_NAME=accessible-video-worker-prod
|
||||
- SENTRY_DSN=${SENTRY_DSN}
|
||||
- SENTRY_ENVIRONMENT=production
|
||||
depends_on:
|
||||
- mongodb
|
||||
- redis
|
||||
command: ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=2"]
|
||||
networks:
|
||||
- app-network-prod
|
||||
|
||||
|
||||
# Note: Frontend will be built separately and hosted on Apache webserver
|
||||
# Build command: cd frontend && npm run build
|
||||
# Deploy the 'dist' folder contents to your Apache document root
|
||||
|
||||
volumes:
|
||||
mongodb_data_prod:
|
||||
redis_data_prod:
|
||||
|
||||
networks:
|
||||
app-network-prod:
|
||||
driver: bridge
|
||||
APP_ENV: prod
|
||||
|
|
|
|||
|
|
@ -1,132 +1,210 @@
|
|||
# =============================================================================
|
||||
# Docker Compose Configuration for Accessible Video Processing Platform
|
||||
# =============================================================================
|
||||
# Services:
|
||||
# - api: FastAPI + Gunicorn REST API
|
||||
# - worker: Celery worker for background processing
|
||||
# - mongodb: MongoDB database
|
||||
# - redis: Redis for Celery broker and cache
|
||||
# =============================================================================
|
||||
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
# MongoDB with Replica Set
|
||||
# ---------------------------------------------------------------------------
|
||||
# MongoDB Database
|
||||
# ---------------------------------------------------------------------------
|
||||
mongodb:
|
||||
image: mongo:7.0
|
||||
container_name: accessible-video-mongo
|
||||
container_name: accessible-video-mongodb
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
MONGO_INITDB_ROOT_USERNAME: admin
|
||||
MONGO_INITDB_ROOT_PASSWORD: password123
|
||||
MONGO_INITDB_DATABASE: accessible_video
|
||||
ports:
|
||||
- "27017:27017"
|
||||
MONGO_INITDB_DATABASE: ${MONGODB_DB:-accessible_video}
|
||||
volumes:
|
||||
- mongodb_data:/data/db
|
||||
- ./mongo-init.js:/docker-entrypoint-initdb.d/init.js:ro
|
||||
- ./mongo-keyfile:/data/keyfile:ro
|
||||
command: ["mongod", "--replSet", "rs0", "--bind_ip_all", "--keyFile", "/data/keyfile"]
|
||||
- mongodb-data:/data/db
|
||||
- mongodb-config:/data/configdb
|
||||
networks:
|
||||
- app-network
|
||||
- accessible-video-network
|
||||
healthcheck:
|
||||
test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# Redis
|
||||
# ---------------------------------------------------------------------------
|
||||
# Redis Cache and Message Broker
|
||||
# ---------------------------------------------------------------------------
|
||||
redis:
|
||||
image: redis:7.2-alpine
|
||||
image: redis:7-alpine
|
||||
container_name: accessible-video-redis
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "6379:6379"
|
||||
command: redis-server --appendonly yes --maxmemory 2gb --maxmemory-policy allkeys-lru
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
- redis-data:/data
|
||||
networks:
|
||||
- app-network
|
||||
- accessible-video-network
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 5s
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# Backend API
|
||||
# ---------------------------------------------------------------------------
|
||||
# FastAPI Backend API
|
||||
# ---------------------------------------------------------------------------
|
||||
api:
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
target: development
|
||||
target: api
|
||||
container_name: accessible-video-api
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- APP_ENV=dev
|
||||
- MONGODB_URL=mongodb://admin:password123@mongodb:27017/accessible_video?authSource=admin&replicaSet=rs0
|
||||
- REDIS_URL=redis://redis:6379/0
|
||||
- JWT_SECRET_KEY=dev-secret-key-change-in-production
|
||||
- JWT_REFRESH_SECRET_KEY=dev-refresh-secret-key-change-in-production
|
||||
- GEMINI_API_KEY=${GEMINI_API_KEY}
|
||||
- SENDGRID_API_KEY=${SENDGRID_API_KEY}
|
||||
- ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
|
||||
- GCS_BUCKET_NAME=accessible-video-dev
|
||||
- GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT}
|
||||
- OTEL_SERVICE_NAME=accessible-video-api-dev
|
||||
- OTEL_TRACES_EXPORTER=console
|
||||
- OTEL_METRICS_EXPORTER=prometheus
|
||||
- SENTRY_DSN=${SENTRY_DSN}
|
||||
- SENTRY_ENVIRONMENT=development
|
||||
depends_on:
|
||||
mongodb:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- "8000:8000"
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
- /app/.venv # Keep venv in container
|
||||
depends_on:
|
||||
- mongodb
|
||||
- redis
|
||||
networks:
|
||||
- app-network
|
||||
environment:
|
||||
# App configuration
|
||||
APP_ENV: ${APP_ENV:-dev}
|
||||
API_BASE_URL: ${API_BASE_URL:-http://localhost:8000}
|
||||
|
||||
# Celery Worker
|
||||
# Auth
|
||||
JWT_SECRET: ${JWT_SECRET}
|
||||
JWT_ALG: ${JWT_ALG:-HS256}
|
||||
JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240}
|
||||
JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7}
|
||||
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions}
|
||||
COOKIE_SECURE: ${COOKIE_SECURE:-true}
|
||||
COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax}
|
||||
|
||||
# Database
|
||||
MONGODB_URI: mongodb://mongodb:27017/${MONGODB_DB:-accessible_video}
|
||||
MONGODB_DB: ${MONGODB_DB:-accessible_video}
|
||||
|
||||
# Redis
|
||||
REDIS_URL: redis://redis:6379/0
|
||||
CELERY_BROKER_URL: redis://redis:6379/0
|
||||
CELERY_RESULT_BACKEND: redis://redis:6379/0
|
||||
|
||||
# GCP
|
||||
GCP_PROJECT_ID: ${GCP_PROJECT_ID}
|
||||
GCS_BUCKET: ${GCS_BUCKET:-accessible-video}
|
||||
GOOGLE_APPLICATION_CREDENTIALS: /secrets/gcp-credentials.json
|
||||
|
||||
# AI Services
|
||||
GEMINI_API_KEY: ${GEMINI_API_KEY}
|
||||
TRANSLATE_API_KEY: ${TRANSLATE_API_KEY:-}
|
||||
ELEVENLABS_API_KEY: ${ELEVENLABS_API_KEY:-}
|
||||
|
||||
# Email
|
||||
SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
|
||||
EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
|
||||
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
|
||||
|
||||
# Observability
|
||||
SENTRY_DSN: ${SENTRY_DSN:-}
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: ${OTEL_EXPORTER_OTLP_ENDPOINT:-}
|
||||
volumes:
|
||||
- ./secrets:/secrets:ro
|
||||
- api-logs:/app/logs
|
||||
networks:
|
||||
- accessible-video-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Celery Worker for Background Processing
|
||||
# ---------------------------------------------------------------------------
|
||||
worker:
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
target: development
|
||||
target: worker
|
||||
container_name: accessible-video-worker
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- APP_ENV=dev
|
||||
- MONGODB_URL=mongodb://admin:password123@mongodb:27017/accessible_video?authSource=admin&replicaSet=rs0
|
||||
- REDIS_URL=redis://redis:6379/0
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/0
|
||||
- GEMINI_API_KEY=${GEMINI_API_KEY}
|
||||
- ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
|
||||
- GCS_BUCKET_NAME=accessible-video-dev
|
||||
- GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT}
|
||||
- OTEL_SERVICE_NAME=accessible-video-worker-dev
|
||||
- OTEL_TRACES_EXPORTER=console
|
||||
- OTEL_METRICS_EXPORTER=prometheus
|
||||
- SENTRY_DSN=${SENTRY_DSN}
|
||||
- SENTRY_ENVIRONMENT=development
|
||||
- C_FORCE_ROOT=1
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
- /app/.venv # Keep venv in container
|
||||
depends_on:
|
||||
- mongodb
|
||||
- redis
|
||||
command: ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=1"]
|
||||
networks:
|
||||
- app-network
|
||||
|
||||
|
||||
# Frontend (for local development)
|
||||
frontend:
|
||||
build:
|
||||
context: ./frontend
|
||||
dockerfile: Dockerfile
|
||||
container_name: accessible-video-frontend
|
||||
restart: unless-stopped
|
||||
mongodb:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
- VITE_API_URL=http://localhost:8000
|
||||
- VITE_SENTRY_DSN=${VITE_SENTRY_DSN}
|
||||
- VITE_ENVIRONMENT=development
|
||||
ports:
|
||||
- "5173:5173"
|
||||
volumes:
|
||||
- ./frontend:/app
|
||||
- /app/node_modules # Keep node_modules in container
|
||||
depends_on:
|
||||
- api
|
||||
networks:
|
||||
- app-network
|
||||
# App configuration
|
||||
APP_ENV: ${APP_ENV:-dev}
|
||||
|
||||
volumes:
|
||||
mongodb_data:
|
||||
redis_data:
|
||||
# Database
|
||||
MONGODB_URI: mongodb://mongodb:27017/${MONGODB_DB:-accessible_video}
|
||||
MONGODB_DB: ${MONGODB_DB:-accessible_video}
|
||||
|
||||
# Redis
|
||||
REDIS_URL: redis://redis:6379/0
|
||||
CELERY_BROKER_URL: redis://redis:6379/0
|
||||
CELERY_RESULT_BACKEND: redis://redis:6379/0
|
||||
|
||||
# GCP
|
||||
GCP_PROJECT_ID: ${GCP_PROJECT_ID}
|
||||
GCS_BUCKET: ${GCS_BUCKET:-accessible-video}
|
||||
GOOGLE_APPLICATION_CREDENTIALS: /secrets/gcp-credentials.json
|
||||
|
||||
# AI Services
|
||||
GEMINI_API_KEY: ${GEMINI_API_KEY}
|
||||
TRANSLATE_API_KEY: ${TRANSLATE_API_KEY:-}
|
||||
ELEVENLABS_API_KEY: ${ELEVENLABS_API_KEY:-}
|
||||
|
||||
# Email
|
||||
SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
|
||||
EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
|
||||
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
|
||||
|
||||
# Observability
|
||||
SENTRY_DSN: ${SENTRY_DSN:-}
|
||||
volumes:
|
||||
- ./secrets:/secrets:ro
|
||||
- worker-logs:/app/logs
|
||||
networks:
|
||||
app-network:
|
||||
- accessible-video-network
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# =============================================================================
|
||||
# Networks
|
||||
# =============================================================================
|
||||
networks:
|
||||
accessible-video-network:
|
||||
driver: bridge
|
||||
name: accessible-video-network
|
||||
|
||||
# =============================================================================
|
||||
# Volumes
|
||||
# =============================================================================
|
||||
volumes:
|
||||
mongodb-data:
|
||||
name: accessible-video-mongodb-data
|
||||
mongodb-config:
|
||||
name: accessible-video-mongodb-config
|
||||
redis-data:
|
||||
name: accessible-video-redis-data
|
||||
api-logs:
|
||||
name: accessible-video-api-logs
|
||||
worker-logs:
|
||||
name: accessible-video-worker-logs
|
||||
|
|
|
|||
132
docker-compose.yml.old
Normal file
132
docker-compose.yml.old
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
version: '3.8'
|
||||
|
||||
services:
|
||||
# MongoDB with Replica Set
|
||||
mongodb:
|
||||
image: mongo:7.0
|
||||
container_name: accessible-video-mongo
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
MONGO_INITDB_ROOT_USERNAME: admin
|
||||
MONGO_INITDB_ROOT_PASSWORD: password123
|
||||
MONGO_INITDB_DATABASE: accessible_video
|
||||
ports:
|
||||
- "27017:27017"
|
||||
volumes:
|
||||
- mongodb_data:/data/db
|
||||
- ./mongo-init.js:/docker-entrypoint-initdb.d/init.js:ro
|
||||
- ./mongo-keyfile:/data/keyfile:ro
|
||||
command: ["mongod", "--replSet", "rs0", "--bind_ip_all", "--keyFile", "/data/keyfile"]
|
||||
networks:
|
||||
- app-network
|
||||
|
||||
# Redis
|
||||
redis:
|
||||
image: redis:7.2-alpine
|
||||
container_name: accessible-video-redis
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "6379:6379"
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
networks:
|
||||
- app-network
|
||||
|
||||
# Backend API
|
||||
api:
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
target: development
|
||||
container_name: accessible-video-api
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- APP_ENV=dev
|
||||
- MONGODB_URL=mongodb://admin:password123@mongodb:27017/accessible_video?authSource=admin&replicaSet=rs0
|
||||
- REDIS_URL=redis://redis:6379/0
|
||||
- JWT_SECRET_KEY=dev-secret-key-change-in-production
|
||||
- JWT_REFRESH_SECRET_KEY=dev-refresh-secret-key-change-in-production
|
||||
- GEMINI_API_KEY=${GEMINI_API_KEY}
|
||||
- SENDGRID_API_KEY=${SENDGRID_API_KEY}
|
||||
- ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
|
||||
- GCS_BUCKET_NAME=accessible-video-dev
|
||||
- GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT}
|
||||
- OTEL_SERVICE_NAME=accessible-video-api-dev
|
||||
- OTEL_TRACES_EXPORTER=console
|
||||
- OTEL_METRICS_EXPORTER=prometheus
|
||||
- SENTRY_DSN=${SENTRY_DSN}
|
||||
- SENTRY_ENVIRONMENT=development
|
||||
ports:
|
||||
- "8000:8000"
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
- /app/.venv # Keep venv in container
|
||||
depends_on:
|
||||
- mongodb
|
||||
- redis
|
||||
networks:
|
||||
- app-network
|
||||
|
||||
# Celery Worker
|
||||
worker:
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
target: development
|
||||
container_name: accessible-video-worker
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- APP_ENV=dev
|
||||
- MONGODB_URL=mongodb://admin:password123@mongodb:27017/accessible_video?authSource=admin&replicaSet=rs0
|
||||
- REDIS_URL=redis://redis:6379/0
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/0
|
||||
- GEMINI_API_KEY=${GEMINI_API_KEY}
|
||||
- ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
|
||||
- GCS_BUCKET_NAME=accessible-video-dev
|
||||
- GOOGLE_CLOUD_PROJECT=${GOOGLE_CLOUD_PROJECT}
|
||||
- OTEL_SERVICE_NAME=accessible-video-worker-dev
|
||||
- OTEL_TRACES_EXPORTER=console
|
||||
- OTEL_METRICS_EXPORTER=prometheus
|
||||
- SENTRY_DSN=${SENTRY_DSN}
|
||||
- SENTRY_ENVIRONMENT=development
|
||||
- C_FORCE_ROOT=1
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
- /app/.venv # Keep venv in container
|
||||
depends_on:
|
||||
- mongodb
|
||||
- redis
|
||||
command: ["celery", "-A", "app.tasks", "worker", "--loglevel=info", "--concurrency=1"]
|
||||
networks:
|
||||
- app-network
|
||||
|
||||
|
||||
# Frontend (for local development)
|
||||
frontend:
|
||||
build:
|
||||
context: ./frontend
|
||||
dockerfile: Dockerfile
|
||||
container_name: accessible-video-frontend
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- VITE_API_URL=http://localhost:8000
|
||||
- VITE_SENTRY_DSN=${VITE_SENTRY_DSN}
|
||||
- VITE_ENVIRONMENT=development
|
||||
ports:
|
||||
- "5173:5173"
|
||||
volumes:
|
||||
- ./frontend:/app
|
||||
- /app/node_modules # Keep node_modules in container
|
||||
depends_on:
|
||||
- api
|
||||
networks:
|
||||
- app-network
|
||||
|
||||
volumes:
|
||||
mongodb_data:
|
||||
redis_data:
|
||||
|
||||
networks:
|
||||
app-network:
|
||||
driver: bridge
|
||||
100
docs/prompt_closed_captions.md
Normal file
100
docs/prompt_closed_captions.md
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
This is a comprehensive AI prompt created by converting the DCMP closed captioning guidelines into a set of actionable instructions.
|
||||
|
||||
This prompt is designed to be given to an AI model along with a raw transcript of a video. It instructs the AI on how to format the text, add non-speech elements, and adhere to accessibility best practices.
|
||||
|
||||
These rules significantly enhance the quality and accessibility of the captions by focusing on grammatical integrity, speaker context, and emotional tone.
|
||||
|
||||
---
|
||||
# AI Prompt for Generating and Verifying Accessible Closed Captions (Broadcast Standard)
|
||||
|
||||
**Your Role:** You are an expert, end-to-end AI Closed Captioning Engine. Your function is to analyze, create, and quality-control professional, accessible WEBVTT caption files to a broadcast-ready standard.
|
||||
|
||||
**Primary Goal:** To autonomously produce a single, production-ready, and error-free WEBVTT file that is perfectly synchronized with the provided video. The final output must be so accurate and well-formatted that it requires no human intervention.
|
||||
|
||||
---
|
||||
|
||||
## Your Workflow: A Three-Step Process
|
||||
|
||||
You must execute the following three steps internally for every task:
|
||||
|
||||
### Step 1: Comprehensive Analysis
|
||||
* First, thoroughly analyze the video's audio and visual content.
|
||||
* Identify all spoken dialogue, distinguish between different speakers, and note their tone, dialect, and any regional accents.
|
||||
* Listen for and identify all non-speech audio cues essential for a deaf or hard-of-hearing viewer, including music, sound effects, and significant silences.
|
||||
|
||||
### Step 2: Creation & Synchronization
|
||||
* Based on your analysis, generate the caption text according to the **Core Captioning Instructions & Rules** listed below.
|
||||
* Meticulously synchronize each caption cue with the audio timeline. Timestamps must be precise, marking the exact start and end of each audio event.
|
||||
|
||||
### Step 3: Final Quality Control (QC) Verification
|
||||
* **Before finalizing your output, you must perform a rigorous self-check.** Review your generated WEBVTT file against the following critical QC checklist. If any point fails, you must correct it before presenting the final result.
|
||||
|
||||
* **QC Checklist:**
|
||||
* **Format:** Is the file in valid WEBVTT format? Is the `WEBVTT` header present? Are timestamps in the exact `HH:MM:SS.mmm --> HH:MM:SS.mmm` format? Are blank lines correctly separating each cue?
|
||||
* **Synchronization:** Do captions appear and disappear in perfect sync with the audio?
|
||||
* **Spelling & Capitalization:** Is all spelling correct according to **Merriam-Webster Online**? Is capitalization consistent, with ALL CAPS caption text reserved for screaming or shouting (never for emphasis)?
|
||||
* **Speaker IDs:** Is the speaker ID (`NARRATOR:`) used only on the *first* caption of a continuous block of speech and correctly re-introduced after any interruption?
|
||||
* **Language & Dialect:** Are foreign words captioned verbatim (not translated)? Are accents and dialects preserved correctly?
|
||||
* **Music & Lyrics:** Are music descriptions objective? Are the `♪...♪` and `♪...♪♪` formats used correctly for lyrics?
|
||||
* **Completeness:** Have all meaningful audio cues been captured?
|
||||
|
||||
---
|
||||
|
||||
## Core Captioning Instructions & Rules (For Step 2)
|
||||
|
||||
### 1. Output Format
|
||||
* The output must be a single, complete **WEBVTT (.vtt) file**.
|
||||
* The file must start with the header `WEBVTT` on the first line, followed by a blank line.
|
||||
* Each caption cue consists of a timestamp line followed immediately by the caption text; consecutive cues are separated from one another by a blank line.
|
||||
* **Do not** include any sequential numbers (e.g., `1`, `2`) in the output.
|
||||
|
||||
### 2. Spelling & Capitalization
|
||||
* **Primary Source:** Use **Merriam-Webster Online** for all spelling and capitalization.
|
||||
* **Consistency:** Ensure consistent spelling of all words and names throughout the file.
|
||||
* **Emphasis:** Do not use all caps for emphasis. Reserve ALL CAPS for indicating **screaming or shouting**.
|
||||
|
||||
### 3. Language, Dialect, and Accents
|
||||
* **Foreign Language:** Caption foreign words verbatim using correct accent marks and diacriticals (e.g., résumé, piñata). If the words are unintelligible, use a description (e.g., `[speaking French]`). **Never translate foreign speech into English.**
|
||||
* **Dialect:** Keep the flavor of the speaker's language (e.g., caption "gonna," "ain't," etc., as spoken).
|
||||
* **Accents:** If a speaker has a distinct regional accent, indicate it at the beginning of their first caption (e.g., `[with a Southern accent] My goodness.`).
|
||||
|
||||
### 4. Speaker Identification
|
||||
* **Format:** Identify speakers with a label in **ALL CAPS**, followed by a colon (e.g., `NARRATOR:`).
|
||||
* **Redundancy:** For a continuous block of speech from the same speaker, **only use the speaker ID on the first caption of that block.** Do not repeat the ID for subsequent captions by that same person. If another sound or speaker interrupts, re-introduce the ID when they resume.
|
||||
|
||||
### 5. Sound Effects, Music, and Lyrics
|
||||
* **Sound Effects:** Describe meaningful sounds in `[lowercase letters]`.
|
||||
* **Music Mood:** Use **objective** descriptions for music (e.g., "tense," "somber," "upbeat"). Avoid subjective words like "beautiful" or "delightful."
|
||||
* **Lyrics:**
|
||||
* Caption lyrics verbatim.
|
||||
* Use one music icon at the **beginning and end** of each caption line within a song (e.g., `♪ I can see clearly now ♪`).
|
||||
* Use two music icons at the end of the **last line** of a song (e.g., `♪ the rain is gone ♪♪`).
|
||||
* **Background Music:** For non-essential background music, place a single music icon (♪) in the upper right corner using VTT positioning (`line:0 position:90% align:end`).
|
||||
|
||||
---
|
||||
|
||||
## Example Scenario
|
||||
|
||||
**Input:** A video clip where a character named Maria speaks continuously.
|
||||
|
||||
**Correct WEBVTT Output:**
|
||||
|
||||
```vtt
|
||||
WEBVTT
|
||||
|
||||
00:00:21.500 --> 00:00:24.000
|
||||
MARIA: This is the first part
|
||||
of my statement.
|
||||
|
||||
00:00:24.500 --> 00:00:26.100
|
||||
I will continue speaking now
|
||||
without being interrupted.
|
||||
|
||||
00:00:26.500 --> 00:00:27.300
|
||||
[phone rings]
|
||||
|
||||
00:00:28.100 --> 00:00:30.250
|
||||
MARIA: As I was saying,
|
||||
it's important to be clear.
|
||||
|
||||
```

Now, apply this entire three-step analysis, creation, and verification process to the provided video. The final output must be a single, verified WEBVTT file.
|
||||
792
docs/video_accessibility_spec.md
Normal file
792
docs/video_accessibility_spec.md
Normal file
|
|
@ -0,0 +1,792 @@
|
|||
# Video Accessibility Processing Platform - Software Specification
|
||||
|
||||
## 1. Executive Summary
|
||||
|
||||
The Video Accessibility Processing Platform is a comprehensive web application designed to automatically generate closed captions and audio descriptions for video content using artificial intelligence. The platform provides a complete workflow from video upload through AI processing, human quality control, multi-language translation, and final content delivery.
|
||||
|
||||
**Core Capabilities:**
|
||||
- Automated generation of closed captions and audio descriptions using Google Gemini 2.5 Pro
|
||||
- Multi-language translation and transcreation services
|
||||
- Professional quality control workflow for reviewers
|
||||
- Text-to-speech generation for audio descriptions
|
||||
- Role-based access control for clients, reviewers, and administrators
|
||||
- Real-time job status updates via WebSocket connections
|
||||
- Secure file storage and signed URL download system
|
||||
|
||||
**Target Users:**
|
||||
- **Clients**: Organizations needing video accessibility services
|
||||
- **Reviewers**: Professional accessibility specialists who review and approve content
|
||||
- **Administrators**: System administrators managing users and system operations
|
||||
|
||||
## 2. System Architecture
|
||||
|
||||
### 2.1 Technology Stack
|
||||
|
||||
**Frontend:**
|
||||
- React 18 with TypeScript
|
||||
- Vite for build tooling
|
||||
- TanStack Query for state management
|
||||
- React Router for navigation
|
||||
- Tailwind CSS for styling
|
||||
|
||||
**Backend:**
|
||||
- FastAPI (Python 3.11+) for REST API
|
||||
- Celery with Redis for background task processing
|
||||
- MongoDB Atlas for data storage
|
||||
- JWT authentication with HttpOnly refresh cookies
|
||||
|
||||
**External Services:**
|
||||
- Google Cloud Storage for file storage
|
||||
- Google Gemini 2.5 Pro for AI processing
|
||||
- Google Cloud Translate for language translation
|
||||
- ElevenLabs for text-to-speech synthesis
|
||||
|
||||
**Infrastructure:**
|
||||
- Docker containerization
|
||||
- Redis for caching and task queues
|
||||
- WebSocket support for real-time updates
|
||||
|
||||
### 2.2 System Components
|
||||
|
||||
```
|
||||
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
|
||||
│ React SPA │ │ FastAPI │ │ Celery │
|
||||
│ Frontend │◄──►│ Backend │◄──►│ Workers │
|
||||
└─────────────────┘ └─────────────────┘ └─────────────────┘
|
||||
│ │
|
||||
▼ ▼
|
||||
┌─────────────────┐ ┌─────────────────┐
|
||||
│ MongoDB │ │ Redis │
|
||||
│ Database │ │ Queue/Cache │
|
||||
└─────────────────┘ └─────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Google Cloud │
|
||||
│ Storage │
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
## 3. User Roles and Access Control
|
||||
|
||||
### 3.1 Role Definitions
|
||||
|
||||
**Client Role:**
|
||||
- Upload videos and create processing jobs
|
||||
- View own job status and progress
|
||||
- Download completed accessibility assets
|
||||
- Limited to own content only
|
||||
|
||||
**Reviewer Role:**
|
||||
- Access quality control dashboard
|
||||
- Review AI-generated content for accuracy
|
||||
- Edit VTT files (captions and audio descriptions)
|
||||
- Approve or reject English content
|
||||
- Perform final review of completed jobs
|
||||
- Access to all jobs in system
|
||||
|
||||
**Admin Role:**
|
||||
- Full system access including all reviewer capabilities
|
||||
- User management (create, edit, deactivate users)
|
||||
- System monitoring and health checks
|
||||
- Bulk operations and maintenance tasks
|
||||
- Access to audit logs and system statistics
|
||||
|
||||
### 3.2 Authentication System
|
||||
|
||||
**JWT Token Management:**
|
||||
- Access tokens stored in memory (15-minute expiry)
|
||||
- Refresh tokens stored in HttpOnly cookies (7-day expiry)
|
||||
- Automatic token refresh for active sessions
|
||||
- Secure logout with cookie clearing
|
||||
|
||||
**Security Features:**
|
||||
- Password hashing using bcrypt
|
||||
- CORS protection with configurable origins
|
||||
- Rate limiting on authentication endpoints
|
||||
- Session-based security with proper token rotation
|
||||
|
||||
## 4. Job Processing Workflow
|
||||
|
||||
### 4.1 Job Status State Machine
|
||||
|
||||
The system implements a comprehensive state machine for tracking job progress:
|
||||
|
||||
```
|
||||
created → ingesting → ai_processing → pending_qc → approved_english → translating → tts_generating → pending_final_review → completed
|
||||
↓
|
||||
rejected → (manual intervention required)
|
||||
↓
|
||||
qc_feedback → (back to pending_qc after fixes)
|
||||
```
|
||||
|
||||
**Status Definitions:**
|
||||
|
||||
- **created**: Job record created, video uploaded to storage
|
||||
- **ingesting**: Video being processed for metadata extraction
|
||||
- **ai_processing**: AI analyzing video content and generating captions/audio descriptions
|
||||
- **pending_qc**: Awaiting human quality control review
|
||||
- **approved_english**: English content approved, ready for translation
|
||||
- **rejected**: Content rejected, requires client revision
|
||||
- **qc_feedback**: Reviewer provided feedback, awaiting fixes
|
||||
- **translating**: Processing multi-language translations
|
||||
- **tts_generating**: Generating audio files from text descriptions
|
||||
- **pending_final_review**: All content ready, awaiting final approval
|
||||
- **completed**: Job finished, all assets available for download
|
||||
|
||||
### 4.2 Processing Pipeline
|
||||
|
||||
**Phase 1: Upload and Ingestion**
|
||||
1. Client uploads MP4 video file through web interface
|
||||
2. File stored in Google Cloud Storage with unique job ID path
|
||||
3. Job record created in MongoDB with metadata
|
||||
4. Background Celery task queued for processing
|
||||
|
||||
**Phase 2: AI Content Generation**
|
||||
1. Video file sent to Google Gemini 2.5 Pro API
|
||||
2. AI generates:
|
||||
- Plain text transcript
|
||||
- Closed captions in WebVTT format
|
||||
- Audio description script in WebVTT format
|
||||
- Confidence score for generated content
|
||||
3. Generated content stored in GCS and linked to job
|
||||
4. Job status updated to `pending_qc`
|
||||
|
||||
**Phase 3: Quality Control Review**
|
||||
1. Reviewer accesses job through QC dashboard
|
||||
2. Side-by-side video player with generated captions/audio descriptions
|
||||
3. Inline VTT editor for making corrections
|
||||
4. Timing adjustment tools for synchronization
|
||||
5. Approve or reject with reviewer notes
|
||||
6. If approved, job moves to translation phase
|
||||
|
||||
**Phase 4: Translation and Localization**
|
||||
1. Automatic translation of approved English content
|
||||
2. Support for standard translation and cultural transcreation
|
||||
3. Available target languages: Spanish, French, German (expandable)
|
||||
4. Translated VTT files stored per language
|
||||
|
||||
**Phase 5: Audio Generation**
|
||||
1. Text-to-speech synthesis using ElevenLabs API
|
||||
2. MP3 files generated for each audio description track
|
||||
3. Language-specific voice selection
|
||||
4. Audio files stored alongside VTT content
|
||||
|
||||
**Phase 6: Final Review and Delivery**
|
||||
1. Final review by authorized reviewer
|
||||
2. Asset validation to ensure all requested outputs present
|
||||
3. Client notification of job completion
|
||||
4. Signed URL generation for secure downloads
|
||||
|
||||
## 5. User Interface and Experience
|
||||
|
||||
### 5.1 Client Workflow
|
||||
|
||||
**Dashboard:**
|
||||
- Overview of all jobs with status indicators
|
||||
- Quick actions for creating new jobs
|
||||
- Real-time status updates via WebSocket
|
||||
- Notification system for job completion
|
||||
|
||||
**Job Creation Process:**
|
||||
1. **Video Upload**: Drag-and-drop interface with progress tracking
|
||||
2. **Job Configuration**:
|
||||
- Descriptive title
|
||||
- Source language selection
|
||||
- Output format selection (captions VTT, audio description VTT, audio MP3)
|
||||
- Target languages for translation
|
||||
3. **Processing Initiation**: Automatic background processing begins
|
||||
4. **Confirmation**: Success page with job tracking link
|
||||
|
||||
**Job Monitoring:**
|
||||
- Detailed status view with progress indicators
|
||||
- Processing history timeline
|
||||
- Real-time updates without page refresh
|
||||
- Error notifications with context
|
||||
|
||||
**Content Download:**
|
||||
- Secure download links for completed assets
|
||||
- Organized by language (en/, es/, fr/, de/)
|
||||
- File format options (VTT, MP3)
|
||||
- Source video access
|
||||
|
||||
### 5.2 Reviewer Workflow
|
||||
|
||||
**Quality Control Dashboard:**
|
||||
- Queue view of jobs pending review
|
||||
- Priority sorting by creation date
|
||||
- Job metadata preview
|
||||
- Quick status filtering
|
||||
|
||||
**Review Interface:**
|
||||
- **Video Player**: HTML5 player with custom controls
|
||||
- **VTT Editor**: Syntax-highlighted editor with validation
|
||||
- **Side-by-Side View**: Simultaneous video and text editing
|
||||
- **Timing Tools**: Bulk timing adjustment with offset controls
|
||||
- **Review Controls**: Approve/reject with mandatory notes
|
||||
|
||||
**Advanced Features:**
|
||||
- Keyboard shortcuts for efficient workflow (A=Approve, R=Reject, S=Save)
|
||||
- View mode switching (side-by-side, video-only, editor-only)
|
||||
- Real-time VTT validation and error highlighting
|
||||
- Unsaved changes warnings
|
||||
|
||||
**Final Review Process:**
|
||||
- Asset validation before completion
|
||||
- Final quality checks
|
||||
- Client notification triggering
|
||||
- Completion workflow
|
||||
|
||||
### 5.3 Administrator Interface
|
||||
|
||||
**User Management:**
|
||||
- Create users with role assignment
|
||||
- Password reset functionality
|
||||
- User activation/deactivation
|
||||
- Role-based permission enforcement
|
||||
|
||||
**System Monitoring:**
|
||||
- Health check dashboard with component status
|
||||
- Job processing statistics and metrics
|
||||
- Queue monitoring for background tasks
|
||||
- Performance analytics
|
||||
|
||||
**Audit and Security:**
|
||||
- Comprehensive audit logging
|
||||
- Security event monitoring
|
||||
- User activity tracking
|
||||
- System maintenance tools
|
||||
|
||||
## 6. Data Models and Storage
|
||||
|
||||
### 6.1 Job Data Structure
|
||||
|
||||
```typescript
|
||||
interface Job {
|
||||
id: string; // Unique job identifier
|
||||
client_id: string; // Owner client ID
|
||||
title: string; // Human-readable job name
|
||||
status: JobStatus; // Current processing status
|
||||
|
||||
source: {
|
||||
filename: string; // Storage path
|
||||
original_filename: string; // User's original filename
|
||||
gcs_uri: string; // Google Cloud Storage URI
|
||||
duration_s: number; // Video duration in seconds
|
||||
language: string; // Source language code
|
||||
};
|
||||
|
||||
requested_outputs: {
|
||||
captions_vtt: boolean; // Closed captions requested
|
||||
audio_description_vtt: boolean; // Audio description script requested
|
||||
audio_description_mp3: boolean; // Audio voiceover requested
|
||||
languages: string[]; // Target languages
|
||||
transcreation: string[]; // Languages requiring cultural adaptation
|
||||
};
|
||||
|
||||
outputs: {
|
||||
[language: string]: {
|
||||
captions_vtt_gcs?: string; // VTT file location
|
||||
ad_vtt_gcs?: string; // Audio description VTT location
|
||||
ad_mp3_gcs?: string; // Audio MP3 file location
|
||||
origin: "translate" | "transcreate"; // Processing method
|
||||
qa_notes?: string; // Quality assurance notes
|
||||
};
|
||||
};
|
||||
|
||||
ai: {
|
||||
ingestion_json: object; // Full AI response data
|
||||
confidence: number; // AI confidence score (0-1)
|
||||
};
|
||||
|
||||
review: {
|
||||
notes: string; // Current reviewer notes
|
||||
reviewer_id?: string; // Last reviewer ID
|
||||
history: ReviewHistoryItem[]; // Complete review history
|
||||
};
|
||||
|
||||
created_at: Date;
|
||||
updated_at: Date;
|
||||
error?: ErrorInfo; // Processing error details
|
||||
}
|
||||
```
|
||||
|
||||
### 6.2 User Data Structure
|
||||
|
||||
```typescript
|
||||
interface User {
|
||||
id: string;
|
||||
email: string; // Unique login identifier
|
||||
hashed_password: string; // Bcrypt hashed password
|
||||
full_name: string; // Display name
|
||||
role: "client" | "reviewer" | "admin";
|
||||
is_active: boolean; // Account status
|
||||
created_at: Date;
|
||||
updated_at: Date;
|
||||
}
|
||||
```
|
||||
|
||||
### 6.3 File Storage Organization
|
||||
|
||||
**Google Cloud Storage Bucket Structure:**
|
||||
```
|
||||
gs://accessible-video/
|
||||
├── {jobId}/
|
||||
│ ├── source.mp4 # Original video
|
||||
│ ├── en/
|
||||
│ │ ├── captions.vtt # English captions
|
||||
│ │ ├── ad.vtt # English audio description
|
||||
│ │ └── ad.mp3 # English audio file
|
||||
│ ├── es/
|
||||
│ │ ├── captions.vtt # Spanish captions
|
||||
│ │ ├── ad.vtt # Spanish audio description
|
||||
│ │ └── ad.mp3 # Spanish audio file
|
||||
│ └── [other languages]/
|
||||
└── health_check_dummy # System health verification
|
||||
```
|
||||
|
||||
**Security Features:**
|
||||
- Signed URLs with 24-hour expiration
|
||||
- Role-based access control
|
||||
- Automatic cleanup on job deletion
|
||||
- Secure upload with content-type validation
|
||||
|
||||
## 7. API Design
|
||||
|
||||
### 7.1 Authentication Endpoints
|
||||
|
||||
```
|
||||
POST /api/v1/auth/login
|
||||
POST /api/v1/auth/refresh
|
||||
POST /api/v1/auth/logout
|
||||
```
|
||||
|
||||
### 7.2 Job Management Endpoints
|
||||
|
||||
```
|
||||
POST /api/v1/jobs # Create new job
|
||||
GET /api/v1/jobs # List jobs (filtered by role)
|
||||
GET /api/v1/jobs/{id} # Get job details
|
||||
DELETE /api/v1/jobs/{id} # Delete job
|
||||
DELETE /api/v1/jobs/bulk # Bulk delete (admin only)
|
||||
|
||||
# Job Actions
|
||||
POST /api/v1/jobs/{id}/actions/approve_english
|
||||
POST /api/v1/jobs/{id}/actions/reject
|
||||
POST /api/v1/jobs/{id}/actions/complete
|
||||
POST /api/v1/jobs/{id}/actions/reject_final
|
||||
|
||||
# Content Management
|
||||
GET /api/v1/jobs/{id}/vtt # Get VTT content
|
||||
PATCH /api/v1/jobs/{id}/vtt # Update VTT content
|
||||
POST /api/v1/jobs/{id}/vtt/adjust-timing # Adjust timing
|
||||
GET /api/v1/jobs/{id}/downloads # Get download URLs
|
||||
GET /api/v1/jobs/{id}/validate # Validate assets
|
||||
```
|
||||
|
||||
### 7.3 Administrative Endpoints
|
||||
|
||||
```
|
||||
# User Management
|
||||
GET /api/v1/admin/users
|
||||
POST /api/v1/admin/users
|
||||
GET /api/v1/admin/users/{id}
|
||||
PATCH /api/v1/admin/users/{id}
|
||||
DELETE /api/v1/admin/users/{id}
|
||||
|
||||
# System Monitoring
|
||||
GET /api/v1/admin/stats
|
||||
GET /api/v1/admin/health/detailed
|
||||
GET /api/v1/admin/jobs/stats
|
||||
GET /api/v1/admin/audit-logs
|
||||
```
|
||||
|
||||
### 7.4 File Management
|
||||
|
||||
```
|
||||
GET /api/v1/files/signed-url/{path} # Generate signed download URL
|
||||
POST /api/v1/files/upload # Direct file upload endpoint
|
||||
```
|
||||
|
||||
### 7.5 Real-time Updates
|
||||
|
||||
**WebSocket Endpoints:**
|
||||
- `/ws/jobs` - General job status updates
|
||||
- `/ws/jobs/{job_id}` - Job-specific status updates
|
||||
|
||||
**WebSocket Message Format:**
|
||||
```json
|
||||
{
|
||||
"job_id": "string",
|
||||
"status": "string",
|
||||
"updated_at": "ISO8601",
|
||||
"job_title": "string",
|
||||
"message": "string",
|
||||
"progress": "number"
|
||||
}
|
||||
```
|
||||
|
||||
## 8. AI Services Integration
|
||||
|
||||
### 8.1 Google Gemini 2.5 Pro Integration
|
||||
|
||||
**Content Generation Capabilities:**
|
||||
- Video content analysis and understanding
|
||||
- Automatic transcript generation
|
||||
- Closed caption creation with proper timing
|
||||
- Audio description generation for visual elements
|
||||
- Content confidence scoring
|
||||
|
||||
**Processing Flow:**
|
||||
1. Video upload to Gemini Files API
|
||||
2. Content generation using multimodal prompt
|
||||
3. Structured JSON response parsing
|
||||
4. Error handling and self-healing for invalid responses
|
||||
5. Automatic file cleanup after processing
|
||||
|
||||
**Quality Assurance:**
|
||||
- VTT format validation
|
||||
- Timestamp accuracy verification
|
||||
- Content completeness checks
|
||||
- Fallback content generation for missing elements
|
||||
|
||||
### 8.2 Translation Services
|
||||
|
||||
**Google Cloud Translate:**
|
||||
- High-quality machine translation for standard content
|
||||
- Support for multiple target languages
|
||||
- VTT format preservation during translation
|
||||
- Batch processing for efficiency
|
||||
|
||||
**Transcreation via Gemini:**
|
||||
- Cultural adaptation for marketing content
|
||||
- Context-aware translation with brand guidelines
|
||||
- Maintained timing synchronization
|
||||
- Creative adaptation while preserving meaning
|
||||
|
||||
### 8.3 Text-to-Speech Integration
|
||||
|
||||
**ElevenLabs TTS Service:**
|
||||
- High-quality voice synthesis
|
||||
- Language-specific voice selection
|
||||
- MP3 output format
|
||||
- Proper pronunciation for accessibility terms
|
||||
|
||||
**Audio Processing:**
|
||||
- Per-cue synthesis for precise timing
|
||||
- Audio quality optimization
|
||||
- File format standardization
|
||||
- Integration with VTT timing
|
||||
|
||||
## 9. Quality Control Features
|
||||
|
||||
### 9.1 Review Workflow
|
||||
|
||||
**Content Review Process:**
|
||||
1. **Initial Review**: AI-generated content assessment
|
||||
2. **Content Editing**: Direct VTT file modification
|
||||
3. **Synchronization Check**: Video timing validation
|
||||
4. **Quality Verification**: Accessibility standards compliance
|
||||
5. **Final Approval**: Content ready for translation
|
||||
|
||||
**Review Tools:**
|
||||
- Integrated video player with caption overlay
|
||||
- Syntax-highlighted VTT editor
|
||||
- Real-time content validation
|
||||
- Timing adjustment utilities
|
||||
- Review history tracking
|
||||
|
||||
### 9.2 Quality Metrics
|
||||
|
||||
**AI Confidence Scoring:**
|
||||
- Content generation confidence (0-100%, derived from the 0-1 `ai.confidence` score stored on the job)
|
||||
- Quality indicators for reviewer guidance
|
||||
- Threshold-based workflow routing
|
||||
|
||||
**Review Analytics:**
|
||||
- Processing time tracking
|
||||
- Reviewer performance metrics
|
||||
- Quality score trending
|
||||
- Error rate monitoring
|
||||
|
||||
## 10. Security and Compliance
|
||||
|
||||
### 10.1 Data Security
|
||||
|
||||
**Authentication Security:**
|
||||
- JWT token-based authentication
|
||||
- HttpOnly cookie refresh tokens
|
||||
- Automatic token rotation
|
||||
- Secure password hashing (bcrypt)
|
||||
|
||||
**File Security:**
|
||||
- Signed URL access control
|
||||
- Time-limited download permissions
|
||||
- Secure file upload validation
|
||||
- Automatic cleanup procedures
|
||||
|
||||
**API Security:**
|
||||
- CORS protection
|
||||
- Rate limiting
|
||||
- Input validation and sanitization
|
||||
- Query injection prevention (NoSQL/MongoDB query input sanitization)
|
||||
|
||||
### 10.2 Privacy Protection
|
||||
|
||||
**Data Handling:**
|
||||
- Client data isolation
|
||||
- Role-based access enforcement
|
||||
- Audit trail maintenance
|
||||
- Secure data deletion
|
||||
|
||||
**Content Protection:**
|
||||
- Temporary file processing
|
||||
- Secure cloud storage
|
||||
- Access logging
|
||||
- Data retention policies
|
||||
|
||||
### 10.3 Audit and Compliance
|
||||
|
||||
**Audit Logging:**
|
||||
- User action tracking
|
||||
- System event logging
|
||||
- Security event monitoring
|
||||
- Performance metric collection
|
||||
|
||||
**Compliance Features:**
|
||||
- Data export capabilities
|
||||
- User consent management
|
||||
- Access control documentation
|
||||
- Security incident tracking
|
||||
|
||||
## 11. Performance and Scalability
|
||||
|
||||
### 11.1 System Performance
|
||||
|
||||
**Backend Performance:**
|
||||
- Async request handling with FastAPI
|
||||
- Background task processing via Celery
|
||||
- Database query optimization
|
||||
- Caching strategy with Redis
|
||||
|
||||
**Frontend Performance:**
|
||||
- React Query for data caching
|
||||
- Lazy loading of components
|
||||
- Optimized bundle splitting
|
||||
- Progressive web app features
|
||||
|
||||
### 11.2 Scalability Architecture
|
||||
|
||||
**Horizontal Scaling:**
|
||||
- Stateless API servers
|
||||
- Independent worker processes
|
||||
- Load balancing ready
|
||||
- Database connection pooling
|
||||
|
||||
**Resource Optimization:**
|
||||
- File compression and optimization
|
||||
- CDN integration ready
|
||||
- Memory-efficient processing
|
||||
- Garbage collection optimization
|
||||
|
||||
### 11.3 Monitoring and Observability
|
||||
|
||||
**Health Monitoring:**
|
||||
- Component health checks
|
||||
- Service dependency monitoring
|
||||
- Performance metric collection
|
||||
- Error rate tracking
|
||||
|
||||
**Logging and Debugging:**
|
||||
- Structured logging with correlation IDs
|
||||
- Error tracking and alerting
|
||||
- Performance profiling
|
||||
- Debug mode capabilities
|
||||
|
||||
## 12. Deployment and Infrastructure
|
||||
|
||||
### 12.1 Containerization
|
||||
|
||||
**Docker Configuration:**
|
||||
- Multi-stage builds for optimization
|
||||
- Health check integration
|
||||
- Environment-based configuration
|
||||
- Security-hardened images
|
||||
|
||||
### 12.2 Environment Configuration
|
||||
|
||||
**Development Environment:**
|
||||
- Local Docker Compose setup
|
||||
- Hot-reload development servers
|
||||
- Test database seeding
|
||||
- Mock external services
|
||||
|
||||
**Production Environment:**
|
||||
- Cloud-native deployment
|
||||
- SSL/TLS termination
|
||||
- Environment variable management
|
||||
- Secret management integration
|
||||
|
||||
### 12.3 Database Management
|
||||
|
||||
**MongoDB Configuration:**
|
||||
- Document schema validation
|
||||
- Index optimization
|
||||
- Replica set support
|
||||
- Backup and recovery procedures
|
||||
|
||||
**Migration System:**
|
||||
- Schema version tracking
|
||||
- Safe migration procedures
|
||||
- Rollback capabilities
|
||||
- Data integrity validation
|
||||
|
||||
## 13. Testing Strategy
|
||||
|
||||
### 13.1 Testing Levels
|
||||
|
||||
**Unit Testing:**
|
||||
- Service layer testing
|
||||
- Utility function testing
|
||||
- Component testing
|
||||
- Mock external dependencies
|
||||
|
||||
**Integration Testing:**
|
||||
- API endpoint testing
|
||||
- Database integration testing
|
||||
- File storage integration
|
||||
- Authentication flow testing
|
||||
|
||||
**End-to-End Testing:**
|
||||
- Complete user workflow testing
|
||||
- Cross-browser compatibility
|
||||
- Mobile responsiveness
|
||||
- Performance testing
|
||||
|
||||
### 13.2 Testing Tools
|
||||
|
||||
**Backend Testing:**
|
||||
- PyTest for unit and integration tests
|
||||
- Factory Boy for test data generation
|
||||
- Async test support
|
||||
- Mock external services
|
||||
|
||||
**Frontend Testing:**
|
||||
- Jest for unit testing
|
||||
- React Testing Library
|
||||
- Playwright for E2E testing
|
||||
- Visual regression testing
|
||||
|
||||
## 14. Error Handling and Recovery
|
||||
|
||||
### 14.1 Error Classification
|
||||
|
||||
**User Errors:**
|
||||
- Invalid file formats
|
||||
- Insufficient permissions
|
||||
- Validation failures
|
||||
- Authentication errors
|
||||
|
||||
**System Errors:**
|
||||
- External service failures
|
||||
- Database connection issues
|
||||
- File storage problems
|
||||
- Processing timeouts
|
||||
|
||||
**Recovery Strategies:**
|
||||
- Automatic retry mechanisms
|
||||
- Graceful degradation
|
||||
- User-friendly error messages
|
||||
- Administrative error resolution
|
||||
|
||||
### 14.2 Reliability Features
|
||||
|
||||
**Fault Tolerance:**
|
||||
- Circuit breaker patterns
|
||||
- Timeout configurations
|
||||
- Retry logic with exponential backoff
|
||||
- Fallback procedures
|
||||
|
||||
**Data Integrity:**
|
||||
- Transaction management
|
||||
- Consistent state handling
|
||||
- Backup and recovery
|
||||
- Data validation
|
||||
|
||||
## 15. Configuration and Customization
|
||||
|
||||
### 15.1 System Configuration
|
||||
|
||||
**Application Settings:**
|
||||
- Environment-specific configurations
|
||||
- Feature flag support
|
||||
- Service endpoint configuration
|
||||
- Security parameter tuning
|
||||
|
||||
**Processing Configuration:**
|
||||
- AI model parameters
|
||||
- Translation service options
|
||||
- File size limits
|
||||
- Processing timeouts
|
||||
|
||||
### 15.2 User Customization
|
||||
|
||||
**Client Settings:**
|
||||
- Language preferences
|
||||
- Notification preferences
|
||||
- Default job settings
|
||||
- Download preferences
|
||||
|
||||
**Reviewer Settings:**
|
||||
- Workflow preferences
|
||||
- Editor configurations
|
||||
- Keyboard shortcuts
|
||||
- Quality thresholds
|
||||
|
||||
## 16. Future Enhancements
|
||||
|
||||
### 16.1 Planned Features
|
||||
|
||||
**Enhanced AI Capabilities:**
|
||||
- Multi-modal content analysis
|
||||
- Improved accuracy metrics
|
||||
- Custom model training
|
||||
- Advanced quality scoring
|
||||
|
||||
**Extended Language Support:**
|
||||
- Additional target languages
|
||||
- Regional dialect support
|
||||
- Custom transcreation workflows
|
||||
- Cultural adaptation tools
|
||||
|
||||
**Advanced Workflow Features:**
|
||||
- Batch processing capabilities
|
||||
- Template-based job creation
|
||||
- Advanced approval workflows
|
||||
- Custom review stages
|
||||
|
||||
### 16.2 Integration Opportunities
|
||||
|
||||
**Third-Party Integrations:**
|
||||
- Content management systems
|
||||
- Video hosting platforms
|
||||
- Accessibility testing tools
|
||||
- Quality assurance services
|
||||
|
||||
**API Extensions:**
|
||||
- Webhook support for job events
|
||||
- Advanced reporting APIs
|
||||
- Bulk operation endpoints
|
||||
- Custom integration points
|
||||
|
||||
## 17. Conclusion
|
||||
|
||||
The Video Accessibility Processing Platform represents a comprehensive solution for automated video accessibility content generation. Built with modern web technologies and integrated with leading AI services, the platform provides an end-to-end workflow from video upload to final content delivery.
|
||||
|
||||
The system's architecture supports scalability, security, and reliability while maintaining a focus on user experience and content quality. The role-based access control ensures appropriate separation of concerns between content creators, quality reviewers, and system administrators.
|
||||
|
||||
With its robust API design, real-time updates, and comprehensive error handling, the platform serves as a professional-grade solution for organizations requiring high-quality accessibility content at scale.
|
||||
|
||||
---
|
||||
|
||||
*This specification document serves as the comprehensive technical and functional guide for the Video Accessibility Processing Platform, detailing all implemented features, workflows, and system capabilities as of the current release.*
|
||||
25
frontend/.env.production
Normal file
25
frontend/.env.production
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# =============================================================================
|
||||
# Frontend Production Environment Variables
|
||||
# =============================================================================
|
||||
# These variables are embedded into the built JavaScript at build time
|
||||
# They are NOT secret - they will be visible in the browser
|
||||
# =============================================================================
|
||||
|
||||
# Backend API base URL (proxied through Apache)
|
||||
VITE_API_BASE_URL=https://ai-sandbox.oliver.solutions/video-accessibility-back
|
||||
|
||||
# Application environment
|
||||
VITE_APP_ENV=production
|
||||
|
||||
# Sentry DSN for error tracking (optional - leave empty to disable)
|
||||
VITE_SENTRY_DSN=
|
||||
|
||||
# =============================================================================
|
||||
# Important Notes:
|
||||
# =============================================================================
|
||||
# 1. All VITE_ prefixed variables are embedded in the build
|
||||
# 2. Never put secrets here - they will be visible in browser dev tools
|
||||
# 3. After changing these values, rebuild the frontend:
|
||||
# npm run build
|
||||
# 4. The API URL must match your Apache reverse proxy configuration
|
||||
# =============================================================================
|
||||
|
|
@ -5,6 +5,8 @@ import react from '@vitejs/plugin-react'
|
|||
// https://vite.dev/config/
|
||||
export default defineConfig({
|
||||
plugins: [react()],
|
||||
// Base path for production deployment in Apache subdirectory
|
||||
base: '/video-accessibility/',
|
||||
server: {
|
||||
proxy: {
|
||||
'/api': {
|
||||
|
|
|
|||
239
scripts/build-frontend.sh
Executable file
239
scripts/build-frontend.sh
Executable file
|
|
@ -0,0 +1,239 @@
|
|||
#!/bin/bash
|
||||
# =============================================================================
|
||||
# Frontend Build and Deploy Script
|
||||
# =============================================================================
|
||||
# Builds the React frontend and deploys to Apache document root
|
||||
# Run from: /opt/accessible-video/
|
||||
# Usage: ./scripts/build-frontend.sh
|
||||
# =============================================================================
|
||||
|
||||
set -e # Exit on any error
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Configuration
|
||||
PROJECT_DIR="/opt/accessible-video"
|
||||
FRONTEND_DIR="$PROJECT_DIR/frontend"
|
||||
DEPLOY_DIR="/var/www/html/video-accessibility"
|
||||
|
||||
# =============================================================================
|
||||
# Helper Functions
|
||||
# =============================================================================
|
||||
|
||||
# Print a green status line for a step that completed successfully.
#   $1 - message text
print_success() {
    local message="$1"
    echo -e "${GREEN} ${message}${NC}"
}
|
||||
|
||||
# Print a red status line describing a failure (written to stdout).
#   $1 - message text
print_error() {
    local message="$1"
    echo -e "${RED} ${message}${NC}"
}
|
||||
|
||||
# Print a blue informational line prefixed with an info marker.
#   $1 - message text
print_info() {
    # NOTE(review): the prefix used to be a literal "9", which looks like
    # U+2139 (ℹ) truncated to its low byte by an encoding round-trip.
    echo -e "${BLUE}ℹ $1${NC}"
}
|
||||
|
||||
# Print a section banner: the title framed above and below by a blue rule.
#   $1 - section title
print_header() {
    local rule="=============================================================================="
    local title="$1"
    echo -e "${BLUE}${rule}${NC}"
    echo -e "${BLUE}${title}${NC}"
    echo -e "${BLUE}${rule}${NC}"
}
|
||||
|
||||
# =============================================================================
|
||||
# Pre-flight Checks
|
||||
# =============================================================================
|
||||
|
||||
# Confirm that everything the frontend build needs is in place.
# Each check prints a success line when it passes; the first failing check
# prints an error and terminates the script with exit status 1.
preflight_checks() {
    print_header "Pre-flight Checks"

    # The frontend checkout itself.
    [ -d "$FRONTEND_DIR" ] || {
        print_error "Frontend directory not found at $FRONTEND_DIR"
        exit 1
    }
    print_success "Frontend directory found"

    # The npm manifest.
    [ -f "$FRONTEND_DIR/package.json" ] || {
        print_error "package.json not found in frontend directory"
        exit 1
    }
    print_success "package.json found"

    # Production env file. When absent, seed it from the example template and
    # stop so the operator can fill in real values before building.
    local env_file="$FRONTEND_DIR/.env.production"
    if [ ! -f "$env_file" ]; then
        print_error ".env.production not found in frontend directory"
        print_info "Creating .env.production from template..."
        cp "$FRONTEND_DIR/.env.example" "$env_file" || exit 1
        print_info "Please edit $FRONTEND_DIR/.env.production and run again"
        exit 1
    fi
    print_success ".env.production found"

    # Node.js runtime.
    command -v node > /dev/null 2>&1 || {
        print_error "Node.js is not installed"
        exit 1
    }
    print_success "Node.js $(node --version) is installed"

    # npm client.
    command -v npm > /dev/null 2>&1 || {
        print_error "npm is not installed"
        exit 1
    }
    print_success "npm $(npm --version) is installed"

    echo ""
}
|
||||
|
||||
# =============================================================================
|
||||
# Build Frontend
|
||||
# =============================================================================
|
||||
|
||||
# Install dependencies and produce the production bundle in
# $FRONTEND_DIR/dist. Exits 1 if no dist/ directory exists after the build.
# Leaves the working directory at $PROJECT_DIR on success.
build_frontend() {
    print_header "Building Frontend"

    cd "$FRONTEND_DIR"

    # Install dependencies
    print_info "Installing dependencies..."
    # Full install on purpose: the build toolchain (Vite and its React plugin,
    # both used by the Vite config) is normally declared under devDependencies.
    # `npm ci --only=production` would skip those packages and the
    # `npm run build` below would fail for lack of the bundler.
    npm ci
    print_success "Dependencies installed"

    # Build the application
    print_info "Building React application (this may take a minute)..."
    npm run build
    print_success "Build completed"

    # Check if dist directory was created
    if [ ! -d "dist" ]; then
        print_error "Build failed - dist directory not found"
        exit 1
    fi
    print_success "Build artifacts created in dist/"

    # Display build size
    BUILD_SIZE=$(du -sh dist | cut -f1)
    print_info "Build size: $BUILD_SIZE"

    cd "$PROJECT_DIR"
    echo ""
}
|
||||
|
||||
# =============================================================================
|
||||
# Deploy to Apache
|
||||
# =============================================================================
|
||||
|
||||
# Publish $FRONTEND_DIR/dist into $DEPLOY_DIR:
#   1. create the target directory,
#   2. snapshot any existing content to a timestamped sibling directory,
#   3. clear the target and copy the new build in,
#   4. hand ownership to www-data and normalise permissions.
deploy_to_apache() {
    print_header "Deploying to Apache"

    # Create deployment directory if it doesn't exist
    print_info "Creating deployment directory..."
    sudo mkdir -p "$DEPLOY_DIR"
    print_success "Deployment directory ready"

    # Backup existing deployment (if any). $DEPLOY_DIR is quoted so a path
    # containing whitespace is still listed as a single directory.
    # NOTE(review): the backup is a sibling of $DEPLOY_DIR, so it sits under
    # the same parent directory as the live site and is never pruned -
    # confirm it is not web-served and consider a retention policy.
    if [ -d "$DEPLOY_DIR" ] && [ -n "$(ls -A "$DEPLOY_DIR")" ]; then
        BACKUP_DIR="$DEPLOY_DIR.backup.$(date +%Y%m%d_%H%M%S)"
        print_info "Backing up existing deployment to $BACKUP_DIR"
        sudo cp -r "$DEPLOY_DIR" "$BACKUP_DIR"
        print_success "Backup created"
    fi

    # Clear deployment directory. ${DEPLOY_DIR:?} aborts instead of expanding
    # to "/*" should the variable ever be empty or unset. The glob does not
    # match dotfiles, so a hand-placed .htaccess in the target survives.
    print_info "Clearing deployment directory..."
    sudo rm -rf "${DEPLOY_DIR:?}"/*
    print_success "Deployment directory cleared"

    # Copy build artifacts. "dist/." copies the directory contents including
    # dotfiles (e.g. a .htaccess shipped with the build), which "dist/*"
    # silently skipped.
    print_info "Copying build artifacts..."
    sudo cp -r "$FRONTEND_DIR/dist/." "$DEPLOY_DIR"/
    print_success "Build artifacts copied"

    # Set proper ownership
    print_info "Setting file ownership to www-data..."
    sudo chown -R www-data:www-data "$DEPLOY_DIR"
    print_success "Ownership set"

    # Set proper permissions: directories 755, files 644. "+" batches paths
    # into as few chmod invocations as possible.
    print_info "Setting file permissions..."
    sudo find "$DEPLOY_DIR" -type d -exec chmod 755 {} +
    sudo find "$DEPLOY_DIR" -type f -exec chmod 644 {} +
    print_success "Permissions set"

    echo ""
}
|
||||
|
||||
# =============================================================================
|
||||
# Verify Deployment
|
||||
# =============================================================================
|
||||
|
||||
# Sanity-check the published site: index.html and assets/ must exist in
# $DEPLOY_DIR (exit 1 otherwise), then report file count and on-disk size.
# Sets the globals FILE_COUNT and DEPLOY_SIZE.
verify_deployment() {
    print_header "Verifying Deployment"

    # Entry document.
    [ -f "$DEPLOY_DIR/index.html" ] || {
        print_error "index.html not found in deployment directory!"
        exit 1
    }
    print_success "index.html found"

    # Bundled assets.
    [ -d "$DEPLOY_DIR/assets" ] || {
        print_error "assets/ directory not found in deployment!"
        exit 1
    }
    print_success "assets/ directory found"

    # Number of regular files published.
    FILE_COUNT=$(find "$DEPLOY_DIR" -type f | wc -l)
    print_info "Total files deployed: $FILE_COUNT"

    # Total size of the published tree.
    DEPLOY_SIZE=$(sudo du -sh "$DEPLOY_DIR" | cut -f1)
    print_info "Deployment size: $DEPLOY_SIZE"

    echo ""
}
|
||||
|
||||
# =============================================================================
|
||||
# Display Summary
|
||||
# =============================================================================
|
||||
|
||||
# Print the closing report: where the site was published, its public URL,
# and where rollback backups are kept.
display_summary() {
    print_header "Deployment Summary"

    echo -e "${GREEN}Frontend successfully deployed!${NC}"

    # Plain-text remainder of the report, emitted in one go.
    cat <<EOF

Deployment location: $DEPLOY_DIR
Frontend URL: https://ai-sandbox.oliver.solutions/video-accessibility

To verify the deployment, visit the URL above in your browser.

If you need to rollback, backups are stored in:
 $DEPLOY_DIR.backup.*

EOF
}
|
||||
|
||||
# =============================================================================
|
||||
# Main Function
|
||||
# =============================================================================
|
||||
|
||||
# Script entry point: print the banner, then run every stage in order.
# Any stage may terminate the script via `exit` on failure.
main() {
    print_header "Frontend Build & Deploy"
    echo ""

    local stage
    for stage in \
        preflight_checks \
        build_frontend \
        deploy_to_apache \
        verify_deployment \
        display_summary
    do
        "$stage"
    done
}
|
||||
|
||||
# Run main function
|
||||
main
|
||||
287
scripts/deploy.sh
Executable file
287
scripts/deploy.sh
Executable file
|
|
@ -0,0 +1,287 @@
|
|||
#!/bin/bash
|
||||
# =============================================================================
|
||||
# Deployment Script for Accessible Video Platform
|
||||
# =============================================================================
|
||||
# This script handles building and deploying the application
|
||||
# Run from: /opt/accessible-video/
|
||||
# Usage: ./scripts/deploy.sh [options]
|
||||
# =============================================================================
|
||||
|
||||
set -e # Exit on any error
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Configuration
|
||||
PROJECT_DIR="/opt/accessible-video"
|
||||
COMPOSE_FILES="-f docker-compose.yml -f docker-compose.prod.yml"
|
||||
|
||||
# =============================================================================
|
||||
# Helper Functions
|
||||
# =============================================================================
|
||||
|
||||
# Print a section banner: the title framed above and below by a blue rule.
#   $1 - section title
print_header() {
    local rule="=============================================================================="
    local title="$1"
    echo -e "${BLUE}${rule}${NC}"
    echo -e "${BLUE}${title}${NC}"
    echo -e "${BLUE}${rule}${NC}"
}
|
||||
|
||||
# Print a green status line for a step that completed successfully.
#   $1 - message text
print_success() {
    local message="$1"
    echo -e "${GREEN} ${message}${NC}"
}
|
||||
|
||||
# Print a red status line describing a failure (written to stdout).
#   $1 - message text
print_error() {
    local message="$1"
    echo -e "${RED} ${message}${NC}"
}
|
||||
|
||||
# Print a yellow status line for a non-fatal condition.
#   $1 - message text
print_warning() {
    local message="$1"
    echo -e "${YELLOW} ${message}${NC}"
}
|
||||
|
||||
# Print a blue informational line prefixed with an info marker.
#   $1 - message text
print_info() {
    # NOTE(review): the prefix used to be a literal "9", which looks like
    # U+2139 (ℹ) truncated to its low byte by an encoding round-trip.
    echo -e "${BLUE}ℹ $1${NC}"
}
|
||||
|
||||
# =============================================================================
|
||||
# Pre-flight Checks
|
||||
# =============================================================================
|
||||
|
||||
# Validate the deployment environment before anything is built or restarted.
# Must be run from the project root. Each passing check prints a success
# line; the first failing check prints an error and exits with status 1.
preflight_checks() {
    print_header "Running Pre-flight Checks"

    # Check if running from correct directory. Both compose files are
    # required: every docker-compose call in this script passes the base file
    # and the production override together.
    if [ ! -f "docker-compose.yml" ]; then
        print_error "docker-compose.yml not found. Please run from /opt/accessible-video/"
        exit 1
    fi
    if [ ! -f "docker-compose.prod.yml" ]; then
        print_error "docker-compose.prod.yml not found. Please run from /opt/accessible-video/"
        exit 1
    fi
    print_success "Running from correct directory"

    # Check if .env.production exists
    if [ ! -f ".env.production" ]; then
        print_error ".env.production not found. Please create it first."
        exit 1
    fi
    print_success ".env.production found"

    # Refuse to deploy with the template's placeholder signing secret: the
    # shipped .env.production sets JWT_SECRET to a CHANGE_ME... value that
    # must be replaced before going live.
    if grep -q '^JWT_SECRET=CHANGE_ME' .env.production; then
        print_error "JWT_SECRET in .env.production is still the CHANGE_ME placeholder. Generate one with: openssl rand -hex 32"
        exit 1
    fi
    print_success "JWT_SECRET has been customised"

    # Check if secrets directory exists
    if [ ! -d "secrets" ]; then
        print_error "secrets/ directory not found. Please create it and add gcp-credentials.json"
        exit 1
    fi
    print_success "secrets/ directory found"

    # Check if GCP credentials exist
    if [ ! -f "secrets/gcp-credentials.json" ]; then
        print_error "secrets/gcp-credentials.json not found"
        exit 1
    fi
    print_success "GCP credentials found"

    # Check if Docker is running (the daemon must answer `docker info`)
    if ! docker info > /dev/null 2>&1; then
        print_error "Docker is not running"
        exit 1
    fi
    print_success "Docker is running"

    # Check if docker-compose is available
    if ! command -v docker-compose &> /dev/null; then
        print_error "docker-compose is not installed"
        exit 1
    fi
    print_success "docker-compose is available"

    echo ""
}
|
||||
|
||||
# =============================================================================
|
||||
# Pull Latest Code
|
||||
# =============================================================================
|
||||
|
||||
# Update the backend and frontend checkouts with `git pull`.
# A directory without its own .git folder is skipped with a warning.
pull_code() {
    print_header "Pulling Latest Code"

    # Backend checkout
    if [ ! -d "backend/.git" ]; then
        print_warning "Backend is not a git repository, skipping pull"
    else
        print_info "Pulling backend repository..."
        git -C backend pull
        print_success "Backend code updated"
    fi

    # Frontend checkout
    if [ ! -d "frontend/.git" ]; then
        print_warning "Frontend is not a git repository, skipping pull"
    else
        print_info "Pulling frontend repository..."
        git -C frontend pull
        print_success "Frontend code updated"
    fi

    echo ""
}
|
||||
|
||||
# =============================================================================
|
||||
# Build and Deploy Backend
|
||||
# =============================================================================
|
||||
|
||||
# Export every KEY=value pair found in a dotenv-style file.
# Blank lines, comment lines and lines without '=' are ignored. Values are
# taken verbatim up to end of line, so spaces and shell metacharacters
# (&, ?, ;) survive; one pair of matching surrounding quotes is removed.
#   $1 - path to the env file
_load_env_file() {
    local env_file="$1" line key value
    # Regexes live in variables so '#' and '(' are not parsed as shell syntax.
    local skip_re='^[[:space:]]*(#.*)?$'
    local key_re='^[A-Za-z_][A-Za-z0-9_]*$'

    # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
        line="${line%$'\r'}"                      # tolerate CRLF files
        [[ "$line" =~ $skip_re ]] && continue
        [[ "$line" == *=* ]] || continue

        key="${line%%=*}"
        value="${line#*=}"
        if [[ ! "$key" =~ $key_re ]]; then
            print_warning "Skipping invalid variable name in $env_file: $key"
            continue
        fi

        if [ "${#value}" -ge 2 ]; then
            case "$value" in
                \"*\"|\'*\') value="${value:1:${#value}-2}" ;;
            esac
        fi
        export "$key=$value"
    done < "$env_file"
}

# Rebuild the Docker images, restart the stack and confirm it comes up
# healthy. Exits 1 if any container reports "unhealthy", or if health checks
# are still starting after HEALTH_TIMEOUT seconds (default 120).
deploy_backend() {
    print_header "Building and Deploying Backend Services"

    # Load environment variables. The previous
    # `export $(cat … | grep -v '^#' | xargs)` word-split the file, silently
    # truncating any value that contained whitespace.
    _load_env_file .env.production

    # $COMPOSE_FILES is intentionally unquoted below: it holds several
    # "-f <file>" arguments that must be split into separate words.

    # Build images
    print_info "Building Docker images (this may take a few minutes)..."
    docker-compose $COMPOSE_FILES build --no-cache
    print_success "Docker images built"

    # Stop existing containers
    print_info "Stopping existing containers..."
    docker-compose $COMPOSE_FILES down
    print_success "Containers stopped"

    # Start services
    print_info "Starting services..."
    docker-compose $COMPOSE_FILES up -d
    print_success "Services started"

    # Wait for services to be healthy. After the initial grace period, keep
    # polling while any container is still in "health: starting"; a single
    # fixed sleep would declare success before the health checks had run.
    print_info "Waiting for services to be healthy..."
    sleep 10

    local health_timeout="${HEALTH_TIMEOUT:-120}"
    local waited=0
    while docker-compose $COMPOSE_FILES ps | grep -q "health: starting"; do
        if [ "$waited" -ge "$health_timeout" ]; then
            print_error "Timed out after ${health_timeout}s waiting for health checks to finish"
            docker-compose $COMPOSE_FILES ps
            exit 1
        fi
        sleep 5
        waited=$((waited + 5))
    done

    # Check service health
    if docker-compose $COMPOSE_FILES ps | grep -q "unhealthy"; then
        print_error "Some services are unhealthy!"
        docker-compose $COMPOSE_FILES ps
        exit 1
    fi
    print_success "All services are healthy"

    echo ""
}
|
||||
|
||||
# =============================================================================
|
||||
# Build and Deploy Frontend
|
||||
# =============================================================================
|
||||
|
||||
# Build the frontend and publish the build output to the Apache web root.
#
# Runs inside frontend/ and returns to the parent directory when done.
# Any failing step aborts the deployment with status 1. The currently
# published site is only replaced after a non-empty dist/ directory has
# been produced.
deploy_frontend() {
    print_header "Building and Deploying Frontend"

    local web_root="/var/www/html/video-accessibility"

    if ! cd frontend; then
        print_error "frontend/ directory not found"
        exit 1
    fi

    # Install dependencies
    print_info "Installing frontend dependencies..."
    # NOTE(review): if the build tooling is declared under devDependencies,
    # a production-only install omits it and the build below will fail.
    # Confirm against frontend/package.json.
    if ! npm ci --only=production; then
        print_error "Failed to install frontend dependencies"
        exit 1
    fi
    print_success "Dependencies installed"

    # Build frontend
    print_info "Building frontend..."
    if ! npm run build; then
        print_error "Frontend build failed"
        exit 1
    fi
    print_success "Frontend built"

    # Refuse to touch the live site unless there is something to publish.
    if [ ! -d dist ] || [ -z "$(ls -A dist)" ]; then
        print_error "Frontend build produced no dist/ output; deployed site left untouched"
        exit 1
    fi

    # Deploy to Apache
    print_info "Deploying frontend to /var/www/html/video-accessibility/..."

    # Create directory if it doesn't exist
    sudo mkdir -p "$web_root"

    # Replace the published files. find/-delete and "dist/." also cover
    # dotfiles (for example .htaccess), which a "*" glob would skip.
    sudo find "$web_root" -mindepth 1 -delete
    sudo cp -r dist/. "$web_root"/

    # Set proper permissions
    sudo chown -R www-data:www-data "$web_root"
    sudo chmod -R 755 "$web_root"

    print_success "Frontend deployed to Apache"

    cd ..
    echo ""
}
|
||||
|
||||
# =============================================================================
|
||||
# Run Database Migrations
|
||||
# =============================================================================
|
||||
|
||||
# Run the database migrations inside the running "api" service container.
#
# Aborts the deployment with status 1 when the migration command fails,
# so a failed migration is never reported as completed.
run_migrations() {
    print_header "Running Database Migrations"

    print_info "Running migrations..."
    # -T disables pseudo-TTY allocation so the command also works when the
    # script runs without a terminal. $COMPOSE_FILES is deliberately
    # unquoted: it may hold several arguments.
    if ! docker-compose $COMPOSE_FILES exec -T api python migrate.py; then
        print_error "Database migrations failed"
        exit 1
    fi
    print_success "Migrations completed"

    echo ""
}
|
||||
|
||||
# =============================================================================
|
||||
# Display Status
|
||||
# =============================================================================
|
||||
|
||||
# Print the post-deployment summary: container table, public URLs and
# the most useful follow-up commands.
display_status() {
    print_header "Deployment Status"

    # Container overview straight from docker-compose.
    echo -e "${BLUE}Container Status:${NC}"
    docker-compose $COMPOSE_FILES ps

    # Public entry points of the deployed platform.
    printf '%s\n' ""
    echo -e "${BLUE}Service URLs:${NC}"
    printf '%s\n' \
        "Frontend: https://ai-sandbox.oliver.solutions/video-accessibility" \
        "Backend API: https://ai-sandbox.oliver.solutions/video-accessibility-back" \
        "API Health: https://ai-sandbox.oliver.solutions/video-accessibility-back/health" \
        ""

    echo -e "${GREEN}Deployment completed successfully!${NC}"

    # Cheat sheet for day-to-day operations.
    printf '%s\n' \
        "" \
        "To view logs:" \
        " docker-compose $COMPOSE_FILES logs -f [service]" \
        "" \
        "To restart a service:" \
        " docker-compose $COMPOSE_FILES restart [service]" \
        ""
}
|
||||
|
||||
# =============================================================================
|
||||
# Main Deployment Flow
|
||||
# =============================================================================
|
||||
|
||||
# Entry point of the deployment.
#
# Options (any combination, in any order):
#   --skip-pull        do not pull the backend/frontend repositories
#   --skip-frontend    do not build and publish the frontend
#   --skip-migrations  do not run the database migrations
#
# Unrecognised arguments are reported and ignored.
main() {
    local skip_pull=false skip_frontend=false skip_migrations=false arg

    # Look at every argument, not just the first, so that several skip
    # flags can be combined in one invocation.
    for arg in "$@"; do
        case "$arg" in
            --skip-pull)       skip_pull=true ;;
            --skip-frontend)   skip_frontend=true ;;
            --skip-migrations) skip_migrations=true ;;
            *) print_warning "Ignoring unknown option: $arg" ;;
        esac
    done

    print_header "Accessible Video Platform Deployment"
    echo ""

    # Run checks
    preflight_checks

    # Pull latest code
    if [ "$skip_pull" = false ]; then
        pull_code
    fi

    # Deploy backend
    deploy_backend

    # Deploy frontend
    if [ "$skip_frontend" = false ]; then
        deploy_frontend
    fi

    # Run migrations
    if [ "$skip_migrations" = false ]; then
        run_migrations
    fi

    # Display status
    display_status
}

# Run main function
main "$@"
|
||||
147
scripts/mongodb-init.js
Normal file
147
scripts/mongodb-init.js
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
// =============================================================================
|
||||
// MongoDB Initialization Script for Accessible Video Platform
|
||||
// =============================================================================
|
||||
// Run this script ONCE after starting the MongoDB container
|
||||
// Usage: docker-compose exec -T mongodb mongosh < scripts/mongodb-init.js
//        (-T disables pseudo-TTY allocation, which is required when stdin is redirected)
|
||||
// =============================================================================
|
||||
|
||||
// Point the shell's `db` handle at the application database; every
// statement below operates on it.
db = db.getSiblingDB('accessible_video');

// Opening banner.
[
  '=============================================================================',
  'MongoDB Initialization for Accessible Video Platform',
  '=============================================================================',
].forEach((bannerLine) => print(bannerLine));
|
||||
|
||||
// -----------------------------------------------------------------------------
// Create Collections with Validation
// -----------------------------------------------------------------------------
print('\n1. Creating collections with schema validation...');

/**
 * Create a collection, or bring an existing one in line with `options`.
 *
 * Creating a collection that already exists is an error on the server, so
 * the script checks first. That keeps a second run from aborting and lets
 * validator changes be rolled out by simply re-running the script.
 *
 * @param {string} name - Collection name.
 * @param {object} [options] - Options forwarded to `db.createCollection`.
 *   When the collection already exists only `options.validator` is applied
 *   (through `collMod`).
 * @throws {Error} When the server rejects the `collMod` command.
 */
function ensureCollection(name, options = {}) {
  if (!db.getCollectionNames().includes(name)) {
    db.createCollection(name, options);
    print(` Created ${name} collection`);
    return;
  }

  if (options.validator === undefined) {
    print(` Collection ${name} already exists, left unchanged`);
    return;
  }

  const result = db.runCommand({ collMod: name, validator: options.validator });
  if (result.ok !== 1) {
    throw new Error(`collMod failed for ${name}: ${JSON.stringify(result)}`);
  }
  print(` Updated validator on existing ${name} collection`);
}

// Jobs collection
ensureCollection('jobs', {
  validator: {
    $jsonSchema: {
      bsonType: 'object',
      required: ['_id', 'title', 'status', 'client_id', 'created_at', 'updated_at'],
      properties: {
        _id: { bsonType: 'string' },
        title: { bsonType: 'string' },
        status: {
          enum: [
            'created',
            'ingesting',
            'ai_processing',
            'pending_qc',
            'approved_english',
            'rejected',
            'translating',
            'tts_generating',
            'pending_final_review',
            'completed',
          ],
        },
        client_id: { bsonType: 'string' },
        created_at: { bsonType: 'date' },
        updated_at: { bsonType: 'date' },
      },
    },
  },
});

// Users collection
ensureCollection('users', {
  validator: {
    $jsonSchema: {
      bsonType: 'object',
      required: ['_id', 'email', 'hashed_password', 'role', 'created_at'],
      properties: {
        _id: { bsonType: 'string' },
        email: { bsonType: 'string' },
        hashed_password: { bsonType: 'string' },
        role: { enum: ['client', 'reviewer', 'admin'] },
        is_active: { bsonType: 'bool' },
        created_at: { bsonType: 'date' },
      },
    },
  },
});

// Audit logs collection (no schema validation)
ensureCollection('audit_logs');
|
||||
|
||||
// -----------------------------------------------------------------------------
// Create Indexes for Performance
// -----------------------------------------------------------------------------
print('\n2. Creating indexes for optimized queries...');

// One entry per index, processed top to bottom:
//   coll    - collection that receives the index
//   keys    - index key specification
//   options - options passed to createIndex (always carries the index name)
//   note    - optional suffix appended to the progress message
[
  // Jobs collection indexes
  { coll: 'jobs', keys: { status: 1, created_at: -1 }, options: { name: 'idx_status_created' } },
  { coll: 'jobs', keys: { client_id: 1 }, options: { name: 'idx_client_id' } },
  { coll: 'jobs', keys: { created_at: -1 }, options: { name: 'idx_created_at' } },

  // Users collection indexes
  {
    coll: 'users',
    keys: { email: 1 },
    options: { unique: true, name: 'idx_email_unique' },
    note: ' (unique)',
  },
  { coll: 'users', keys: { role: 1 }, options: { name: 'idx_role' } },

  // Audit logs collection indexes
  { coll: 'audit_logs', keys: { timestamp: -1 }, options: { name: 'idx_timestamp' } },
  {
    coll: 'audit_logs',
    keys: { action: 1, timestamp: -1 },
    options: { name: 'idx_action_timestamp' },
  },
  {
    coll: 'audit_logs',
    keys: { user_id: 1, timestamp: -1 },
    options: { name: 'idx_user_timestamp' },
  },
  {
    coll: 'audit_logs',
    keys: { severity: 1, timestamp: -1 },
    options: { name: 'idx_severity_timestamp' },
  },
  {
    coll: 'audit_logs',
    keys: { resource_type: 1, resource_id: 1 },
    options: { name: 'idx_resource' },
  },

  // Text search index for audit logs
  {
    coll: 'audit_logs',
    keys: { description: 'text', details: 'text', error_message: 'text' },
    options: {
      name: 'idx_text_search',
      weights: { description: 10, details: 5, error_message: 8 },
    },
    note: ' (full-text)',
  },
].forEach(({ coll, keys, options, note = '' }) => {
  db.getCollection(coll).createIndex(keys, options);
  print(` Created index: ${coll}.${options.name}${note}`);
});
|
||||
|
||||
// -----------------------------------------------------------------------------
// Display Database Statistics
// -----------------------------------------------------------------------------
print('\n3. Database initialization complete!');
print('\nDatabase statistics:');
print('-------------------');

// Database-level figures; sizes are reported by the server in bytes and
// shown here in KB.
const stats = db.stats();
print(`Database: ${stats.db}`);
print(`Collections: ${stats.collections}`);
print(`Indexes: ${stats.indexes}`);
print(`Data Size: ${(stats.dataSize / 1024).toFixed(2)} KB`);
print(`Storage Size: ${(stats.storageSize / 1024).toFixed(2)} KB`);

// Per-collection index count. Only getIndexes() is queried; fetching full
// collection statistics as well would be a server round trip whose result
// is never shown.
print('\nCollections created:');
db.getCollectionNames().forEach((collName) => {
  const indexCount = db.getCollection(collName).getIndexes().length;
  print(` - ${collName} (indexes: ${indexCount})`);
});
|
||||
|
||||
// Closing banner with the follow-up commands for the operator.
[
  '\n=============================================================================',
  'Next steps:',
  '1. Run migrations: docker-compose exec api python migrate.py',
  '2. Create admin user: docker-compose exec api python create_test_users.py',
  '=============================================================================',
].forEach((footerLine) => print(footerLine));
|
||||
Loading…
Add table
Reference in a new issue