# video-accessibility/docker-compose.optical-dev.yml

# =============================================================================
# optical-dev overrides — 4 vCPU / 15 GiB RAM server (Xeon @ 2.80 GHz)
#
# CPU-heavy work (ffmpeg encoding, Whisper inference) is offloaded to
# Cloud Run HTTP services — ffmpeg-worker and whisper-worker on this server
# become lightweight HTTP dispatchers (< 5% CPU each).
#
# MongoDB and Redis remain local (DB stays on server, no VPC Connector needed).
# Celery workers stay local (broker is local Redis).
#
# Cloud Run HTTP services (us-central1):
#   FFMPEG_SERVICE_URL  = https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app
#   WHISPER_SERVICE_URL = https://whisper-http-service-bcb6ipdqka-uc.a.run.app
# IAM: accessible-video-worker@ and video-accessibility@ SAs already have
#      roles/run.invoker on both services — no additional IAM setup needed.
#
# Usage:
#   docker compose -f docker-compose.yml \
#                  -f docker-compose.prod.yml \
#                  -f docker-compose.optical-dev.yml \
#                  --env-file .env.production up -d
# =============================================================================

services:
  # ── Keep on this server (MongoDB/Redis constraint — DB stays local) ─────────

  # MongoDB runs on this server; this override only sets its resource envelope.
  mongodb:
    deploy:
      resources:
        # Amount reserved for the container.
        reservations:
          cpus: '0.25'
          memory: 512M
        # Upper bound the container may use.
        limits:
          cpus: '0.5'
          memory: 1G

  # Redis runs on this server; this override only sets its resource envelope.
  redis:
    deploy:
      resources:
        # Amount reserved for the container.
        reservations:
          cpus: '0.1'
          memory: 256M
        # Upper bound the container may use.
        limits:
          cpus: '0.25'
          memory: 512M

  # API service: production settings, Celery fallback switched on, and the
  # Cloud Run endpoints used for offloaded ffmpeg / Whisper work.
  api:
    environment:
      # Cloud Run HTTP services.
      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"
      # Application settings.
      APP_ENV: prod
      USE_CELERY_FALLBACK: "true"
      WORKER_CONCURRENCY: "2"
    deploy:
      resources:
        # Amount reserved for the container.
        reservations:
          cpus: '0.5'
          memory: 1G
        # Upper bound the container may use.
        limits:
          cpus: '1.0'
          memory: 2G

  # Full worker: in fallback mode it consumes every queue named in --queues
  # below, including the ffmpeg and whisper queues.
  worker:
    environment:
      # Cloud Run HTTP services.
      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"
      # Application settings.
      APP_ENV: prod
      WORKER_CONCURRENCY: "2"
    deploy:
      replicas: 1
      resources:
        # Amount reserved for the container.
        reservations:
          cpus: '0.25'
          memory: 1G
        # Upper bound the container may use.
        limits:
          cpus: '0.75'
          memory: 2G
    # NOTE: --concurrency is hard-coded here and currently equals
    # WORKER_CONCURRENCY above; keep the two values in sync when changing either.
    command: >
      celery -A app.tasks worker
      --loglevel=info
      --queues=default,ingest,tts,render,ffmpeg,whisper,notify,embed
      --concurrency=2
      --hostname=full-worker@%h

  # ── Pipeline workers — enabled in fallback mode ────────────────────────────

  # ffmpeg-worker: the CPU-intensive encoding is performed by the Cloud Run
  # ffmpeg-http-service rather than on this server.
  # The memory limit stays at 1G because a local ffmpeg process may still run
  # while files are being staged to/from GCS.
  ffmpeg-worker:
    environment:
      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
    deploy:
      replicas: 1
      resources:
        # Amount reserved for the container.
        reservations:
          cpus: '0.1'
          memory: 256M
        # Upper bound the container may use.
        limits:
          cpus: '0.5'
          memory: 1G

  # tts-worker: single replica with a small resource envelope.
  tts-worker:
    deploy:
      replicas: 1
      resources:
        # Amount reserved for the container.
        reservations:
          cpus: '0.1'
          memory: 128M
        # Upper bound the container may use.
        limits:
          cpus: '0.25'
          memory: 512M

  # whisper-worker: Whisper inference is performed by the Cloud Run
  # whisper-http-service rather than on this server.
  # The memory limit stays at 2G because faster_whisper loads its model into
  # memory at startup even when tasks are routed to Cloud Run.
  whisper-worker:
    environment:
      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"
    deploy:
      replicas: 1
      resources:
        # Amount reserved for the container.
        reservations:
          cpus: '0.25'
          memory: 512M
        # Upper bound the container may use.
        limits:
          cpus: '0.5'
          memory: 2G