video-accessibility/docker-compose.optical-dev.yml
Vadym Samoilenko ec1ce5c13a feat(deploy): offload ffmpeg+whisper to Cloud Run HTTP services on optical-dev
Sets FFMPEG_SERVICE_URL and WHISPER_SERVICE_URL so video_renderer.py and
whisper_transcribe.py route CPU-heavy work to Cloud Run instead of running
ffmpeg/Whisper locally. Both Cloud Run services and IAM (roles/run.invoker
for accessible-video-worker@ and video-accessibility@ SAs) are already
provisioned — only the env vars were missing.

ffmpeg-worker container: 1G/0.5CPU → 256M/0.25CPU (HTTP dispatcher only)
whisper-worker container: 2G/0.5CPU → 512M/0.25CPU (HTTP dispatcher only)

Expected outcome: ffmpeg-worker drops from 51% CPU / 97% RAM to < 5% CPU.
Server load avg should fall from ~2.2 to ~1.0-1.3.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-30 14:28:58 +01:00

127 lines
3.8 KiB
YAML

# =============================================================================
# optical-dev overrides — 4 vCPU / 15 GiB RAM server (Xeon @ 2.80 GHz)
#
# CPU-heavy work (ffmpeg encoding, Whisper inference) is offloaded to
# Cloud Run HTTP services — ffmpeg-worker and whisper-worker on this server
# become lightweight HTTP dispatchers (< 5% CPU each).
#
# MongoDB and Redis remain local (DB stays on server, no VPC Connector needed).
# Celery workers stay local (broker is local Redis).
#
# Cloud Run HTTP services (us-central1):
#   FFMPEG_SERVICE_URL  = https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app
#   WHISPER_SERVICE_URL = https://whisper-http-service-bcb6ipdqka-uc.a.run.app
#   IAM: accessible-video-worker@ and video-accessibility@ SAs already have
#        roles/run.invoker on both services — no additional IAM setup needed.
#
# Usage:
#   docker compose -f docker-compose.yml \
#     -f docker-compose.prod.yml \
#     -f docker-compose.optical-dev.yml \
#     --env-file .env.production up -d
# =============================================================================
# Per-service overrides for the optical-dev host. This file is layered on top
# of docker-compose.yml and docker-compose.prod.yml (see Usage in the header),
# so each service below lists only the keys it overrides; images, volumes, etc.
# are presumably defined in those base files — verify there before adding keys.
#
# Budget check against the 4 vCPU host described in the header: the CPU limits
# below sum to 3.25 and the memory limits to roughly 6.75G.
services:
  # ── Keep on this server (MongoDB/Redis constraint — DB stays local) ─────────
  mongodb:
    deploy:
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'

  redis:
    deploy:
      resources:
        limits:
          memory: 512M
          cpus: '0.25'
        reservations:
          memory: 256M
          cpus: '0.1'

  api:
    deploy:
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.5'
    environment:
      APP_ENV: prod
      # Quoted so the values reach the container as strings, not YAML
      # booleans/integers.
      USE_CELERY_FALLBACK: "true"
      WORKER_CONCURRENCY: "2"
      # Cloud Run endpoints that the CPU-heavy ffmpeg/Whisper work is routed to.
      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"

  # Full worker: handles ALL queues in fallback mode
  worker:
    deploy:
      replicas: 1
      resources:
        limits:
          memory: 2G
          cpus: '0.75'
        reservations:
          memory: 1G
          cpus: '0.25'
    environment:
      APP_ENV: prod
      WORKER_CONCURRENCY: "2"
      # This worker also consumes the ffmpeg and whisper queues (see --queues
      # below), so it needs both service URLs.
      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"
    # Folded scalar: the lines below are joined with single spaces into one
    # command line. Keep --concurrency in sync with WORKER_CONCURRENCY above
    # (NOTE(review): confirm which of the two the image actually honours).
    command: >
      celery -A app.tasks worker
      --loglevel=info
      --queues=default,ingest,tts,render,ffmpeg,whisper,notify,embed
      --concurrency=2
      --hostname=full-worker@%h

  # ── Pipeline workers — enabled in fallback mode ────────────────────────────
  # ffmpeg-worker: CPU-intensive encoding now runs on Cloud Run (ffmpeg-http-service).
  # Container is a lightweight HTTP dispatcher — reduced resource limits.
  ffmpeg-worker:
    deploy:
      replicas: 1
      resources:
        limits:
          memory: 256M
          cpus: '0.25'
        reservations:
          memory: 128M
          cpus: '0.05'
    environment:
      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"

  # tts-worker: resource caps only; no Cloud Run service URL is set for it here.
  tts-worker:
    deploy:
      replicas: 1
      resources:
        limits:
          memory: 512M
          cpus: '0.25'
        reservations:
          memory: 128M
          cpus: '0.1'

  # whisper-worker: Whisper inference now runs on Cloud Run (whisper-http-service).
  # Container is a lightweight HTTP dispatcher — reduced resource limits.
  whisper-worker:
    deploy:
      replicas: 1
      resources:
        limits:
          memory: 512M
          cpus: '0.25'
        reservations:
          memory: 256M
          cpus: '0.05'
    environment:
      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"