faster_whisper loads its model into RAM at startup regardless of whether tasks are routed to Cloud Run — reducing the limit to 512M caused OOM kill on container start. Restored original limits (ffmpeg: 1G, whisper: 2G). Cloud Run URLs (FFMPEG_SERVICE_URL / WHISPER_SERVICE_URL) remain set so CPU offload is still active. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
128 lines
3.9 KiB
YAML
128 lines
3.9 KiB
YAML
# =============================================================================
# optical-dev overrides — 4 vCPU / 15 GiB RAM server (Xeon @ 2.80 GHz)
#
# CPU-heavy work (ffmpeg encoding, Whisper inference) is offloaded to
# Cloud Run HTTP services — ffmpeg-worker and whisper-worker on this server
# become lightweight HTTP dispatchers (< 5% CPU each).
#
# MongoDB and Redis remain local (DB stays on server, no VPC Connector needed).
# Celery workers stay local (broker is local Redis).
#
# Cloud Run HTTP services (us-central1):
#   FFMPEG_SERVICE_URL  = https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app
#   WHISPER_SERVICE_URL = https://whisper-http-service-bcb6ipdqka-uc.a.run.app
#   IAM: accessible-video-worker@ and video-accessibility@ SAs already have
#        roles/run.invoker on both services — no additional IAM setup needed.
#
# Usage:
#   docker compose -f docker-compose.yml \
#                  -f docker-compose.prod.yml \
#                  -f docker-compose.optical-dev.yml \
#                  --env-file .env.production up -d
# =============================================================================

services:
  # ── Keep on this server (MongoDB/Redis constraint — DB stays local) ─────────

  # mongodb: resource caps only; no other settings are overridden in this file.
  mongodb:
    deploy:
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'

  # redis: resource caps only; no other settings are overridden in this file.
  redis:
    deploy:
      resources:
        limits:
          memory: 512M
          cpus: '0.25'
        reservations:
          memory: 256M
          cpus: '0.1'

  # api: resource caps plus the Cloud Run endpoints for ffmpeg and Whisper.
  api:
    deploy:
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.5'
    environment:
      APP_ENV: prod
      # Quoted so these reach the container as strings, not YAML bool/int.
      USE_CELERY_FALLBACK: "true"
      WORKER_CONCURRENCY: "2"
      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"

  # Full worker: handles ALL queues in fallback mode
  worker:
    deploy:
      replicas: 1
      resources:
        limits:
          memory: 2G
          cpus: '0.75'
        reservations:
          memory: 1G
          cpus: '0.25'
    environment:
      APP_ENV: prod
      WORKER_CONCURRENCY: "2"
      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"
    # Folded scalar: the lines below are joined with spaces into one command.
    # --concurrency is hard-coded to the same value as WORKER_CONCURRENCY above;
    # keep the two in sync when changing either.
    command: >
      celery -A app.tasks worker
      --loglevel=info
      --queues=default,ingest,tts,render,ffmpeg,whisper,notify,embed
      --concurrency=2
      --hostname=full-worker@%h

  # ── Pipeline workers — enabled in fallback mode ────────────────────────────

  # ffmpeg-worker: CPU-intensive encoding runs on Cloud Run (ffmpeg-http-service).
  # Memory limit kept at 1G — local ffmpeg may still run during GCS file staging.
  ffmpeg-worker:
    deploy:
      replicas: 1
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 256M
          cpus: '0.1'
    environment:
      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"

  # tts-worker: resource caps only; no environment overrides in this file.
  tts-worker:
    deploy:
      replicas: 1
      resources:
        limits:
          memory: 512M
          cpus: '0.25'
        reservations:
          memory: 128M
          cpus: '0.1'

  # whisper-worker: Whisper inference runs on Cloud Run (whisper-http-service).
  # Memory limit kept at 2G — faster_whisper loads the model into memory at startup
  # regardless of whether tasks are routed to Cloud Run.
  whisper-worker:
    deploy:
      replicas: 1
      resources:
        limits:
          memory: 2G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'
    environment:
      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"