feat(deploy): offload ffmpeg+whisper to Cloud Run HTTP services on optical-dev
Sets FFMPEG_SERVICE_URL and WHISPER_SERVICE_URL so video_renderer.py and whisper_transcribe.py route CPU-heavy work to Cloud Run instead of running ffmpeg/Whisper locally. Both Cloud Run services and IAM (roles/run.invoker for accessible-video-worker@ and video-accessibility@ SAs) are already provisioned — only the env vars were missing. Resource limit changes: ffmpeg-worker container: 1G/0.5CPU → 256M/0.25CPU (HTTP dispatcher only); whisper-worker container: 2G/0.5CPU → 512M/0.25CPU (HTTP dispatcher only). Expected outcome: ffmpeg-worker drops from 51% CPU / 97% RAM to < 5% CPU. Server load avg should fall from ~2.2 to ~1.0-1.3. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
5fd370c093
commit
ec1ce5c13a
1 changed file with 33 additions and 14 deletions
|
|
@ -1,10 +1,18 @@
|
|||
# =============================================================================
|
||||
# optical-dev overrides — 2 CPU / ~8 GB RAM server
|
||||
# optical-dev overrides — 4 vCPU / 15 GiB RAM server (Xeon @ 2.80 GHz)
|
||||
#
|
||||
# Cloud Run Jobs (va-worker) are NOT yet reachable from this server
|
||||
# (VPC Connector pending). Until then USE_CELERY_FALLBACK=true routes all
|
||||
# heavy tasks through local Celery workers constrained to WORKER_CONCURRENCY=2
|
||||
# so they fit in 2 CPU without OOM on large videos.
|
||||
# CPU-heavy work (ffmpeg encoding, Whisper inference) is offloaded to
|
||||
# Cloud Run HTTP services — ffmpeg-worker and whisper-worker on this server
|
||||
# become lightweight HTTP dispatchers (< 5% CPU each).
|
||||
#
|
||||
# MongoDB and Redis remain local (DB stays on server, no VPC Connector needed).
|
||||
# Celery workers stay local (broker is local Redis).
|
||||
#
|
||||
# Cloud Run HTTP services (us-central1):
|
||||
# FFMPEG_SERVICE_URL = https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app
|
||||
# WHISPER_SERVICE_URL = https://whisper-http-service-bcb6ipdqka-uc.a.run.app
|
||||
# IAM: accessible-video-worker@ and video-accessibility@ SAs already have
|
||||
# roles/run.invoker on both services — no additional IAM setup needed.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker-compose.yml \
|
||||
|
|
@ -14,7 +22,7 @@
|
|||
# =============================================================================
|
||||
|
||||
services:
|
||||
# ── Keep on this server, resource limits fit in 2 CPU ──────────────────────
|
||||
# ── Keep on this server (MongoDB/Redis constraint — DB stays local) ─────────
|
||||
|
||||
mongodb:
|
||||
deploy:
|
||||
|
|
@ -47,9 +55,10 @@ services:
|
|||
cpus: '0.5'
|
||||
environment:
|
||||
APP_ENV: prod
|
||||
# Fallback mode: bypass Cloud Run, dispatch heavy tasks to local workers
|
||||
USE_CELERY_FALLBACK: "true"
|
||||
WORKER_CONCURRENCY: "2"
|
||||
FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
|
||||
WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"
|
||||
|
||||
# Full worker: handles ALL queues in fallback mode
|
||||
worker:
|
||||
|
|
@ -65,6 +74,8 @@ services:
|
|||
environment:
|
||||
APP_ENV: prod
|
||||
WORKER_CONCURRENCY: "2"
|
||||
FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
|
||||
WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"
|
||||
command: >
|
||||
celery -A app.tasks worker
|
||||
--loglevel=info
|
||||
|
|
@ -74,16 +85,20 @@ services:
|
|||
|
||||
# ── Pipeline workers — enabled in fallback mode ────────────────────────────
|
||||
|
||||
# ffmpeg-worker: CPU-intensive encoding now runs on Cloud Run (ffmpeg-http-service).
|
||||
# Container is a lightweight HTTP dispatcher — reduced resource limits.
|
||||
ffmpeg-worker:
|
||||
deploy:
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
memory: 1G
|
||||
cpus: '0.5'
|
||||
reservations:
|
||||
memory: 256M
|
||||
cpus: '0.1'
|
||||
cpus: '0.25'
|
||||
reservations:
|
||||
memory: 128M
|
||||
cpus: '0.05'
|
||||
environment:
|
||||
FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
|
||||
|
||||
tts-worker:
|
||||
deploy:
|
||||
|
|
@ -96,13 +111,17 @@ services:
|
|||
memory: 128M
|
||||
cpus: '0.1'
|
||||
|
||||
# whisper-worker: Whisper inference now runs on Cloud Run (whisper-http-service).
|
||||
# Container is a lightweight HTTP dispatcher — reduced resource limits.
|
||||
whisper-worker:
|
||||
deploy:
|
||||
replicas: 1
|
||||
resources:
|
||||
limits:
|
||||
memory: 2G
|
||||
cpus: '0.5'
|
||||
reservations:
|
||||
memory: 512M
|
||||
cpus: '0.25'
|
||||
reservations:
|
||||
memory: 256M
|
||||
cpus: '0.05'
|
||||
environment:
|
||||
WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue