From ec1ce5c13a0e0c230710f236a161aa4ba93d91a8 Mon Sep 17 00:00:00 2001
From: Vadym Samoilenko
Date: Thu, 30 Apr 2026 14:28:58 +0100
Subject: [PATCH] feat(deploy): offload ffmpeg+whisper to Cloud Run HTTP
 services on optical-dev
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sets FFMPEG_SERVICE_URL and WHISPER_SERVICE_URL so video_renderer.py and
whisper_transcribe.py route CPU-heavy work to Cloud Run instead of
running ffmpeg/Whisper locally. Both Cloud Run services and IAM
(roles/run.invoker for accessible-video-worker@ and video-accessibility@
SAs) are already provisioned — only the env vars were missing.

ffmpeg-worker container: 1G/0.5CPU → 256M/0.25CPU (HTTP dispatcher only)
whisper-worker container: 2G/0.5CPU → 512M/0.25CPU (HTTP dispatcher only)

Expected outcome: ffmpeg-worker drops from 51% CPU / 97% RAM to < 5% CPU.
Server load avg should fall from ~2.2 to ~1.0-1.3.

Co-Authored-By: Claude Opus 4.7
---
 docker-compose.optical-dev.yml | 47 ++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/docker-compose.optical-dev.yml b/docker-compose.optical-dev.yml
index fef6a7f..a839a26 100644
--- a/docker-compose.optical-dev.yml
+++ b/docker-compose.optical-dev.yml
@@ -1,10 +1,18 @@
 # =============================================================================
-# optical-dev overrides — 2 CPU / ~8 GB RAM server
+# optical-dev overrides — 4 vCPU / 15 GiB RAM server (Xeon @ 2.80 GHz)
 #
-# Cloud Run Jobs (va-worker) are NOT yet reachable from this server
-# (VPC Connector pending). Until then USE_CELERY_FALLBACK=true routes all
-# heavy tasks through local Celery workers constrained to WORKER_CONCURRENCY=2
-# so they fit in 2 CPU without OOM on large videos.
+# CPU-heavy work (ffmpeg encoding, Whisper inference) is offloaded to
+# Cloud Run HTTP services — ffmpeg-worker and whisper-worker on this server
+# become lightweight HTTP dispatchers (< 5% CPU each).
+#
+# MongoDB and Redis remain local (DB stays on server, no VPC Connector needed).
+# Celery workers stay local (broker is local Redis).
+#
+# Cloud Run HTTP services (us-central1):
+#   FFMPEG_SERVICE_URL = https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app
+#   WHISPER_SERVICE_URL = https://whisper-http-service-bcb6ipdqka-uc.a.run.app
+# IAM: accessible-video-worker@ and video-accessibility@ SAs already have
+#   roles/run.invoker on both services — no additional IAM setup needed.
 #
 # Usage:
 #   docker compose -f docker-compose.yml \
@@ -14,7 +22,7 @@
 # =============================================================================
 
 services:
-  # ── Keep on this server, resource limits fit in 2 CPU ──────────────────────
+  # ── Keep on this server (MongoDB/Redis constraint — DB stays local) ─────────
 
   mongodb:
     deploy:
@@ -47,9 +55,10 @@ services:
           cpus: '0.5'
     environment:
       APP_ENV: prod
-      # Fallback mode: bypass Cloud Run, dispatch heavy tasks to local workers
       USE_CELERY_FALLBACK: "true"
       WORKER_CONCURRENCY: "2"
+      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
+      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"
 
   # Full worker: handles ALL queues in fallback mode
   worker:
@@ -65,6 +74,8 @@ services:
     environment:
       APP_ENV: prod
       WORKER_CONCURRENCY: "2"
+      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
+      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"
     command: >
       celery -A app.tasks worker
       --loglevel=info
@@ -74,16 +85,20 @@ services:
 
   # ── Pipeline workers — enabled in fallback mode ────────────────────────────
 
+  # ffmpeg-worker: CPU-intensive encoding now runs on Cloud Run (ffmpeg-http-service).
+  # Container is a lightweight HTTP dispatcher — reduced resource limits.
   ffmpeg-worker:
     deploy:
       replicas: 1
       resources:
         limits:
-          memory: 1G
-          cpus: '0.5'
-        reservations:
           memory: 256M
-          cpus: '0.1'
+          cpus: '0.25'
+        reservations:
+          memory: 128M
+          cpus: '0.05'
+    environment:
+      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
 
   tts-worker:
     deploy:
@@ -96,13 +111,17 @@ services:
           memory: 128M
           cpus: '0.1'
 
+  # whisper-worker: Whisper inference now runs on Cloud Run (whisper-http-service).
+  # Container is a lightweight HTTP dispatcher — reduced resource limits.
   whisper-worker:
     deploy:
       replicas: 1
       resources:
         limits:
-          memory: 2G
-          cpus: '0.5'
-        reservations:
           memory: 512M
           cpus: '0.25'
+        reservations:
+          memory: 256M
+          cpus: '0.05'
+    environment:
+      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"