From ec1ce5c13a0e0c230710f236a161aa4ba93d91a8 Mon Sep 17 00:00:00 2001
From: Vadym Samoilenko <vadymsamoilenko@oliver.agency>
Date: Thu, 30 Apr 2026 14:28:58 +0100
Subject: [PATCH] feat(deploy): offload ffmpeg+whisper to Cloud Run HTTP
 services on optical-dev
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sets FFMPEG_SERVICE_URL and WHISPER_SERVICE_URL so video_renderer.py and
whisper_transcribe.py route CPU-heavy work to Cloud Run instead of running
ffmpeg/Whisper locally. Both Cloud Run services and IAM (roles/run.invoker
for accessible-video-worker@ and video-accessibility@ SAs) are already
provisioned — only the env vars were missing.

Resource limits are reduced because both containers now act only as HTTP
dispatchers (reservations are lowered as well):
  ffmpeg-worker:  1G / 0.5 CPU → 256M / 0.25 CPU
  whisper-worker: 2G / 0.5 CPU → 512M / 0.25 CPU

Expected outcome: ffmpeg-worker CPU usage drops from 51% to < 5% (it was
also at 97% RAM before this change). Server load average should fall from
~2.2 to ~1.0-1.3.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 docker-compose.optical-dev.yml | 47 ++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/docker-compose.optical-dev.yml b/docker-compose.optical-dev.yml
index fef6a7f..a839a26 100644
--- a/docker-compose.optical-dev.yml
+++ b/docker-compose.optical-dev.yml
@@ -1,10 +1,18 @@
 # =============================================================================
-# optical-dev overrides — 2 CPU / ~8 GB RAM server
+# optical-dev overrides — 4 vCPU / 15 GiB RAM server (Xeon @ 2.80 GHz)
 #
-# Cloud Run Jobs (va-worker) are NOT yet reachable from this server
-# (VPC Connector pending). Until then USE_CELERY_FALLBACK=true routes all
-# heavy tasks through local Celery workers constrained to WORKER_CONCURRENCY=2
-# so they fit in 2 CPU without OOM on large videos.
+# CPU-heavy work (ffmpeg encoding, Whisper inference) is offloaded to
+# Cloud Run HTTP services — ffmpeg-worker and whisper-worker on this server
+# become lightweight HTTP dispatchers (expected: < 5% CPU each).
+#
+# MongoDB and Redis remain local (DB stays on server, no VPC Connector needed).
+# Celery workers stay local (broker is local Redis).
+#
+# Cloud Run HTTP services (us-central1):
+#   FFMPEG_SERVICE_URL  = https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app
+#   WHISPER_SERVICE_URL = https://whisper-http-service-bcb6ipdqka-uc.a.run.app
+# IAM: accessible-video-worker@ and video-accessibility@ SAs already have
+#      roles/run.invoker on both services — no additional IAM setup needed.
 #
 # Usage:
 #   docker compose -f docker-compose.yml \
@@ -14,7 +22,7 @@
 # =============================================================================
 
 services:
-  # ── Keep on this server, resource limits fit in 2 CPU ──────────────────────
+  # ── Keep on this server (MongoDB/Redis constraint — DB stays local) ─────────
 
   mongodb:
     deploy:
@@ -47,9 +55,10 @@ services:
           cpus: '0.5'
     environment:
       APP_ENV: prod
-      # Fallback mode: bypass Cloud Run, dispatch heavy tasks to local workers
       USE_CELERY_FALLBACK: "true"
       WORKER_CONCURRENCY: "2"
+      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
+      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"
 
   # Full worker: handles ALL queues in fallback mode
   worker:
@@ -65,6 +74,8 @@ services:
     environment:
       APP_ENV: prod
       WORKER_CONCURRENCY: "2"
+      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
+      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"
     command: >
       celery -A app.tasks worker
       --loglevel=info
@@ -74,16 +85,20 @@ services:
 
   # ── Pipeline workers — enabled in fallback mode ────────────────────────────
 
+  # ffmpeg-worker: CPU-intensive encoding now runs on Cloud Run (ffmpeg-http-service).
+  # Container is a lightweight HTTP dispatcher — reduced resource limits.
   ffmpeg-worker:
     deploy:
       replicas: 1
       resources:
         limits:
-          memory: 1G
-          cpus: '0.5'
-        reservations:
           memory: 256M
-          cpus: '0.1'
+          cpus: '0.25'
+        reservations:
+          memory: 128M
+          cpus: '0.05'
+    environment:
+      FFMPEG_SERVICE_URL: "https://ffmpeg-http-service-bcb6ipdqka-uc.a.run.app"
 
   tts-worker:
     deploy:
@@ -96,13 +111,17 @@ services:
           memory: 128M
           cpus: '0.1'
 
+  # whisper-worker: Whisper inference now runs on Cloud Run (whisper-http-service).
+  # Container is a lightweight HTTP dispatcher — reduced resource limits.
   whisper-worker:
     deploy:
       replicas: 1
       resources:
         limits:
-          memory: 2G
-          cpus: '0.5'
-        reservations:
           memory: 512M
           cpus: '0.25'
+        reservations:
+          memory: 256M
+          cpus: '0.05'
+    environment:
+      WHISPER_SERVICE_URL: "https://whisper-http-service-bcb6ipdqka-uc.a.run.app"