# File: video-accessibility/infra/cloud-run/ffmpeg-http-service.yaml
# Last commit: 95852f1357 (michael)
#   fix: update Cloud Run service configs for compatibility
#   - FFmpeg: Enable CPU throttling to reduce idle costs
#   - Whisper: Keep CPU throttling disabled (model loading needs full CPU)
#   - Remove readinessProbe (requires BETA launch stage)
#   - Both services scale to zero when idle for cost savings
#
#   🤖 Generated with [Claude Code](https://claude.com/claude-code)
#
#   Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
# Commit date: 2026-01-02 17:34:10 -06:00
#
# Viewer metadata: 117 lines, 3.5 KiB, YAML

# =============================================================================
# Cloud Run Service: FFmpeg HTTP Service
# =============================================================================
# Autoscaling FFmpeg processing service for Cloud Run deployment.
# This service handles CPU-intensive video encoding via HTTP endpoints.
#
# Key features:
# - Scale to zero when idle (pay only for compute time used)
# - Up to 20 instances for parallel video processing
# - 8 vCPU / 32 GiB RAM per instance for fast encoding
# - Startup CPU boost for faster cold starts
# - Faster startup than Whisper (no model loading)
# =============================================================================
# Knative Service manifest for the FFmpeg HTTP service.
# Layout: Service metadata -> revision template (annotations + spec) -> one
# container with its env, resources and health probes.
# NOTE(review): PROJECT_ID looks like a placeholder that must be substituted
# before deploying — confirm against the deploy script.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ffmpeg-http-service
  annotations:
    run.googleapis.com/ingress: internal  # Only accessible from within GCP
    # NOTE(review): the same annotation is repeated on the revision template
    # below; confirm Cloud Run accepts it at Service level as well.
    run.googleapis.com/execution-environment: gen2  # Required for 8 vCPU
spec:
  template:
    metadata:
      annotations:
        # Autoscaling configuration
        autoscaling.knative.dev/minScale: "0"  # Scale to zero when idle
        autoscaling.knative.dev/maxScale: "20"  # Max 20 concurrent instances
        # Cloud Run Gen2 features
        run.googleapis.com/execution-environment: gen2  # Required for 8 vCPU
        run.googleapis.com/cpu-throttling: "true"  # Throttle CPU when idle to reduce costs
        run.googleapis.com/startup-cpu-boost: "true"  # Faster cold start
    spec:
      # Only 1 FFmpeg operation at a time per instance (CPU-intensive)
      containerConcurrency: 1
      # 10-minute timeout for long encoding operations
      timeoutSeconds: 600
      serviceAccountName: accessible-video-worker@PROJECT_ID.iam.gserviceaccount.com
      containers:
        - image: gcr.io/PROJECT_ID/ffmpeg-http-service:latest
          ports:
            - containerPort: 8080
          env:
            # Runtime / Python settings (values are strings on purpose)
            - name: APP_ENV
              value: "prod"
            - name: PYTHONPATH
              value: "/app"
            - name: PYTHONUNBUFFERED
              value: "1"
            - name: PYTHONDONTWRITEBYTECODE
              value: "1"
            # GCP Configuration
            - name: GCP_PROJECT_ID
              value: "PROJECT_ID"
            # NOTE(review): for the secret-backed variables below, `key: latest`
            # presumably selects the newest secret version — confirm.
            - name: GCS_BUCKET
              valueFrom:
                secretKeyRef:
                  name: gcs-bucket-name
                  key: latest
            # MongoDB for job tracking (optional, for logging)
            - name: MONGODB_URL
              valueFrom:
                secretKeyRef:
                  name: mongodb-url
                  key: latest
            # OpenTelemetry configuration
            - name: OTEL_SERVICE_NAME
              value: "ffmpeg-http-service"
            - name: OTEL_SERVICE_VERSION
              value: "1.0.0"
            - name: OTEL_TRACES_EXPORTER
              value: "gcp_trace"
            # Sentry configuration (optional)
            - name: SENTRY_DSN
              valueFrom:
                secretKeyRef:
                  name: sentry-dsn
                  key: latest
            - name: SENTRY_ENVIRONMENT
              value: "production"
          resources:
            limits:
              memory: "32Gi"
              cpu: "8000m"  # 8 vCPU
            requests:
              memory: "4Gi"
              cpu: "2000m"  # 2 vCPU minimum
          # Health checks (both probes hit the same /health endpoint on 8080)
          startupProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 5  # FFmpeg starts fast (no model to load)
            periodSeconds: 5
            timeoutSeconds: 5
            # Roughly 5 s delay + 6 checks x 5 s before startup counts as failed
            failureThreshold: 6
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 15
            periodSeconds: 30
            timeoutSeconds: 10