- FFmpeg: Enable CPU throttling to reduce idle costs
- Whisper: Keep CPU throttling disabled (model loading needs full CPU)
- Remove readinessProbe (requires BETA launch stage)
- Both services scale to zero when idle for cost savings

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
117 lines
3.5 KiB
YAML
117 lines
3.5 KiB
YAML
---
# =============================================================================
# Cloud Run Service: FFmpeg HTTP Service
# =============================================================================
# Autoscaling FFmpeg processing service for Cloud Run deployment.
# This service handles CPU-intensive video encoding via HTTP endpoints.
#
# Key features:
# - Scale to zero when idle (pay only for compute time used)
# - Up to 20 instances for parallel video processing
# - 8 vCPU / 32GB RAM for fast encoding
# - CPU throttled when idle to reduce costs
# - Startup CPU boost for faster cold starts
# - Faster startup than Whisper (no model loading)
#
# NOTE(review): PROJECT_ID below looks like a placeholder that must be
# substituted before deploying — confirm against the deploy tooling.
# =============================================================================

apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ffmpeg-http-service
  annotations:
    # Service-level annotations only. The execution environment is a
    # revision-level setting and is declared once, under
    # spec.template.metadata.annotations.
    run.googleapis.com/ingress: internal  # Only accessible from within GCP
spec:
  template:
    metadata:
      annotations:
        # Autoscaling configuration
        autoscaling.knative.dev/minScale: "0"  # Scale to zero when idle
        autoscaling.knative.dev/maxScale: "20"  # Max 20 concurrent instances

        # Cloud Run Gen2 features
        run.googleapis.com/execution-environment: gen2  # Required for 8 vCPU
        run.googleapis.com/cpu-throttling: "true"  # Throttle CPU when idle to reduce costs
        run.googleapis.com/startup-cpu-boost: "true"  # Faster cold start

    spec:
      # Only 1 FFmpeg operation at a time per instance (CPU-intensive)
      containerConcurrency: 1

      # 10-minute timeout for long encoding operations
      timeoutSeconds: 600

      serviceAccountName: accessible-video-worker@PROJECT_ID.iam.gserviceaccount.com

      containers:
        - image: gcr.io/PROJECT_ID/ffmpeg-http-service:latest

          ports:
            - containerPort: 8080

          env:
            - name: APP_ENV
              value: "prod"
            - name: PYTHONPATH
              value: "/app"
            - name: PYTHONUNBUFFERED
              value: "1"
            - name: PYTHONDONTWRITEBYTECODE
              value: "1"

            # GCP Configuration
            - name: GCP_PROJECT_ID
              value: "PROJECT_ID"
            # For Cloud Run secret references, `name` is the Secret Manager
            # secret and `key` is the secret version (`latest` here).
            - name: GCS_BUCKET
              valueFrom:
                secretKeyRef:
                  name: gcs-bucket-name
                  key: latest

            # MongoDB for job tracking (optional, for logging)
            - name: MONGODB_URL
              valueFrom:
                secretKeyRef:
                  name: mongodb-url
                  key: latest

            # OpenTelemetry configuration
            - name: OTEL_SERVICE_NAME
              value: "ffmpeg-http-service"
            - name: OTEL_SERVICE_VERSION
              value: "1.0.0"
            - name: OTEL_TRACES_EXPORTER
              value: "gcp_trace"

            # Sentry configuration (optional)
            - name: SENTRY_DSN
              valueFrom:
                secretKeyRef:
                  name: sentry-dsn
                  key: latest
            - name: SENTRY_ENVIRONMENT
              value: "production"

          resources:
            limits:
              memory: "32Gi"
              cpu: "8000m"  # 8 vCPU
            # NOTE(review): confirm whether Cloud Run honours `requests`;
            # instance sizing may be driven by `limits` alone.
            requests:
              memory: "4Gi"
              cpu: "2000m"  # 2 vCPU minimum

          # Health checks
          # Startup: first check after 5s, then every 5s; 6 consecutive
          # failures mark the instance as failed to start.
          startupProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 5  # FFmpeg starts fast (no model to load)
            periodSeconds: 5
            timeoutSeconds: 5
            failureThreshold: 6

          # Liveness: polls the same /health endpoint every 30s once the
          # container is running; no failureThreshold is set here.
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 15
            periodSeconds: 30
            timeoutSeconds: 10