# video-accessibility/infra/cloud-run/ffmpeg-http-service.yaml

# =============================================================================
# Cloud Run Service: FFmpeg HTTP Service
# =============================================================================
# Autoscaling FFmpeg processing service for Cloud Run deployment.
# This service handles CPU-intensive video encoding via HTTP endpoints.
#
# Key features:
# - Scale to zero when idle (pay only for compute time used)
# - Up to 20 instances for parallel video processing
# - 8 vCPU / 32GB RAM for fast encoding
# - Startup CPU boost for faster cold starts
# - Faster startup than Whisper (no model loading)
# =============================================================================

apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ffmpeg-http-service
  annotations:
    # Service-level setting: only accept traffic originating inside GCP.
    run.googleapis.com/ingress: internal
    # run.googleapis.com/execution-environment is a revision-level annotation,
    # so it is declared once under spec.template.metadata.annotations instead
    # of being repeated on the Service itself.
spec:
  template:
    metadata:
      annotations:
        # Autoscaling bounds.
        # Lower bound of zero lets the service scale away entirely when idle.
        autoscaling.knative.dev/minScale: '0'
        # Upper bound: at most 20 instances at the same time.
        autoscaling.knative.dev/maxScale: '20'

        # Cloud Run revision settings.
        # Run on the second-generation execution environment.
        # NOTE(review): the original note says gen2 is required for 8 vCPU;
        # confirm against the current Cloud Run CPU limits.
        run.googleapis.com/execution-environment: gen2
        # Throttle CPU while the instance is idle to keep costs down.
        run.googleapis.com/cpu-throttling: 'true'
        # Boost CPU during startup to shorten cold starts.
        run.googleapis.com/startup-cpu-boost: 'true'

    spec:
      # Service account the revision runs as.
      # PROJECT_ID looks like a placeholder to substitute before deploying.
      serviceAccountName: accessible-video-worker@PROJECT_ID.iam.gserviceaccount.com

      # Per-request deadline: 600 s (10 minutes) to cover long encodes.
      timeoutSeconds: 600

      # FFmpeg is CPU-intensive, so each instance serves one request at a time.
      containerConcurrency: 1

      containers:
      - image: gcr.io/PROJECT_ID/ffmpeg-http-service:latest

        # Port the container serves HTTP on (also used by the health probes).
        ports:
        - containerPort: 8080

        env:
        # Application environment and Python runtime switches.
        - name: APP_ENV
          value: 'prod'
        - name: PYTHONPATH
          value: '/app'
        - name: PYTHONUNBUFFERED
          value: '1'
        - name: PYTHONDONTWRITEBYTECODE
          value: '1'

        # GCP project and storage bucket.
        - name: GCP_PROJECT_ID
          value: 'PROJECT_ID'
        # In Cloud Run, secretKeyRef.key selects the secret version;
        # "latest" follows the newest version of the named secret.
        - name: GCS_BUCKET
          valueFrom:
            secretKeyRef:
              key: latest
              name: gcs-bucket-name

        # MongoDB connection string, used for job tracking / logging.
        # NOTE(review): described as optional, but as a secretKeyRef the
        # mongodb-url secret presumably has to exist for the deploy to succeed.
        - name: MONGODB_URL
          valueFrom:
            secretKeyRef:
              key: latest
              name: mongodb-url

        # OpenTelemetry: service identity and trace exporter.
        - name: OTEL_SERVICE_NAME
          value: 'ffmpeg-http-service'
        - name: OTEL_SERVICE_VERSION
          value: '1.0.0'
        - name: OTEL_TRACES_EXPORTER
          value: 'gcp_trace'

        # Sentry error reporting.
        # NOTE(review): described as optional, but the sentry-dsn secret is
        # referenced unconditionally here; confirm it always exists.
        - name: SENTRY_DSN
          valueFrom:
            secretKeyRef:
              key: latest
              name: sentry-dsn
        - name: SENTRY_ENVIRONMENT
          value: 'production'

        resources:
          # Per-instance ceiling: 8 vCPU and 32 GiB of memory.
          limits:
            cpu: '8000m'
            memory: '32Gi'
          # Requested baseline: 2 vCPU and 4 GiB of memory.
          # NOTE(review): confirm how Cloud Run treats requests that differ
          # from limits.
          requests:
            cpu: '2000m'
            memory: '4Gi'

        # Health checks
        startupProbe:
          # Startup check: HTTP GET /health on the container port.
          httpGet:
            port: 8080
            path: /health
          # Short initial delay: there is no model to load, so startup is fast.
          initialDelaySeconds: 5
          # Each attempt may take up to 5 s.
          timeoutSeconds: 5
          # Probe every 5 s.
          periodSeconds: 5
          # Failure threshold of 6 probe attempts.
          failureThreshold: 6

        livenessProbe:
          # Liveness check: HTTP GET /health on the container port.
          httpGet:
            port: 8080
            path: /health
          # First liveness check runs 15 s after the container starts.
          initialDelaySeconds: 15
          # Each attempt may take up to 10 s.
          timeoutSeconds: 10
          # Probe every 30 s.
          periodSeconds: 30