# modcomms/cloudrun.yaml

# Knative Serving v1 Service manifest for the modcomms backend.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: modcomms-backend
  annotations:
    # Accept requests from any network source (the frontend connects directly).
    # NOTE(review): this annotation only governs network ingress. Whether
    # callers may invoke the service without credentials is presumably decided
    # separately (IAM invoker binding) — confirm it is configured as intended.
    run.googleapis.com/ingress: all
spec:
  template:
    metadata:
      annotations:
        # Keep 1 instance warm to prevent cold-start latency
        autoscaling.knative.dev/minScale: "1"
        # Never scale out beyond 10 instances
        autoscaling.knative.dev/maxScale: "10"
        # Autoscaler concurrency target per instance, kept equal to the
        # containerConcurrency value set further down in this template.
        # NOTE(review): the hard per-instance limit is containerConcurrency;
        # confirm the platform actually honours this target annotation.
        autoscaling.knative.dev/target: "10"
        # Run revisions on the gen2 execution environment
        run.googleapis.com/execution-environment: gen2
    spec:
      # Per-request timeout of 600 s (10 minutes). An analysis (4 agents +
      # lead agent) can take 2-3 minutes for large multi-page PDFs; 600 s
      # gives headroom without being excessive.
      timeoutSeconds: 600
      # Maximum simultaneous requests per instance. Gemini API calls are
      # I/O-bound, so 10 concurrent slots prevent queuing at low traffic.
      containerConcurrency: 10
      # Single backend container: image, listening port, startup probe,
      # resource limits and environment.
      containers:
        # YOUR_PROJECT_ID is a placeholder; substitute it before deploying.
        - image: gcr.io/YOUR_PROJECT_ID/modcomms-backend:latest
          ports:
            # Port the server listens on. Cloud Run injects the PORT
            # environment variable with this value, so PORT must not be
            # declared under env below.
            - containerPort: 8000
          # Gate traffic on GET /health. With a 5 s initial delay and
          # 10 attempts 5 s apart, startup may take about 50 s before the
          # instance is considered failed.
          startupProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 10
          resources:
            limits:
              # 2 vCPU + 4Gi RAM: handles PDF rasterisation and parallel agent calls
              cpu: "2"
              memory: 4Gi
          env:
            # For every secretKeyRef below, `name` is the secret's name and
            # `key` selects the secret version; `latest` tracks the newest one.
            # ── Gemini ────────────────────────────────────────────────────────
            - name: GEMINI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: gemini-api-key
                  key: latest
            # ── Database ─────────────────────────────────────────────────────
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: database-url
                  key: latest
            # ── Azure AD auth ─────────────────────────────────────────────────
            - name: AZURE_TENANT_ID
              valueFrom:
                secretKeyRef:
                  name: azure-tenant-id
                  key: latest
            - name: AZURE_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  name: azure-client-id
                  key: latest
            # ── App settings ──────────────────────────────────────────────────
            # YOUR_FRONTEND_DOMAIN is a placeholder; substitute it before deploying.
            - name: CORS_ORIGINS
              value: "https://YOUR_FRONTEND_DOMAIN"
            # Bind on all interfaces so the platform can reach the server.
            - name: HOST
              value: "0.0.0.0"
            # PORT is intentionally absent: it is a reserved variable that
            # Cloud Run sets from containerPort (8000); declaring it here
            # makes the deployment fail with a reserved-env-name error.
            # ── Dev/staging only ──────────────────────────────────────────────
            # Uncomment to disable Azure AD auth (e.g. staging environment):
            # - name: DISABLE_AUTH
            #   value: "true"