---
# Cloud Run (Knative Serving) manifest for the modcomms backend service.
# Replace YOUR_PROJECT_ID and YOUR_FRONTEND_DOMAIN before deploying.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: modcomms-backend
  annotations:
    # Accept traffic from every network source, including the public
    # internet, so the frontend can connect directly. This governs network
    # ingress only: whether callers must authenticate is decided separately
    # by IAM (roles/run.invoker), not by this annotation.
    run.googleapis.com/ingress: all
spec:
  template:
    metadata:
      annotations:
        # Keep 1 instance warm to prevent cold-start WebSocket failures.
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "10"
        # Each instance handles up to 4 concurrent analyses (one per
        # WebSocket); kept equal to containerConcurrency below.
        # NOTE(review): confirm the target platform honours this annotation;
        # containerConcurrency is the setting that enforces the limit.
        autoscaling.knative.dev/target: "4"
        # Run on the second-generation execution environment.
        # NOTE(review): this does not affect HTTP/2. End-to-end HTTP/2 is
        # opted into by naming the container port "h2c", which this manifest
        # does not do, so WebSocket upgrades arrive over HTTP/1.1.
        run.googleapis.com/execution-environment: gen2
    spec:
      # 10-minute timeout: analysis (4 agents + lead agent) can take 2-3
      # minutes for large multi-page PDFs; 600s gives headroom without being
      # excessive. WebSocket connections are bound by this request timeout
      # as well, so clients should be prepared to reconnect.
      timeoutSeconds: 600
      containerConcurrency: 4
      containers:
        - image: gcr.io/YOUR_PROJECT_ID/modcomms-backend:latest
          ports:
            # Cloud Run injects PORT=8000 into the container from this value.
            - containerPort: 8000
          resources:
            limits:
              # 2 vCPU + 4Gi RAM: handles PDF rasterisation and parallel
              # agent calls.
              cpu: "2"
              memory: 4Gi
          env:
            # ── Gemini ──────────────────────────────────────────────────
            # For secretKeyRef, `name` is the Secret Manager secret and
            # `key` is the secret version to read.
            - name: GEMINI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: gemini-api-key
                  key: latest
            # ── Database ────────────────────────────────────────────────
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: database-url
                  key: latest
            # ── Azure AD auth ───────────────────────────────────────────
            - name: AZURE_TENANT_ID
              valueFrom:
                secretKeyRef:
                  name: azure-tenant-id
                  key: latest
            - name: AZURE_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  name: azure-client-id
                  key: latest
            # ── App settings ────────────────────────────────────────────
            - name: CORS_ORIGINS
              value: "https://YOUR_FRONTEND_DOMAIN"
            - name: HOST
              value: "0.0.0.0"
            # PORT is intentionally not declared: Cloud Run reserves that
            # variable, sets it from containerPort above, and rejects
            # manifests that define it explicitly.