---
# Knative Service manifest for the modcomms backend. The
# run.googleapis.com/* annotations indicate the deploy target is Cloud Run.
# Replace the YOUR_PROJECT_ID and YOUR_FRONTEND_DOMAIN placeholders before
# applying.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: modcomms-backend
  annotations:
    # Accept requests from any network source so the frontend can connect
    # directly. NOTE(review): this annotation only controls network ingress;
    # unauthenticated invocation is granted separately through IAM — confirm
    # the invoker binding is configured for this service.
    run.googleapis.com/ingress: all
spec:
  template:
    metadata:
      annotations:
        # Keep 1 instance warm to prevent cold-start latency
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "10"
        # Each instance handles up to 10 concurrent analyses
        autoscaling.knative.dev/target: "10"
        run.googleapis.com/execution-environment: gen2
    spec:
      # 10-minute timeout — analysis (4 agents + lead agent) can take
      # 2-3 minutes for large multi-page PDFs; 600s gives headroom without
      # being excessive
      timeoutSeconds: 600
      # Gemini API calls are I/O-bound; 10 concurrent slots prevent queuing
      # at low traffic
      containerConcurrency: 10
      containers:
        - image: gcr.io/YOUR_PROJECT_ID/modcomms-backend:latest
          ports:
            - containerPort: 8000
          # Startup check against /health: first probe after 5s, then every
          # 5s, tolerating up to 10 failures before the start is treated as
          # failed.
          startupProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 10
          resources:
            limits:
              # 2 vCPU + 4Gi RAM: handles PDF rasterisation and parallel
              # agent calls
              cpu: "2"
              memory: 4Gi
          env:
            # ── Gemini ──────────────────────────────────────────────
            # For each secretKeyRef below, `name` is the secret and `key`
            # is the secret version to read (`latest`).
            - name: GEMINI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: gemini-api-key
                  key: latest
            # ── Database ────────────────────────────────────────────
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: database-url
                  key: latest
            # ── Azure AD auth ───────────────────────────────────────
            - name: AZURE_TENANT_ID
              valueFrom:
                secretKeyRef:
                  name: azure-tenant-id
                  key: latest
            - name: AZURE_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  name: azure-client-id
                  key: latest
            # ── App settings ────────────────────────────────────────
            - name: CORS_ORIGINS
              value: "https://YOUR_FRONTEND_DOMAIN"
            - name: HOST
              value: "0.0.0.0"
            # PORT is deliberately not listed: it is a reserved variable
            # that the platform injects from containerPort (8000), and
            # setting it explicitly is rejected at deploy time.
            # ── Dev/staging only ────────────────────────────────────
            # Uncomment to disable Azure AD auth (e.g. staging environment):
            # - name: DISABLE_AUTH
            #   value: "true"