- Add 25s heartbeat ping from backend to prevent Apache/proxy idle-timeout
  killing the connection during 1-3 min analysis runs
- Handle heartbeat silently in both analyzeProof and analyzeWIPProof frontend handlers
- Run PDF rasterization via asyncio.to_thread so heartbeats aren't blocked
- Wrap analyze_proof with asyncio.wait_for(timeout=300) for a hard 5-min cap
- Log dropped send_message calls in ConnectionManager instead of swallowing silently
- cloudrun.yaml: add sessionAffinity, startup probe, raise containerConcurrency 4→10,
  document DISABLE_AUTH option

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
77 lines
3.5 KiB
YAML
77 lines
3.5 KiB
YAML
---
# Cloud Run (Knative Serving) manifest for the modcomms backend service.
# Replace YOUR_PROJECT_ID and YOUR_FRONTEND_DOMAIN before deploying.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: modcomms-backend
  annotations:
    # Accept traffic from every network source, including the public internet
    # (the frontend connects directly). This is a network-level setting only:
    # unauthenticated invocation additionally requires granting the
    # roles/run.invoker IAM role to allUsers on this service.
    run.googleapis.com/ingress: all

spec:
  template:
    metadata:
      annotations:
        # Keep 1 instance warm to prevent cold-start WebSocket failures
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "10"
        # Each instance handles up to 10 concurrent analyses (one per WebSocket).
        # NOTE(review): confirm Cloud Run honours this Knative annotation; the
        # enforced per-instance limit is containerConcurrency below.
        autoscaling.knative.dev/target: "10"
        # Run on the second-generation execution environment. This does not
        # change HTTP/2 behaviour: end-to-end HTTP/2 is only enabled by naming
        # the container port "h2c", which this manifest does not do.
        run.googleapis.com/execution-environment: gen2
        # Best-effort: route new connections from the same client (e.g. a
        # WebSocket reconnect) back to the instance that served it before.
        # Frames on an already-open WebSocket always stay on one instance.
        run.googleapis.com/sessionAffinity: "true"
    spec:
      # 10-minute timeout — analysis (4 agents + lead agent) can take 2-3 minutes
      # for large multi-page PDFs; 600s gives headroom without being excessive.
      # WebSocket connections count as one long request, so they are also
      # closed at this limit; clients must be prepared to reconnect.
      timeoutSeconds: 600
      # Gemini API calls are I/O-bound; 10 concurrent slots prevents queuing at low traffic
      containerConcurrency: 10
      containers:
        - image: gcr.io/YOUR_PROJECT_ID/modcomms-backend:latest
          ports:
            - containerPort: 8000
          # Hold traffic until /health answers; allows up to
          # 5s + 10 * 5s = 55s for the container to start.
          startupProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 10
          resources:
            limits:
              # 2 vCPU + 4Gi RAM: handles PDF rasterisation and parallel agent calls
              cpu: "2"
              memory: 4Gi
          env:
            # ── Gemini ────────────────────────────────────────────────────────
            # For Secret Manager references, `key` is the secret version.
            - name: GEMINI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: gemini-api-key
                  key: latest
            # ── Database ─────────────────────────────────────────────────────
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: database-url
                  key: latest
            # ── Azure AD auth ─────────────────────────────────────────────────
            - name: AZURE_TENANT_ID
              valueFrom:
                secretKeyRef:
                  name: azure-tenant-id
                  key: latest
            - name: AZURE_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  name: azure-client-id
                  key: latest
            # ── App settings ──────────────────────────────────────────────────
            - name: CORS_ORIGINS
              value: "https://YOUR_FRONTEND_DOMAIN"
            - name: HOST
              value: "0.0.0.0"
            # PORT is intentionally not set here: it is a reserved variable that
            # Cloud Run injects itself, using the containerPort above (8000).
            # Declaring it in `env` makes the deployment fail validation.
            # ── Dev/staging only ──────────────────────────────────────────────
            # Uncomment to disable Azure AD auth (e.g. staging environment):
            # - name: DISABLE_AUTH
            #   value: "true"