diff --git a/backend/app/services/gemini_service.py b/backend/app/services/gemini_service.py
index 13939e2..14b7752 100755
--- a/backend/app/services/gemini_service.py
+++ b/backend/app/services/gemini_service.py
@@ -23,6 +23,40 @@ class GeminiService:
         """
         self.client = genai.Client(api_key=api_key)
         self.model = "gemini-3.1-pro-preview"
+        self.fallback_model = "gemini-3-flash-preview"
+
+    async def _generate_content(self, contents, config) -> types.GenerateContentResponse:
+        """Call ``generate_content`` on the primary model, falling back once.
+
+        Args:
+            contents: Passed through unchanged as ``contents`` to the SDK call.
+            config: Passed through unchanged as ``config`` to the SDK call.
+
+        Returns:
+            The response from ``self.model``, or the response from
+            ``self.fallback_model`` if the primary call raised.
+
+        Raises:
+            Exception: Whatever the fallback call raises; the primary
+                failure is only logged as a warning.
+        """
+        try:
+            return await self.client.aio.models.generate_content(
+                model=self.model,
+                contents=contents,
+                config=config,
+            )
+        except Exception as e:
+            # Any Exception from the primary call triggers exactly one fallback attempt.
+            logger.warning(
+                f"[GEMINI API] Primary model {self.model} failed: {e}. "
+                f"Retrying with fallback {self.fallback_model}"
+            )
+            return await self.client.aio.models.generate_content(
+                model=self.fallback_model,
+                contents=contents,
+                config=config,
+            )
 
     async def analyze_with_image(
         self,
@@ -102,13 +136,12 @@ class GeminiService:
 
             # Make the API call
             logger.info(f"[GEMINI API] Calling Gemini model: {self.model}")
-            response = await self.client.aio.models.generate_content(
-                model=self.model,
+            response = await self._generate_content(
                 contents=[file_part, prompt],
                 config=types.GenerateContentConfig(
                     response_mime_type="application/json",
                     response_schema=response_schema
-                )
+                ),
             )
 
             logger.info(f"[GEMINI API] Response received from Gemini")
@@ -236,13 +269,12 @@ class GeminiService:
 
             # Make the API call
             logger.info(f"[GEMINI API] Calling Gemini model: {self.model} with {len(images)} images")
-            response = await self.client.aio.models.generate_content(
-                model=self.model,
+            response = await self._generate_content(
                 contents=contents,
                 config=types.GenerateContentConfig(
                     response_mime_type="application/json",
                     response_schema=response_schema
-                )
+                ),
             )
 
             logger.info(f"[GEMINI API] Response received from Gemini (multi-image)")
@@ -318,13 +350,12 @@ class GeminiService:
                 "required": ["overallStatus", "summary"]
             }
 
-            response = await self.client.aio.models.generate_content(
-                model=self.model,
+            response = await self._generate_content(
                 contents=prompt,
                 config=types.GenerateContentConfig(
                     response_mime_type="application/json",
                     response_schema=response_schema
-                )
+                ),
             )
 
             result = json.loads(response.text.strip())
diff --git a/cloudrun.yaml b/cloudrun.yaml
new file mode 100644
index 0000000..e45303f
--- /dev/null
+++ b/cloudrun.yaml
@@ -0,0 +1,67 @@
+apiVersion: serving.knative.dev/v1
+kind: Service
+metadata:
+  name: modcomms-backend
+  annotations:
+    # Accept traffic from any network source, including the public internet.
+    # NOTE: this does not make the service unauthenticated; that additionally
+    # requires granting roles/run.invoker to allUsers via IAM.
+    run.googleapis.com/ingress: all
+spec:
+  template:
+    metadata:
+      annotations:
+        # Keep 1 instance warm to prevent cold-start WebSocket failures
+        autoscaling.knative.dev/minScale: "1"
+        autoscaling.knative.dev/maxScale: "10"
+        # Each instance handles up to 4 concurrent analyses (one per WebSocket)
+        autoscaling.knative.dev/target: "4"
+        # Second-generation execution environment. This setting does not
+        # control HTTP/2; end-to-end HTTP/2 is only enabled by naming the
+        # container port "h2c", which this manifest does not do.
+        run.googleapis.com/execution-environment: gen2
+    spec:
+      # 10-minute timeout — analysis (4 agents + lead agent) can take 2-3 minutes
+      # for large multi-page PDFs; 600s gives headroom without being excessive
+      timeoutSeconds: 600
+      containerConcurrency: 4
+      containers:
+        - image: gcr.io/YOUR_PROJECT_ID/modcomms-backend:latest
+          ports:
+            - containerPort: 8000
+          resources:
+            limits:
+              # 2 vCPU + 4Gi RAM: handles PDF rasterisation and parallel agent calls
+              cpu: "2"
+              memory: 4Gi
+          env:
+            # ── Gemini ────────────────────────────────────────────────────────
+            - name: GEMINI_API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: gemini-api-key
+                  key: latest
+            # ── Database ─────────────────────────────────────────────────────
+            - name: DATABASE_URL
+              valueFrom:
+                secretKeyRef:
+                  name: database-url
+                  key: latest
+            # ── Azure AD auth ─────────────────────────────────────────────────
+            - name: AZURE_TENANT_ID
+              valueFrom:
+                secretKeyRef:
+                  name: azure-tenant-id
+                  key: latest
+            - name: AZURE_CLIENT_ID
+              valueFrom:
+                secretKeyRef:
+                  name: azure-client-id
+                  key: latest
+            # ── App settings ──────────────────────────────────────────────────
+            - name: CORS_ORIGINS
+              value: "https://YOUR_FRONTEND_DOMAIN"
+            - name: HOST
+              value: "0.0.0.0"
+            # PORT is deliberately not set here: it is a reserved variable that
+            # Cloud Run injects from containerPort (8000) and rejects if user-defined.