From 1de572fcb0ad27980bf4dd87a8c41f2e8ab56be5 Mon Sep 17 00:00:00 2001
From: Vadym Samoilenko
Date: Wed, 18 Mar 2026 13:18:24 +0000
Subject: [PATCH] Switch primary model to flash to fix GCP LB 30s timeout

gemini-3.1-pro-preview takes ~25s per call, hitting the GCP load
balancer's 30s hard timeout before analysis completes. Flash model
returns in ~5-8s, fitting comfortably within the limit. Pro model kept
as fallback.

Co-Authored-By: Claude Sonnet 4.6
---
 backend/app/services/gemini_service.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/app/services/gemini_service.py b/backend/app/services/gemini_service.py
index 49f12c2..53bc807 100755
--- a/backend/app/services/gemini_service.py
+++ b/backend/app/services/gemini_service.py
@@ -38,8 +38,8 @@ class GeminiService:
             api_key=api_key,
             http_options={"timeout": _FALLBACK_TIMEOUT_MS},
         )
-        self.model = "gemini-3.1-pro-preview"
-        self.fallback_model = "gemini-3-flash-preview"
+        self.model = "gemini-3-flash-preview"
+        self.fallback_model = "gemini-3.1-pro-preview"
 
     async def _generate_content(
         self,