Fix Gemini timeout by using HTTP-level timeout on separate clients

asyncio.wait_for cannot reliably cancel SDK-internal HTTP connections. Replace it with two genai.Client instances — one per model — each configured with http_options={'timeout': N} so the TCP connection is actually torn down when the deadline is reached.

Timeouts: primary model 45s, fallback model 150s.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-02 12:49:11 +00:00 · 2026-03-02 12:49:11 +00:00 · 74585c5c18
commit 74585c5c18
parent a9bd6a2775
1 changed files with 21 additions and 21 deletions
--- a/backend/app/services/gemini_service.py
+++ b/backend/app/services/gemini_service.py
@ -11,7 +11,9 @@ from app.models.schemas import SubReview, RagStatus
 # Configure logging
 logger = logging.getLogger(__name__)

-# Timeout (seconds) for each Gemini API call
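+# Timeout (seconds) for each Gemini API call.
+# Set at the HTTP client level so the network connection is actually closed
+# on timeout — asyncio.wait_for alone cannot cancel SDK-internal retries.
+# NOTE(review): google-genai documents HttpOptions.timeout in milliseconds,
+# while these constants are in seconds — confirm the value is converted
+# (x 1000) before it is passed as http_options["timeout"].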
 _PRIMARY_TIMEOUT = 45
 _FALLBACK_TIMEOUT = 150

@ -26,38 +28,36 @@ class GeminiService:
        Args:
            api_key: Google Gemini API key
        """
-        self.client = genai.Client(api_key=api_key)
+        # Two separate clients with different HTTP-level timeouts so the
+        # network connection is torn down when the deadline is reached.
+        self.primary_client = genai.Client(
+            api_key=api_key,
+            http_options={"timeout": _PRIMARY_TIMEOUT},
+        )
+        self.fallback_client = genai.Client(
+            api_key=api_key,
+            http_options={"timeout": _FALLBACK_TIMEOUT},
+        )
        self.model = "gemini-3.1-pro-preview"
        self.fallback_model = "gemini-3-flash-preview"

    async def _generate_content(self, contents, config) -> any:
        """Call generate_content, falling back to fallback_model if the primary fails or times out."""
        try:
-            return await asyncio.wait_for(
-                self.client.aio.models.generate_content(
-                    model=self.model,
-                    contents=contents,
-                    config=config,
-                ),
-                timeout=_PRIMARY_TIMEOUT,
-            )
-        except asyncio.TimeoutError:
-            logger.warning(
-                f"[GEMINI API] Primary model {self.model} timed out after {_PRIMARY_TIMEOUT}s. "
-                f"Retrying with fallback {self.fallback_model}"
+            return await self.primary_client.aio.models.generate_content(
+                model=self.model,
+                contents=contents,
+                config=config,
            )
        except Exception as e:
            logger.warning(
                f"[GEMINI API] Primary model {self.model} failed: {e}. "
                f"Retrying with fallback {self.fallback_model}"
            )
-        return await asyncio.wait_for(
-            self.client.aio.models.generate_content(
-                model=self.fallback_model,
-                contents=contents,
-                config=config,
-            ),
-            timeout=_FALLBACK_TIMEOUT,
+        return await self.fallback_client.aio.models.generate_content(
+            model=self.fallback_model,
+            contents=contents,
+            config=config,
        )

    async def analyze_with_image(