From 74585c5c18be3a9a44d25d51dcae91da25ed1f7f Mon Sep 17 00:00:00 2001
From: Vadym Samoilenko <vadymsamoilenko@oliver.agency>
Date: Mon, 2 Mar 2026 12:49:11 +0000
Subject: [PATCH] Fix Gemini timeout by using HTTP-level timeout on separate
 clients
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

asyncio.wait_for cannot reliably cancel SDK-internal HTTP connections.
Replace with two genai.Client instances — one per model — each configured
with http_options={'timeout': N} so the TCP connection is actually torn
down when the deadline is reached.

Primary model: 45s, Fallback model: 150s

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 backend/app/services/gemini_service.py | 42 +++++++++++++-------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/backend/app/services/gemini_service.py b/backend/app/services/gemini_service.py
index 3a99807..4401541 100755
--- a/backend/app/services/gemini_service.py
+++ b/backend/app/services/gemini_service.py
@@ -11,7 +11,9 @@ from app.models.schemas import SubReview, RagStatus
 # Configure logging
 logger = logging.getLogger(__name__)
 
-# Timeout (seconds) for each Gemini API call
+# Timeout (seconds) for each Gemini API call.
+# Set at the HTTP client level so the network connection is actually closed
+# on timeout — asyncio.wait_for alone cannot cancel SDK-internal retries.
 _PRIMARY_TIMEOUT = 45
 _FALLBACK_TIMEOUT = 150
 
@@ -26,38 +28,36 @@ class GeminiService:
         Args:
             api_key: Google Gemini API key
         """
-        self.client = genai.Client(api_key=api_key)
+        # One client per model, each with its own HTTP-level timeout so the
+        # connection is torn down at the deadline. The SDK expects milliseconds.
+        self.primary_client = genai.Client(
+            api_key=api_key,
+            http_options={"timeout": _PRIMARY_TIMEOUT * 1000},
+        )
+        self.fallback_client = genai.Client(
+            api_key=api_key,
+            http_options={"timeout": _FALLBACK_TIMEOUT * 1000},
+        )
         self.model = "gemini-3.1-pro-preview"
         self.fallback_model = "gemini-3-flash-preview"
 
     async def _generate_content(self, contents, config) -> any:
         """Call generate_content, falling back to fallback_model if the primary fails or times out."""
         try:
-            return await asyncio.wait_for(
-                self.client.aio.models.generate_content(
-                    model=self.model,
-                    contents=contents,
-                    config=config,
-                ),
-                timeout=_PRIMARY_TIMEOUT,
-            )
-        except asyncio.TimeoutError:
-            logger.warning(
-                f"[GEMINI API] Primary model {self.model} timed out after {_PRIMARY_TIMEOUT}s. "
-                f"Retrying with fallback {self.fallback_model}"
+            return await self.primary_client.aio.models.generate_content(
+                model=self.model,
+                contents=contents,
+                config=config,
             )
         except Exception as e:
             logger.warning(
                 f"[GEMINI API] Primary model {self.model} failed: {e}. "
                 f"Retrying with fallback {self.fallback_model}"
             )
-        return await asyncio.wait_for(
-            self.client.aio.models.generate_content(
-                model=self.fallback_model,
-                contents=contents,
-                config=config,
-            ),
-            timeout=_FALLBACK_TIMEOUT,
+        return await self.fallback_client.aio.models.generate_content(
+            model=self.fallback_model,
+            contents=contents,
+            config=config,
         )
 
     async def analyze_with_image(