From 74585c5c18be3a9a44d25d51dcae91da25ed1f7f Mon Sep 17 00:00:00 2001 From: Vadym Samoilenko Date: Mon, 2 Mar 2026 12:49:11 +0000 Subject: [PATCH] Fix Gemini timeout by using HTTP-level timeout on separate clients MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit asyncio.wait_for cannot reliably cancel SDK-internal HTTP connections. Replace with two genai.Client instances — one per model — each configured with http_options={'timeout': N} so the TCP connection is actually torn down when the deadline is reached. Primary model: 45s, Fallback model: 150s Co-Authored-By: Claude Sonnet 4.6 --- backend/app/services/gemini_service.py | 42 +++++++++++++------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/backend/app/services/gemini_service.py b/backend/app/services/gemini_service.py index 3a99807..4401541 100755 --- a/backend/app/services/gemini_service.py +++ b/backend/app/services/gemini_service.py @@ -11,7 +11,9 @@ from app.models.schemas import SubReview, RagStatus # Configure logging logger = logging.getLogger(__name__) -# Timeout (seconds) for each Gemini API call +# Timeout (seconds) for each Gemini API call. +# Set at the HTTP client level so the network connection is actually closed +# on timeout — asyncio.wait_for alone cannot cancel SDK-internal retries. _PRIMARY_TIMEOUT = 45 _FALLBACK_TIMEOUT = 150 @@ -26,38 +28,36 @@ class GeminiService: Args: api_key: Google Gemini API key """ - self.client = genai.Client(api_key=api_key) + # Two separate clients with different HTTP-level timeouts so the + # network connection is torn down when the deadline is reached. 
+        self.primary_client = genai.Client(
+            api_key=api_key,
+            http_options={"timeout": _PRIMARY_TIMEOUT * 1000},  # SDK expects milliseconds
+        )
+        self.fallback_client = genai.Client(
+            api_key=api_key,
+            http_options={"timeout": _FALLBACK_TIMEOUT * 1000},  # SDK expects milliseconds
+        )
         self.model = "gemini-3.1-pro-preview"
         self.fallback_model = "gemini-3-flash-preview"
 
     async def _generate_content(self, contents, config) -> any:
         """Call generate_content, falling back to fallback_model if the primary fails or times out."""
         try:
-            return await asyncio.wait_for(
-                self.client.aio.models.generate_content(
-                    model=self.model,
-                    contents=contents,
-                    config=config,
-                ),
-                timeout=_PRIMARY_TIMEOUT,
-            )
-        except asyncio.TimeoutError:
-            logger.warning(
-                f"[GEMINI API] Primary model {self.model} timed out after {_PRIMARY_TIMEOUT}s. "
-                f"Retrying with fallback {self.fallback_model}"
+            return await self.primary_client.aio.models.generate_content(
+                model=self.model,
+                contents=contents,
+                config=config,
             )
         except Exception as e:
             logger.warning(
                 f"[GEMINI API] Primary model {self.model} failed: {e}. "
                 f"Retrying with fallback {self.fallback_model}"
             )
-        return await asyncio.wait_for(
-            self.client.aio.models.generate_content(
-                model=self.fallback_model,
-                contents=contents,
-                config=config,
-            ),
-            timeout=_FALLBACK_TIMEOUT,
+        return await self.fallback_client.aio.models.generate_content(
+            model=self.fallback_model,
+            contents=contents,
+            config=config,
         )
 
     async def analyze_with_image(