Fix Gemini timeout by using HTTP-level timeout on separate clients

asyncio.wait_for cannot reliably cancel SDK-internal HTTP connections.
Replace it with two genai.Client instances — one per model — each configured
with http_options={'timeout': N}, so the TCP connection is actually torn
down when the deadline is reached.

Primary model: 45s, Fallback model: 150s

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-03-02 12:49:11 +00:00
parent a9bd6a2775
commit 74585c5c18

View file

@ -11,7 +11,9 @@ from app.models.schemas import SubReview, RagStatus
# Configure logging
logger = logging.getLogger(__name__)
# Timeout (seconds) for each Gemini API call
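# Timeout (seconds) for each Gemini API call.
# Set at the HTTP client level so the network connection is actually closed
# on timeout — asyncio.wait_for alone cannot cancel SDK-internal retries.
# NOTE(review): google-genai documents HttpOptions.timeout in milliseconds —
# confirm these values are converted before being passed as http_options.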
_PRIMARY_TIMEOUT = 45
_FALLBACK_TIMEOUT = 150
@ -26,38 +28,36 @@ class GeminiService:
Args:
api_key: Google Gemini API key
"""
self.client = genai.Client(api_key=api_key)
# Two separate clients with different HTTP-level timeouts so the
# network connection is torn down when the deadline is reached.
self.primary_client = genai.Client(
api_key=api_key,
http_options={"timeout": _PRIMARY_TIMEOUT},
)
self.fallback_client = genai.Client(
api_key=api_key,
http_options={"timeout": _FALLBACK_TIMEOUT},
)
self.model = "gemini-3.1-pro-preview"
self.fallback_model = "gemini-3-flash-preview"
async def _generate_content(self, contents, config) -> any:
"""Call generate_content, falling back to fallback_model if the primary fails or times out."""
try:
return await asyncio.wait_for(
self.client.aio.models.generate_content(
model=self.model,
contents=contents,
config=config,
),
timeout=_PRIMARY_TIMEOUT,
)
except asyncio.TimeoutError:
logger.warning(
f"[GEMINI API] Primary model {self.model} timed out after {_PRIMARY_TIMEOUT}s. "
f"Retrying with fallback {self.fallback_model}"
return await self.primary_client.aio.models.generate_content(
model=self.model,
contents=contents,
config=config,
)
except Exception as e:
logger.warning(
f"[GEMINI API] Primary model {self.model} failed: {e}. "
f"Retrying with fallback {self.fallback_model}"
)
return await asyncio.wait_for(
self.client.aio.models.generate_content(
model=self.fallback_model,
contents=contents,
config=config,
),
timeout=_FALLBACK_TIMEOUT,
return await self.fallback_client.aio.models.generate_content(
model=self.fallback_model,
contents=contents,
config=config,
)
async def analyze_with_image(