From 6ee80e67aa1f36d0a85f649f92a5a1d4f51088eb Mon Sep 17 00:00:00 2001 From: michael Date: Fri, 19 Dec 2025 16:56:36 -0600 Subject: [PATCH] Create fresh LLM clients per call instead of caching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous event loop tracking approach still caused issues - when replacing a cached client, its garbage collection triggers aclose() which tries to close the aiohttp session on the wrong event loop. Simplest fix: create a fresh client for each call. The overhead is minimal compared to the actual LLM API call, and this completely avoids all event loop mismatch issues in ASGI environments. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- backend/app/services/llm_service.py | 36 +++++++++-------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/backend/app/services/llm_service.py b/backend/app/services/llm_service.py index 1dcf6a54..b6fb9d51 100755 --- a/backend/app/services/llm_service.py +++ b/backend/app/services/llm_service.py @@ -17,43 +17,29 @@ from typing import Dict, Any, Optional, Union, List from PIL import Image import io -# Set up the Gemini API key (client created lazily to avoid event loop issues) +# Set up API keys GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', 'AIzaSyAc50jzC3k9K1PmKT1vGFi0sCdhhnqsvl0') -_gemini_client = None -_gemini_client_loop = None - -# Set up OpenAI API key (client created lazily to avoid event loop issues) OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'REDACTED_OPENAI_KEY') -_openai_client = None -_openai_client_loop = None def get_gemini_client(): - """Get or create the Gemini client for the current event loop. + """Create a fresh Gemini client for each call. - Recreates the client if the event loop has changed to avoid - 'Future attached to a different loop' errors in ASGI environments. + Creating a new client per call avoids event loop mismatch issues that occur + when caching clients in ASGI environments where requests may come on different + event loops. The overhead is minimal compared to the LLM API call. """ - global _gemini_client, _gemini_client_loop - current_loop = asyncio.get_running_loop() - if _gemini_client is None or _gemini_client_loop is not current_loop: - _gemini_client = genai.Client(api_key=GEMINI_API_KEY) - _gemini_client_loop = current_loop - return _gemini_client + return genai.Client(api_key=GEMINI_API_KEY) def get_openai_client(): - """Get or create the OpenAI client for the current event loop. + """Create a fresh OpenAI client for each call. - Recreates the client if the event loop has changed to avoid - 'Future attached to a different loop' errors in ASGI environments. + Creating a new client per call avoids event loop mismatch issues that occur + when caching clients in ASGI environments where requests may come on different + event loops. The overhead is minimal compared to the LLM API call. """ - global _openai_client, _openai_client_loop - current_loop = asyncio.get_running_loop() - if _openai_client is None or _openai_client_loop is not current_loop: - _openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0) - _openai_client_loop = current_loop - return _openai_client + return AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0) # The default model we're using DEFAULT_MODEL = "gemini-3-pro-preview"