Create fresh LLM clients per call instead of caching

The previous event-loop tracking approach still caused issues: when a cached client is replaced, garbage collection of the old client triggers aclose(), which tries to close its aiohttp session on the wrong event loop.

The simplest fix is to create a fresh client for each call. The overhead is minimal compared to the actual LLM API call, and this completely avoids all event loop mismatch issues in ASGI environments.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-19 16:56:36 -06:00 · 2025-12-19 16:56:36 -06:00 · 6ee80e67aa
commit 6ee80e67aa
parent 94f98b837b
1 changed file with 11 additions and 25 deletions
--- a/backend/app/services/llm_service.py
+++ b/backend/app/services/llm_service.py
@@ -17,43 +17,29 @@ from typing import Dict, Any, Optional, Union, List
 from PIL import Image
 import io

-# Set up the Gemini API key (client created lazily to avoid event loop issues)
+# Set up API keys
 GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', 'AIzaSyAc50jzC3k9K1PmKT1vGFi0sCdhhnqsvl0')
-_gemini_client = None
-_gemini_client_loop = None
-
-# Set up OpenAI API key (client created lazily to avoid event loop issues)
 OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'REDACTED_OPENAI_KEY')
-_openai_client = None
-_openai_client_loop = None


 def get_gemini_client():
-    """Get or create the Gemini client for the current event loop.
+    """Create a fresh Gemini client for each call.

-    Recreates the client if the event loop has changed to avoid
-    'Future attached to a different loop' errors in ASGI environments.
+    Creating a new client per call avoids event loop mismatch issues that occur
+    when caching clients in ASGI environments where requests may come on different
+    event loops. The overhead is minimal compared to the LLM API call.
    """
-    global _gemini_client, _gemini_client_loop
-    current_loop = asyncio.get_running_loop()
-    if _gemini_client is None or _gemini_client_loop is not current_loop:
-        _gemini_client = genai.Client(api_key=GEMINI_API_KEY)
-        _gemini_client_loop = current_loop
-    return _gemini_client
+    return genai.Client(api_key=GEMINI_API_KEY)


 def get_openai_client():
-    """Get or create the OpenAI client for the current event loop.
+    """Create a fresh OpenAI client for each call.

-    Recreates the client if the event loop has changed to avoid
-    'Future attached to a different loop' errors in ASGI environments.
+    Creating a new client per call avoids event loop mismatch issues that occur
+    when caching clients in ASGI environments where requests may come on different
+    event loops. The overhead is minimal compared to the LLM API call.
    """
-    global _openai_client, _openai_client_loop
-    current_loop = asyncio.get_running_loop()
-    if _openai_client is None or _openai_client_loop is not current_loop:
-        _openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0)
-        _openai_client_loop = current_loop
-    return _openai_client
+    return AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0)

 # The default model we're using
 DEFAULT_MODEL = "gemini-3-pro-preview"