From 6ee80e67aa1f36d0a85f649f92a5a1d4f51088eb Mon Sep 17 00:00:00 2001
From: michael <michael@modernfreedom.com>
Date: Fri, 19 Dec 2025 16:56:36 -0600
Subject: [PATCH] Create fresh LLM clients per call instead of caching
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous event-loop tracking approach still caused issues: when a cached
client is replaced, garbage collection of the old client triggers aclose(),
which tries to close its aiohttp session on the wrong event loop.

Simplest fix: create a fresh client for each call. The overhead is minimal
compared to the actual LLM API call, and this completely avoids all event
loop mismatch issues in ASGI environments.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/app/services/llm_service.py | 36 +++++++++--------------------
 1 file changed, 11 insertions(+), 25 deletions(-)

diff --git a/backend/app/services/llm_service.py b/backend/app/services/llm_service.py
index 1dcf6a54..b6fb9d51 100755
--- a/backend/app/services/llm_service.py
+++ b/backend/app/services/llm_service.py
@@ -17,43 +17,29 @@ from typing import Dict, Any, Optional, Union, List
 from PIL import Image
 import io
 
-# Set up the Gemini API key (client created lazily to avoid event loop issues)
+# Set up API keys
 GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', 'AIzaSyAc50jzC3k9K1PmKT1vGFi0sCdhhnqsvl0')
-_gemini_client = None
-_gemini_client_loop = None
-
-# Set up OpenAI API key (client created lazily to avoid event loop issues)
 OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'REDACTED_OPENAI_KEY')
-_openai_client = None
-_openai_client_loop = None
 
 
 def get_gemini_client():
-    """Get or create the Gemini client for the current event loop.
+    """Create a fresh Gemini client for each call.
 
-    Recreates the client if the event loop has changed to avoid
-    'Future attached to a different loop' errors in ASGI environments.
+    Creating a new client per call avoids event loop mismatch issues that occur
+    when caching clients in ASGI environments where requests may come on different
+    event loops. The overhead is minimal compared to the LLM API call.
     """
-    global _gemini_client, _gemini_client_loop
-    current_loop = asyncio.get_running_loop()
-    if _gemini_client is None or _gemini_client_loop is not current_loop:
-        _gemini_client = genai.Client(api_key=GEMINI_API_KEY)
-        _gemini_client_loop = current_loop
-    return _gemini_client
+    return genai.Client(api_key=GEMINI_API_KEY)
 
 
 def get_openai_client():
-    """Get or create the OpenAI client for the current event loop.
+    """Create a fresh OpenAI client for each call.
 
-    Recreates the client if the event loop has changed to avoid
-    'Future attached to a different loop' errors in ASGI environments.
+    Creating a new client per call avoids event loop mismatch issues that occur
+    when caching clients in ASGI environments where requests may come on different
+    event loops. The overhead is minimal compared to the LLM API call.
     """
-    global _openai_client, _openai_client_loop
-    current_loop = asyncio.get_running_loop()
-    if _openai_client is None or _openai_client_loop is not current_loop:
-        _openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0)
-        _openai_client_loop = current_loop
-    return _openai_client
+    return AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0)
 
 # The default model we're using
 DEFAULT_MODEL = "gemini-3-pro-preview"