Fix asyncio event loop mismatch in LLM service

The genai.Client and AsyncOpenAI clients were created at module import time, before the Quart/Hypercorn event loop existed. This caused "Future attached to a different loop" errors when async calls were made, which in turn made autonomous focus group conversations stop with "excessive_silence". The clients are now initialized lazily: each one is created on first use, from within the running event loop context, via the get_gemini_client() and get_openai_client() helper functions. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-19 13:24:23 -06:00 · 2025-12-19 13:24:23 -06:00 · 45c7a52aeb
commit 45c7a52aeb
parent bb16165a83
1 changed file with 65 additions and 49 deletions
--- a/backend/app/services/llm_service.py
+++ b/backend/app/services/llm_service.py
@@ -17,13 +17,29 @@ from typing import Dict, Any, Optional, Union, List
 from PIL import Image
 import io

-# Set up the Gemini API key and client
+# Set up the Gemini API key (client created lazily to avoid event loop issues)
 GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', 'AIzaSyAc50jzC3k9K1PmKT1vGFi0sCdhhnqsvl0')
-gemini_client = genai.Client(api_key=GEMINI_API_KEY)
+_gemini_client = None

-# Set up OpenAI API key
+# Set up OpenAI API key (client created lazily to avoid event loop issues)
 OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'REDACTED_OPENAI_KEY')
-openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0)
+_openai_client = None
+
+
+def get_gemini_client():
+    """Get or create the Gemini client lazily within the running event loop."""
+    global _gemini_client
+    if _gemini_client is None:
+        _gemini_client = genai.Client(api_key=GEMINI_API_KEY)
+    return _gemini_client
+
+
+def get_openai_client():
+    """Get or create the OpenAI client lazily within the running event loop."""
+    global _openai_client
+    if _openai_client is None:
+        _openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0)
+    return _openai_client

 # The default model we're using
 DEFAULT_MODEL = "gemini-3-pro-preview"
@@ -202,45 +218,45 @@ class LLMService:
                        
                        # Note: GPT-5 Responses API does not support max_tokens parameter
                        
-                        response = await openai_client.responses.create(**kwargs)
+                        response = await get_openai_client().responses.create(**kwargs)
                        result = LLMService._extract_responses_api_content(response)
-                        
+
                    else:
                        # Use Chat Completions API for non-GPT-5 models
                        messages = []
                        if system_prompt:
                            messages.append({"role": "system", "content": system_prompt})
                        messages.append({"role": "user", "content": prompt})
-                        
+
                        kwargs = {
                            "model": actual_model,
                            "messages": messages,
                            "temperature": temperature,
                        }
-                        
+
                        if max_tokens:
                            kwargs["max_tokens"] = max_tokens
-                        
-                        response = await openai_client.chat.completions.create(**kwargs)
+
+                        response = await get_openai_client().chat.completions.create(**kwargs)
                        result = response.choices[0].message.content.strip()
-                    
+
                else:
                    # New Google GenAI SDK - async call
                    config = genai.types.GenerateContentConfig(
                        temperature=temperature,
                    )
-                    
+
                    if max_tokens:
                        config.max_output_tokens = max_tokens
-                    
+
                    # Prepare the prompt - combine system prompt with user prompt if needed
                    if system_prompt:
                        combined_prompt = f"System: {system_prompt}\n\nUser: {prompt}"
                    else:
                        combined_prompt = prompt
-                    
+
                    # Make async call to new GenAI SDK
-                    response = await gemini_client.aio.models.generate_content(
+                    response = await get_gemini_client().aio.models.generate_content(
                        model=actual_model,
                        contents=combined_prompt,
                        config=config
@@ -488,70 +504,70 @@ class LLMService:
                        }
                        
                        # Note: GPT-5 Responses API does not support max_tokens parameter
-                        
-                        response = await openai_client.responses.create(**kwargs)
+
+                        response = await get_openai_client().responses.create(**kwargs)
                        result = LLMService._extract_responses_api_content(response)
-                        
+
                    else:
                        # Use Chat Completions API for non-GPT-5 models
                        content = [{"type": "text", "text": prompt}]
                        content.extend(image_content)
-                        
+
                        kwargs = {
                            "model": actual_model,
                            "messages": [{"role": "user", "content": content}],
                            "temperature": temperature,
                        }
-                        
+
                        if max_tokens:
                            kwargs["max_tokens"] = max_tokens
-                        
-                        response = await openai_client.chat.completions.create(**kwargs)
+
+                        response = await get_openai_client().chat.completions.create(**kwargs)
                        result = response.choices[0].message.content.strip()
-                    
+
                else:
                    # New Google GenAI SDK - multimodal async call
                    config = genai.types.GenerateContentConfig(
                        temperature=temperature,
                    )
-                    
+
                    if max_tokens:
                        config.max_output_tokens = max_tokens
-                    
+
                    # Prepare multimodal content for new SDK
                    content_parts = []
-                    
+
                    # Add text prompt
                    content_parts.append(genai.types.Part.from_text(prompt))
-                    
+
                    # Add images
                    for image_path in image_paths:
                        try:
                            if not os.path.exists(image_path):
                                raise LLMServiceError(f"Image file not found: {image_path}")
-                            
+
                            # Read image data for new SDK
                            with open(image_path, 'rb') as img_file:
                                image_data = img_file.read()
-                            
+
                            # Determine MIME type from file extension
                            ext = os.path.splitext(image_path)[1].lower()
                            mime_type = {
                                '.jpg': 'image/jpeg',
-                                '.jpeg': 'image/jpeg', 
+                                '.jpeg': 'image/jpeg',
                                '.png': 'image/png',
                                '.gif': 'image/gif',
                                '.webp': 'image/webp'
                            }.get(ext, 'image/jpeg')  # Default to JPEG
-                            
+
                            content_parts.append(genai.types.Part.from_bytes(image_data, mime_type=mime_type))
                            logger.debug(f"Successfully loaded image for new GenAI SDK: {image_path}")
-                            
+
                        except Exception as e:
                            raise LLMServiceError(f"Failed to load image {image_path}: {str(e)}")
-                    
+
                    # Make async call to new GenAI SDK with multimodal content
-                    response = await gemini_client.aio.models.generate_content(
+                    response = await get_gemini_client().aio.models.generate_content(
                        model=actual_model,
                        contents=content_parts,
                        config=config
@@ -713,54 +729,54 @@ class LLMService:
                            }
                            
                            # Note: GPT-5 Responses API does not support max_tokens parameter
-                            
-                            response = await openai_client.responses.create(**kwargs)
+
+                            response = await get_openai_client().responses.create(**kwargs)
                            result = LLMService._extract_responses_api_content(response)
-                            
+
                        else:
                            # Use Chat Completions API for non-GPT-5 models
                            content = [{"type": "text", "text": full_prompt}]
                            content.extend(image_content)
-                            
+
                            kwargs = {
                                "model": actual_model,
                                "messages": [{"role": "user", "content": content}],
                                "temperature": temperature,
                            }
-                            
+
                            if max_tokens:
                                kwargs["max_tokens"] = max_tokens
-                            
-                            response = await openai_client.chat.completions.create(**kwargs)
+
+                            response = await get_openai_client().chat.completions.create(**kwargs)
                            result = response.choices[0].message.content.strip()
-                        
+
                    else:
                        # New Google GenAI SDK - contextual multimodal async call
                        config = genai.types.GenerateContentConfig(
                            temperature=temperature,
                        )
-                        
+
                        if max_tokens:
                            config.max_output_tokens = max_tokens
-                        
+
                        # Prepare content parts for new SDK
                        new_content_parts = []
-                        
+
                        # Add text prompt
                        new_content_parts.append(genai.types.Part.from_text(full_prompt))
-                        
+
                        # Convert PIL image parts to new SDK format
                        for img in image_parts:
                            # Convert PIL image to bytes
                            buffer = io.BytesIO()
                            img.save(buffer, format='PNG')
                            image_data = buffer.getvalue()
-                            
+
                            # Add as image part in new SDK format
                            new_content_parts.append(genai.types.Part.from_bytes(image_data, mime_type='image/png'))
-                        
+
                        # Make async call to new GenAI SDK
-                        response = await gemini_client.aio.models.generate_content(
+                        response = await get_gemini_client().aio.models.generate_content(
                            model=actual_model,
                            contents=new_content_parts,
                            config=config