Fix asyncio event loop mismatch in LLM service

The genai.Client and AsyncOpenAI clients were being created at module
import time, before the Quart/Hypercorn event loop existed. This caused
"Future attached to a different loop" errors when async calls were made,
resulting in autonomous focus group conversations stopping with
"excessive_silence".

Changed to lazy initialization - clients are now created on first use
within the running event loop context via get_gemini_client() and
get_openai_client() helper functions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
michael 2025-12-19 13:24:23 -06:00
parent bb16165a83
commit 45c7a52aeb

View file

@ -17,13 +17,29 @@ from typing import Dict, Any, Optional, Union, List
from PIL import Image
import io
# Set up the Gemini API key and client
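# Set up the Gemini API key (client created lazily to avoid event loop issues)
# NOTE(review): the fallback value below is a literal API key committed to source control; rotate it and require GEMINI_API_KEY from the environment instead.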
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', 'AIzaSyAc50jzC3k9K1PmKT1vGFi0sCdhhnqsvl0')
gemini_client = genai.Client(api_key=GEMINI_API_KEY)
_gemini_client = None
# Set up OpenAI API key
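# Set up OpenAI API key (client created lazily to avoid event loop issues)
# NOTE(review): the fallback value below is a literal placeholder; when OPENAI_API_KEY is unset the client is presumably built with an unusable key and fails only at request time - confirm and prefer failing fast.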
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'REDACTED_OPENAI_KEY')
openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0)
_openai_client = None
def get_gemini_client():
    """Return the shared Gemini client, constructing it on the first call.

    The instance is stored in the module-level ``_gemini_client`` so every
    later call returns the same object. Construction is deferred to the
    first call rather than import time; callers are expected to make that
    first call from inside the running event loop.
    """
    global _gemini_client
    # Fast path: a client was already built by an earlier call.
    if _gemini_client is not None:
        return _gemini_client
    _gemini_client = genai.Client(api_key=GEMINI_API_KEY)
    return _gemini_client
def get_openai_client():
    """Return the shared AsyncOpenAI client, constructing it on the first call.

    The instance is stored in the module-level ``_openai_client`` so every
    later call returns the same object. Construction is deferred to the
    first call rather than import time; callers are expected to make that
    first call from inside the running event loop.
    """
    global _openai_client
    # Fast path: a client was already built by an earlier call.
    if _openai_client is not None:
        return _openai_client
    _openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0)
    return _openai_client
# The default model we're using
DEFAULT_MODEL = "gemini-3-pro-preview"
@ -202,45 +218,45 @@ class LLMService:
# Note: GPT-5 Responses API does not support max_tokens parameter
response = await openai_client.responses.create(**kwargs)
response = await get_openai_client().responses.create(**kwargs)
result = LLMService._extract_responses_api_content(response)
else:
# Use Chat Completions API for non-GPT-5 models
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
kwargs = {
"model": actual_model,
"messages": messages,
"temperature": temperature,
}
if max_tokens:
kwargs["max_tokens"] = max_tokens
response = await openai_client.chat.completions.create(**kwargs)
response = await get_openai_client().chat.completions.create(**kwargs)
result = response.choices[0].message.content.strip()
else:
# New Google GenAI SDK - async call
config = genai.types.GenerateContentConfig(
temperature=temperature,
)
if max_tokens:
config.max_output_tokens = max_tokens
# Prepare the prompt - combine system prompt with user prompt if needed
if system_prompt:
combined_prompt = f"System: {system_prompt}\n\nUser: {prompt}"
else:
combined_prompt = prompt
# Make async call to new GenAI SDK
response = await gemini_client.aio.models.generate_content(
response = await get_gemini_client().aio.models.generate_content(
model=actual_model,
contents=combined_prompt,
config=config
@ -488,70 +504,70 @@ class LLMService:
}
# Note: GPT-5 Responses API does not support max_tokens parameter
response = await openai_client.responses.create(**kwargs)
response = await get_openai_client().responses.create(**kwargs)
result = LLMService._extract_responses_api_content(response)
else:
# Use Chat Completions API for non-GPT-5 models
content = [{"type": "text", "text": prompt}]
content.extend(image_content)
kwargs = {
"model": actual_model,
"messages": [{"role": "user", "content": content}],
"temperature": temperature,
}
if max_tokens:
kwargs["max_tokens"] = max_tokens
response = await openai_client.chat.completions.create(**kwargs)
response = await get_openai_client().chat.completions.create(**kwargs)
result = response.choices[0].message.content.strip()
else:
# New Google GenAI SDK - multimodal async call
config = genai.types.GenerateContentConfig(
temperature=temperature,
)
if max_tokens:
config.max_output_tokens = max_tokens
# Prepare multimodal content for new SDK
content_parts = []
# Add text prompt
content_parts.append(genai.types.Part.from_text(prompt))
# Add images
for image_path in image_paths:
try:
if not os.path.exists(image_path):
raise LLMServiceError(f"Image file not found: {image_path}")
# Read image data for new SDK
with open(image_path, 'rb') as img_file:
image_data = img_file.read()
# Determine MIME type from file extension
ext = os.path.splitext(image_path)[1].lower()
mime_type = {
'.jpg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.png': 'image/png',
'.gif': 'image/gif',
'.webp': 'image/webp'
}.get(ext, 'image/jpeg') # Default to JPEG
content_parts.append(genai.types.Part.from_bytes(image_data, mime_type=mime_type))
logger.debug(f"Successfully loaded image for new GenAI SDK: {image_path}")
except Exception as e:
raise LLMServiceError(f"Failed to load image {image_path}: {str(e)}")
# Make async call to new GenAI SDK with multimodal content
response = await gemini_client.aio.models.generate_content(
response = await get_gemini_client().aio.models.generate_content(
model=actual_model,
contents=content_parts,
config=config
@ -713,54 +729,54 @@ class LLMService:
}
# Note: GPT-5 Responses API does not support max_tokens parameter
response = await openai_client.responses.create(**kwargs)
response = await get_openai_client().responses.create(**kwargs)
result = LLMService._extract_responses_api_content(response)
else:
# Use Chat Completions API for non-GPT-5 models
content = [{"type": "text", "text": full_prompt}]
content.extend(image_content)
kwargs = {
"model": actual_model,
"messages": [{"role": "user", "content": content}],
"temperature": temperature,
}
if max_tokens:
kwargs["max_tokens"] = max_tokens
response = await openai_client.chat.completions.create(**kwargs)
response = await get_openai_client().chat.completions.create(**kwargs)
result = response.choices[0].message.content.strip()
else:
# New Google GenAI SDK - contextual multimodal async call
config = genai.types.GenerateContentConfig(
temperature=temperature,
)
if max_tokens:
config.max_output_tokens = max_tokens
# Prepare content parts for new SDK
new_content_parts = []
# Add text prompt
new_content_parts.append(genai.types.Part.from_text(full_prompt))
# Convert PIL image parts to new SDK format
for img in image_parts:
# Convert PIL image to bytes
buffer = io.BytesIO()
img.save(buffer, format='PNG')
image_data = buffer.getvalue()
# Add as image part in new SDK format
new_content_parts.append(genai.types.Part.from_bytes(image_data, mime_type='image/png'))
# Make async call to new GenAI SDK
response = await gemini_client.aio.models.generate_content(
response = await get_gemini_client().aio.models.generate_content(
model=actual_model,
contents=new_content_parts,
config=config