diff --git a/backend/app/services/llm_service.py b/backend/app/services/llm_service.py index 3f676899..0d47b55b 100644 --- a/backend/app/services/llm_service.py +++ b/backend/app/services/llm_service.py @@ -17,13 +17,29 @@ from typing import Dict, Any, Optional, Union, List from PIL import Image import io -# Set up the Gemini API key and client +# Set up the Gemini API key (client created lazily to avoid event loop issues) GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY', 'AIzaSyAc50jzC3k9K1PmKT1vGFi0sCdhhnqsvl0') -gemini_client = genai.Client(api_key=GEMINI_API_KEY) +_gemini_client = None -# Set up OpenAI API key +# Set up OpenAI API key (client created lazily to avoid event loop issues) OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'REDACTED_OPENAI_KEY') -openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0) +_openai_client = None + + +def get_gemini_client(): + """Get or create the Gemini client lazily within the running event loop.""" + global _gemini_client + if _gemini_client is None: + _gemini_client = genai.Client(api_key=GEMINI_API_KEY) + return _gemini_client + + +def get_openai_client(): + """Get or create the OpenAI client lazily within the running event loop.""" + global _openai_client + if _openai_client is None: + _openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY, timeout=600.0) + return _openai_client # The default model we're using DEFAULT_MODEL = "gemini-3-pro-preview" @@ -202,45 +218,45 @@ class LLMService: # Note: GPT-5 Responses API does not support max_tokens parameter - response = await openai_client.responses.create(**kwargs) + response = await get_openai_client().responses.create(**kwargs) result = LLMService._extract_responses_api_content(response) - + else: # Use Chat Completions API for non-GPT-5 models messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) - + kwargs = { "model": actual_model, "messages": messages, "temperature": temperature, } - + if max_tokens: kwargs["max_tokens"] = max_tokens - - response = await openai_client.chat.completions.create(**kwargs) + + response = await get_openai_client().chat.completions.create(**kwargs) result = response.choices[0].message.content.strip() - + else: # New Google GenAI SDK - async call config = genai.types.GenerateContentConfig( temperature=temperature, ) - + if max_tokens: config.max_output_tokens = max_tokens - + # Prepare the prompt - combine system prompt with user prompt if needed if system_prompt: combined_prompt = f"System: {system_prompt}\n\nUser: {prompt}" else: combined_prompt = prompt - + # Make async call to new GenAI SDK - response = await gemini_client.aio.models.generate_content( + response = await get_gemini_client().aio.models.generate_content( model=actual_model, contents=combined_prompt, config=config @@ -488,70 +504,70 @@ class LLMService: } # Note: GPT-5 Responses API does not support max_tokens parameter - - response = await openai_client.responses.create(**kwargs) + + response = await get_openai_client().responses.create(**kwargs) result = LLMService._extract_responses_api_content(response) - + else: # Use Chat Completions API for non-GPT-5 models content = [{"type": "text", "text": prompt}] content.extend(image_content) - + kwargs = { "model": actual_model, "messages": [{"role": "user", "content": content}], "temperature": temperature, } - + if max_tokens: kwargs["max_tokens"] = max_tokens - - response = await openai_client.chat.completions.create(**kwargs) + + response = await get_openai_client().chat.completions.create(**kwargs) result = response.choices[0].message.content.strip() - + else: # New Google GenAI SDK - multimodal async call config = genai.types.GenerateContentConfig( temperature=temperature, ) - + if max_tokens: config.max_output_tokens = max_tokens - + # Prepare multimodal content for new SDK content_parts = [] - + # Add text prompt content_parts.append(genai.types.Part.from_text(prompt)) - + # Add images for image_path in image_paths: try: if not os.path.exists(image_path): raise LLMServiceError(f"Image file not found: {image_path}") - + # Read image data for new SDK with open(image_path, 'rb') as img_file: image_data = img_file.read() - + # Determine MIME type from file extension ext = os.path.splitext(image_path)[1].lower() mime_type = { '.jpg': 'image/jpeg', - '.jpeg': 'image/jpeg', + '.jpeg': 'image/jpeg', '.png': 'image/png', '.gif': 'image/gif', '.webp': 'image/webp' }.get(ext, 'image/jpeg') # Default to JPEG - + content_parts.append(genai.types.Part.from_bytes(image_data, mime_type=mime_type)) logger.debug(f"Successfully loaded image for new GenAI SDK: {image_path}") - + except Exception as e: raise LLMServiceError(f"Failed to load image {image_path}: {str(e)}") - + # Make async call to new GenAI SDK with multimodal content - response = await gemini_client.aio.models.generate_content( + response = await get_gemini_client().aio.models.generate_content( model=actual_model, contents=content_parts, config=config @@ -713,54 +729,54 @@ class LLMService: } # Note: GPT-5 Responses API does not support max_tokens parameter - - response = await openai_client.responses.create(**kwargs) + + response = await get_openai_client().responses.create(**kwargs) result = LLMService._extract_responses_api_content(response) - + else: # Use Chat Completions API for non-GPT-5 models content = [{"type": "text", "text": full_prompt}] content.extend(image_content) - + kwargs = { "model": actual_model, "messages": [{"role": "user", "content": content}], "temperature": temperature, } - + if max_tokens: kwargs["max_tokens"] = max_tokens - - response = await openai_client.chat.completions.create(**kwargs) + + response = await get_openai_client().chat.completions.create(**kwargs) result = response.choices[0].message.content.strip() - + else: # New Google GenAI SDK - contextual multimodal async call config = genai.types.GenerateContentConfig( temperature=temperature, ) - + if max_tokens: config.max_output_tokens = max_tokens - + # Prepare content parts for new SDK new_content_parts = [] - + # Add text prompt new_content_parts.append(genai.types.Part.from_text(full_prompt)) - + # Convert PIL image parts to new SDK format for img in image_parts: # Convert PIL image to bytes buffer = io.BytesIO() img.save(buffer, format='PNG') image_data = buffer.getvalue() - + # Add as image part in new SDK format new_content_parts.append(genai.types.Part.from_bytes(image_data, mime_type='image/png')) - + # Make async call to new GenAI SDK - response = await gemini_client.aio.models.generate_content( + response = await get_gemini_client().aio.models.generate_content( model=actual_model, contents=new_content_parts, config=config