diff --git a/backend/app/services/llm_service.py b/backend/app/services/llm_service.py index b6fb9d51..06241a8a 100755 --- a/backend/app/services/llm_service.py +++ b/backend/app/services/llm_service.py @@ -11,6 +11,7 @@ import asyncio import logging import base64 from google import genai +from google.genai import errors as genai_errors from openai import AsyncOpenAI import httpx from typing import Dict, Any, Optional, Union, List @@ -269,20 +270,48 @@ class LLMService: logger.info(f"LLM content generation succeeded on attempt {attempt_num}/{max_retries}") return result + except genai_errors.APIError as e: + # Google GenAI SDK specific error handling + last_error = e + error_code = getattr(e, 'code', 'unknown') + error_message = getattr(e, 'message', str(e)) or str(e) or repr(e) + + logger.warning(f"LLM attempt {attempt_num}/{max_retries} failed: [Google API {error_code}] {error_message}") + + # Retryable: 429 rate limit, 500+ server errors + is_retryable = ( + error_code == 429 or + (isinstance(error_code, int) and error_code >= 500) + ) + + if is_retryable: + if attempt < max_retries - 1: + wait_time = 2 ** attempt + logger.info(f"Retryable Google API error. Waiting {wait_time}s before retry {attempt_num + 1}/{max_retries}") + await asyncio.sleep(wait_time) + continue + else: + logger.error(f"Retryable Google API error [{error_code}] but max retries ({max_retries}) reached") + else: + # 400, 403, 404, etc. - non-retryable + logger.error(f"Non-retryable Google API error [{error_code}]: {error_message}") + break + except Exception as e: + # Fallback for OpenAI and other non-Google errors last_error = e error_message = str(e).lower() - + logger.warning(f"LLM attempt {attempt_num}/{max_retries} failed: {str(e)}") - + # Check if this is a retryable error (API internal errors, rate limiting, etc.) - if ("500" in error_message or - "internal error" in error_message or + if ("500" in error_message or + "internal error" in error_message or "internal server error" in error_message or "service unavailable" in error_message or "timeout" in error_message or "rate" in error_message): - + if attempt < max_retries - 1: # Wait before retrying (exponential backoff) wait_time = 2 ** attempt # 1s, 2s, 4s @@ -294,10 +323,17 @@ class LLMService: else: logger.error(f"Non-retryable error detected: {str(e)}") break - + # If we've exhausted all retries or hit a non-retryable error, raise the last error - logger.error(f"LLM content generation failed after {max_retries} attempts. Final error: {str(last_error)}") - raise LLMServiceError(f"Error generating content: {str(last_error)}") + error_detail = "" + if isinstance(last_error, genai_errors.APIError): + error_code = getattr(last_error, 'code', 'unknown') + error_msg = getattr(last_error, 'message', str(last_error)) or str(last_error) or repr(last_error) + error_detail = f"[Google API {error_code}] {error_msg}" + else: + error_detail = str(last_error) + logger.error(f"LLM content generation failed after {max_retries} attempts. Final error: {error_detail}") + raise LLMServiceError(f"Error generating content: {error_detail}") @staticmethod def parse_json_response(response_text: str) -> Union[Dict[str, Any], List[Any]]: @@ -795,34 +831,68 @@ class LLMService: print(f" - Result repr: {repr(result[:50]) if result else 'NONE'}") return result + except genai_errors.APIError as e: + # Google GenAI SDK specific error handling + last_error = e + error_code = getattr(e, 'code', 'unknown') + error_message = getattr(e, 'message', str(e)) or str(e) or repr(e) + + logger.warning(f"Contextual multimodal attempt {attempt_num}/{max_retries} failed: [Google API {error_code}] {error_message}") + + # Retryable: 429 rate limit, 500+ server errors + is_retryable = ( + error_code == 429 or + (isinstance(error_code, int) and error_code >= 500) + ) + + if is_retryable: + if attempt < max_retries - 1: + wait_time = 2 ** attempt + logger.info(f"Retryable Google API error. Waiting {wait_time}s before retry {attempt_num + 1}/{max_retries}") + await asyncio.sleep(wait_time) + continue + else: + logger.error(f"Retryable Google API error [{error_code}] but max retries ({max_retries}) reached") + else: + logger.error(f"Non-retryable Google API error [{error_code}]: {error_message}") + break + except Exception as e: + # Fallback for non-Google errors last_error = e error_message = str(e).lower() - + logger.warning(f"Contextual multimodal attempt {attempt_num}/{max_retries} failed: {str(e)}") - + # Check if this is a retryable error - if ("500" in error_message or - "internal error" in error_message or + if ("500" in error_message or + "internal error" in error_message or "internal server error" in error_message or "service unavailable" in error_message or "timeout" in error_message or "rate" in error_message): - + if attempt < max_retries - 1: wait_time = 2 ** attempt logger.info(f"Retryable error detected. Waiting {wait_time} seconds before retry {attempt_num + 1}/{max_retries}") - time.sleep(wait_time) + await asyncio.sleep(wait_time) continue else: logger.error(f"Retryable error detected but max retries ({max_retries}) reached") else: logger.error(f"Non-retryable error detected: {str(e)}") break - + # If multimodal failed, raise the error - logger.error(f"Contextual multimodal generation failed after {max_retries} attempts. Final error: {str(last_error)}") - raise LLMServiceError(f"Error generating contextual multimodal content: {str(last_error)}") + error_detail = "" + if isinstance(last_error, genai_errors.APIError): + error_code = getattr(last_error, 'code', 'unknown') + error_msg = getattr(last_error, 'message', str(last_error)) or str(last_error) or repr(last_error) + error_detail = f"[Google API {error_code}] {error_msg}" + else: + error_detail = str(last_error) + logger.error(f"Contextual multimodal generation failed after {max_retries} attempts. Final error: {error_detail}") + raise LLMServiceError(f"Error generating contextual multimodal content: {error_detail}") else: # No images, use standard text generation