diff --git a/app/config.py b/app/config.py index 87e8cb7..41bc332 100644 --- a/app/config.py +++ b/app/config.py @@ -29,7 +29,7 @@ class Settings(BaseSettings): # OpenAI OPENAI_API_KEY: str = "" - AI_MODEL: str = "gpt-4o-mini" + AI_MODEL: str = "gpt-5.2" MAX_TOKENS: int = 500 TEMPERATURE: float = 0.5 MAX_TEXT_LENGTH: int = 4000 diff --git a/src/config.py b/src/config.py index 0651f8a..891b593 100644 --- a/src/config.py +++ b/src/config.py @@ -39,7 +39,7 @@ class Config: # AI Settings (for CLI and Web AI mode) OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') - AI_MODEL = os.getenv('AI_MODEL', 'gpt-4o-mini') # Better than gpt-3.5-turbo + AI_MODEL = os.getenv('AI_MODEL', 'gpt-5.2') MAX_TOKENS = int(os.getenv('MAX_TOKENS', '500')) TEMPERATURE = float(os.getenv('TEMPERATURE', '0.5')) # 0.5 better for factual content MAX_TEXT_LENGTH = int(os.getenv('MAX_TEXT_LENGTH', '4000')) diff --git a/src/metadata_analyzer.py b/src/metadata_analyzer.py index f0de954..e6b680d 100644 --- a/src/metadata_analyzer.py +++ b/src/metadata_analyzer.py @@ -25,16 +25,15 @@ logger = get_logger(__name__) class MetadataAnalyzer: """Analyze content and generate metadata using OpenAI GPT with production-ready error handling.""" - # Valid OpenAI models (as of January 2026) + # Valid OpenAI models VALID_MODELS = [ - # GPT-5 models (2026 release) - 'gpt-5', 'gpt-5-mini', 'gpt-5-nano', - 'gpt-5-mini-2025-08-07', 'gpt-5-nano-2025-08-07', + # GPT-5 models + 'gpt-5', 'gpt-5.2', 'gpt-5-mini', 'gpt-5-nano', # GPT-4 models - 'gpt-4o', 'gpt-4o-mini', 'gpt-4o-mini-2024-07-18', + 'gpt-4o', 'gpt-4o-mini', 'gpt-4-turbo', 'gpt-4', 'gpt-3.5-turbo', # Reasoning models - 'o1', 'o1-mini', 'o1-preview' + 'o1', 'o1-mini', 'o3', 'o3-mini', 'o4-mini', ] def __init__(self): @@ -131,25 +130,32 @@ class MetadataAnalyzer: return params + def _use_responses_api(self) -> bool: + """Check if model should use the Responses API (v1/responses) instead of Chat Completions.""" + responses_models = ['gpt-5', 'gpt-4o', 'gpt-4-turbo', 'o1', 'o3', 'o4'] + return any(self.model.startswith(prefix) for prefix in responses_models) + def _call_openai_api(self, messages: list) -> dict: """ Call OpenAI API with automatic retry on failures. - Uses tenacity for exponential backoff if available. + Uses Responses API (v1/responses) for newer models, Chat Completions for older. """ - # Get the correct API parameters api_params = self._get_api_params() + use_responses = self._use_responses_api() - if TENACITY_AVAILABLE: - # Use retry decorator dynamically - retry_decorator = retry( - stop=stop_after_attempt(Config.API_MAX_RETRIES), - wait=wait_exponential(multiplier=Config.API_RETRY_DELAY, min=2, max=10), - retry=retry_if_exception_type((Exception,)), - reraise=True - ) - - @retry_decorator - def _api_call(): + def _do_api_call(): + if use_responses: + # Responses API (v1/responses) + input_text = "\n\n".join( + f"[{m['role']}]: {m['content']}" for m in messages + ) + return self.client.responses.create( + model=self.model, + input=input_text, + max_output_tokens=self.max_tokens, + ) + else: + # Chat Completions API (v1/chat/completions) for older models return self.client.chat.completions.create( model=self.model, messages=messages, @@ -157,27 +163,26 @@ class MetadataAnalyzer: **api_params ) - return _api_call() + if TENACITY_AVAILABLE: + retry_decorator = retry( + stop=stop_after_attempt(Config.API_MAX_RETRIES), + wait=wait_exponential(multiplier=Config.API_RETRY_DELAY, min=2, max=10), + retry=retry_if_exception_type((Exception,)), + reraise=True + ) + return retry_decorator(_do_api_call)() else: - # Fallback: simple retry without exponential backoff import time last_error = None - for attempt in range(Config.API_MAX_RETRIES): try: - return self.client.chat.completions.create( - model=self.model, - messages=messages, - timeout=Config.API_TIMEOUT, - **api_params - ) + return _do_api_call() except Exception as e: last_error = e if attempt < Config.API_MAX_RETRIES - 1: wait_time = Config.API_RETRY_DELAY * (2 ** attempt) logger.warning(f"API call failed (attempt {attempt + 1}/{Config.API_MAX_RETRIES}), retrying in {wait_time}s: {e}") time.sleep(wait_time) - raise last_error def analyze_content(self, content: str, filename: str, file_type: FileType) -> Dict[str, str]: @@ -212,13 +217,23 @@ class MetadataAnalyzer: {"role": "user", "content": prompt} ]) - # Parse response with detailed logging + # Parse response — handle both Responses API and Chat Completions formats + use_responses = self._use_responses_api() logger.info(f"API Response for {filename}:") logger.info(f" - Model used: {response.model}") - logger.info(f" - Finish reason: {response.choices[0].finish_reason}") - logger.info(f" - Tokens: prompt={response.usage.prompt_tokens}, completion={response.usage.completion_tokens}, total={response.usage.total_tokens}") - metadata_text = response.choices[0].message.content + if use_responses: + # Responses API format + metadata_text = response.output_text + total_tokens = (response.usage.input_tokens or 0) + (response.usage.output_tokens or 0) + logger.info(f" - Tokens: input={response.usage.input_tokens}, output={response.usage.output_tokens}, total={total_tokens}") + else: + # Chat Completions format + metadata_text = response.choices[0].message.content + total_tokens = response.usage.total_tokens + logger.info(f" - Finish reason: {response.choices[0].finish_reason}") + logger.info(f" - Tokens: prompt={response.usage.prompt_tokens}, completion={response.usage.completion_tokens}, total={total_tokens}") + logger.info(f" - Content length: {len(metadata_text) if metadata_text else 0} chars") logger.info(f" - Content preview: {metadata_text[:200] if metadata_text else '(empty)'}") @@ -241,7 +256,8 @@ class MetadataAnalyzer: } # Add metadata about the generation - metadata['_tokens_used'] = response.usage.total_tokens + metadata['_tokens_used'] = total_tokens + metadata['_model'] = response.model metadata['_confidence'] = 0.9 # Could calculate based on response logger.info(f"Generated metadata for {filename} (tokens used: {metadata['_tokens_used']})")