Switch AI to GPT-5.2 with Responses API (v1/responses)
- Default model: gpt-5.2
- New models use client.responses.create() (Responses API)
- Older models (gpt-3.5-turbo) still use chat.completions.create()
- Response parsing handles both API formats
- Updated valid models list

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ebc2322d61
commit
d4b71ff34e
3 changed files with 52 additions and 36 deletions
|
|
@ -29,7 +29,7 @@ class Settings(BaseSettings):
|
|||
|
||||
# OpenAI
|
||||
OPENAI_API_KEY: str = ""
|
||||
AI_MODEL: str = "gpt-4o-mini"
|
||||
AI_MODEL: str = "gpt-5.2"
|
||||
MAX_TOKENS: int = 500
|
||||
TEMPERATURE: float = 0.5
|
||||
MAX_TEXT_LENGTH: int = 4000
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ class Config:
|
|||
|
||||
# AI Settings (for CLI and Web AI mode)
|
||||
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
|
||||
AI_MODEL = os.getenv('AI_MODEL', 'gpt-4o-mini') # Better than gpt-3.5-turbo
|
||||
AI_MODEL = os.getenv('AI_MODEL', 'gpt-5.2')
|
||||
MAX_TOKENS = int(os.getenv('MAX_TOKENS', '500'))
|
||||
TEMPERATURE = float(os.getenv('TEMPERATURE', '0.5')) # 0.5 better for factual content
|
||||
MAX_TEXT_LENGTH = int(os.getenv('MAX_TEXT_LENGTH', '4000'))
|
||||
|
|
|
|||
|
|
@ -25,16 +25,15 @@ logger = get_logger(__name__)
|
|||
class MetadataAnalyzer:
|
||||
"""Analyze content and generate metadata using OpenAI GPT with production-ready error handling."""
|
||||
|
||||
# Valid OpenAI models (as of January 2026)
|
||||
# Valid OpenAI models
|
||||
VALID_MODELS = [
|
||||
# GPT-5 models (2026 release)
|
||||
'gpt-5', 'gpt-5-mini', 'gpt-5-nano',
|
||||
'gpt-5-mini-2025-08-07', 'gpt-5-nano-2025-08-07',
|
||||
# GPT-5 models
|
||||
'gpt-5', 'gpt-5.2', 'gpt-5-mini', 'gpt-5-nano',
|
||||
# GPT-4 models
|
||||
'gpt-4o', 'gpt-4o-mini', 'gpt-4o-mini-2024-07-18',
|
||||
'gpt-4o', 'gpt-4o-mini',
|
||||
'gpt-4-turbo', 'gpt-4', 'gpt-3.5-turbo',
|
||||
# Reasoning models
|
||||
'o1', 'o1-mini', 'o1-preview'
|
||||
'o1', 'o1-mini', 'o3', 'o3-mini', 'o4-mini',
|
||||
]
|
||||
|
||||
def __init__(self):
|
||||
|
|
@ -131,25 +130,32 @@ class MetadataAnalyzer:
|
|||
|
||||
return params
|
||||
|
||||
def _use_responses_api(self) -> bool:
    """Report whether the configured model is routed to the Responses API.

    Returns:
        True when ``self.model`` begins with one of the model-family
        prefixes listed below (the call then goes to v1/responses);
        False otherwise (the call goes to Chat Completions).
    """
    # Prefix match, so suffixed variants such as 'gpt-5.2' or 'o3-mini'
    # follow their family. Plain 'gpt-4' and 'gpt-3.5-turbo' match none
    # of these prefixes and therefore stay on Chat Completions.
    responses_prefixes = ('gpt-5', 'gpt-4o', 'gpt-4-turbo', 'o1', 'o3', 'o4')
    return self.model.startswith(responses_prefixes)
|
||||
|
||||
def _call_openai_api(self, messages: list) -> dict:
|
||||
"""
|
||||
Call OpenAI API with automatic retry on failures.
|
||||
Uses tenacity for exponential backoff if available.
|
||||
Uses Responses API (v1/responses) for newer models, Chat Completions for older.
|
||||
"""
|
||||
# Get the correct API parameters
|
||||
api_params = self._get_api_params()
|
||||
use_responses = self._use_responses_api()
|
||||
|
||||
if TENACITY_AVAILABLE:
|
||||
# Use retry decorator dynamically
|
||||
retry_decorator = retry(
|
||||
stop=stop_after_attempt(Config.API_MAX_RETRIES),
|
||||
wait=wait_exponential(multiplier=Config.API_RETRY_DELAY, min=2, max=10),
|
||||
retry=retry_if_exception_type((Exception,)),
|
||||
reraise=True
|
||||
)
|
||||
|
||||
@retry_decorator
|
||||
def _api_call():
|
||||
def _do_api_call():
|
||||
if use_responses:
|
||||
# Responses API (v1/responses)
|
||||
input_text = "\n\n".join(
|
||||
f"[{m['role']}]: {m['content']}" for m in messages
|
||||
)
|
||||
return self.client.responses.create(
|
||||
model=self.model,
|
||||
input=input_text,
|
||||
max_output_tokens=self.max_tokens,
|
||||
)
|
||||
else:
|
||||
# Chat Completions API (v1/chat/completions) for older models
|
||||
return self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
|
|
@ -157,27 +163,26 @@ class MetadataAnalyzer:
|
|||
**api_params
|
||||
)
|
||||
|
||||
return _api_call()
|
||||
if TENACITY_AVAILABLE:
|
||||
retry_decorator = retry(
|
||||
stop=stop_after_attempt(Config.API_MAX_RETRIES),
|
||||
wait=wait_exponential(multiplier=Config.API_RETRY_DELAY, min=2, max=10),
|
||||
retry=retry_if_exception_type((Exception,)),
|
||||
reraise=True
|
||||
)
|
||||
return retry_decorator(_do_api_call)()
|
||||
else:
|
||||
# Fallback: simple retry without exponential backoff
|
||||
import time
|
||||
last_error = None
|
||||
|
||||
for attempt in range(Config.API_MAX_RETRIES):
|
||||
try:
|
||||
return self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
timeout=Config.API_TIMEOUT,
|
||||
**api_params
|
||||
)
|
||||
return _do_api_call()
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
if attempt < Config.API_MAX_RETRIES - 1:
|
||||
wait_time = Config.API_RETRY_DELAY * (2 ** attempt)
|
||||
logger.warning(f"API call failed (attempt {attempt + 1}/{Config.API_MAX_RETRIES}), retrying in {wait_time}s: {e}")
|
||||
time.sleep(wait_time)
|
||||
|
||||
raise last_error
|
||||
|
||||
def analyze_content(self, content: str, filename: str, file_type: FileType) -> Dict[str, str]:
|
||||
|
|
@ -212,13 +217,23 @@ class MetadataAnalyzer:
|
|||
{"role": "user", "content": prompt}
|
||||
])
|
||||
|
||||
# Parse response with detailed logging
|
||||
# Parse response — handle both Responses API and Chat Completions formats
|
||||
use_responses = self._use_responses_api()
|
||||
logger.info(f"API Response for {filename}:")
|
||||
logger.info(f" - Model used: {response.model}")
|
||||
logger.info(f" - Finish reason: {response.choices[0].finish_reason}")
|
||||
logger.info(f" - Tokens: prompt={response.usage.prompt_tokens}, completion={response.usage.completion_tokens}, total={response.usage.total_tokens}")
|
||||
|
||||
metadata_text = response.choices[0].message.content
|
||||
if use_responses:
|
||||
# Responses API format
|
||||
metadata_text = response.output_text
|
||||
total_tokens = (response.usage.input_tokens or 0) + (response.usage.output_tokens or 0)
|
||||
logger.info(f" - Tokens: input={response.usage.input_tokens}, output={response.usage.output_tokens}, total={total_tokens}")
|
||||
else:
|
||||
# Chat Completions format
|
||||
metadata_text = response.choices[0].message.content
|
||||
total_tokens = response.usage.total_tokens
|
||||
logger.info(f" - Finish reason: {response.choices[0].finish_reason}")
|
||||
logger.info(f" - Tokens: prompt={response.usage.prompt_tokens}, completion={response.usage.completion_tokens}, total={total_tokens}")
|
||||
|
||||
logger.info(f" - Content length: {len(metadata_text) if metadata_text else 0} chars")
|
||||
logger.info(f" - Content preview: {metadata_text[:200] if metadata_text else '(empty)'}")
|
||||
|
||||
|
|
@ -241,7 +256,8 @@ class MetadataAnalyzer:
|
|||
}
|
||||
|
||||
# Add metadata about the generation
|
||||
metadata['_tokens_used'] = response.usage.total_tokens
|
||||
metadata['_tokens_used'] = total_tokens
|
||||
metadata['_model'] = response.model
|
||||
metadata['_confidence'] = 0.9 # Could calculate based on response
|
||||
|
||||
logger.info(f"Generated metadata for {filename} (tokens used: {metadata['_tokens_used']})")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue