Switch AI to GPT-5.2 with Responses API (v1/responses)

- Default model: gpt-5.2
- New models use client.responses.create() (Responses API)
- Older models (gpt-4, gpt-3.5-turbo) still use client.chat.completions.create() (Chat Completions API)
- Response parsing handles both API formats
- Updated valid models list

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
SamoilenkoVadym 2026-02-09 22:13:55 +00:00
parent ebc2322d61
commit d4b71ff34e
3 changed files with 52 additions and 36 deletions

View file

@ -29,7 +29,7 @@ class Settings(BaseSettings):
# OpenAI
OPENAI_API_KEY: str = ""
AI_MODEL: str = "gpt-4o-mini"
AI_MODEL: str = "gpt-5.2"
MAX_TOKENS: int = 500
TEMPERATURE: float = 0.5
MAX_TEXT_LENGTH: int = 4000

View file

@ -39,7 +39,7 @@ class Config:
# AI Settings (for CLI and Web AI mode)
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
AI_MODEL = os.getenv('AI_MODEL', 'gpt-4o-mini') # Better than gpt-3.5-turbo
AI_MODEL = os.getenv('AI_MODEL', 'gpt-5.2')
MAX_TOKENS = int(os.getenv('MAX_TOKENS', '500'))
TEMPERATURE = float(os.getenv('TEMPERATURE', '0.5')) # 0.5 better for factual content
MAX_TEXT_LENGTH = int(os.getenv('MAX_TEXT_LENGTH', '4000'))

View file

@ -25,16 +25,15 @@ logger = get_logger(__name__)
class MetadataAnalyzer:
"""Analyze content and generate metadata using OpenAI GPT with production-ready error handling."""
# Valid OpenAI models (as of January 2026)
# Valid OpenAI models
VALID_MODELS = [
# GPT-5 models (2026 release)
'gpt-5', 'gpt-5-mini', 'gpt-5-nano',
'gpt-5-mini-2025-08-07', 'gpt-5-nano-2025-08-07',
# GPT-5 models
'gpt-5', 'gpt-5.2', 'gpt-5-mini', 'gpt-5-nano',
# GPT-4 models
'gpt-4o', 'gpt-4o-mini', 'gpt-4o-mini-2024-07-18',
'gpt-4o', 'gpt-4o-mini',
'gpt-4-turbo', 'gpt-4', 'gpt-3.5-turbo',
# Reasoning models
'o1', 'o1-mini', 'o1-preview'
'o1', 'o1-mini', 'o3', 'o3-mini', 'o4-mini',
]
def __init__(self):
@ -131,25 +130,32 @@ class MetadataAnalyzer:
return params
def _use_responses_api(self) -> bool:
"""Check if model should use the Responses API (v1/responses) instead of Chat Completions."""
responses_models = ['gpt-5', 'gpt-4o', 'gpt-4-turbo', 'o1', 'o3', 'o4']
return any(self.model.startswith(prefix) for prefix in responses_models)
def _call_openai_api(self, messages: list) -> dict:
"""
Call OpenAI API with automatic retry on failures.
Uses tenacity for exponential backoff if available.
Uses Responses API (v1/responses) for newer models, Chat Completions for older.
"""
# Get the correct API parameters
api_params = self._get_api_params()
use_responses = self._use_responses_api()
if TENACITY_AVAILABLE:
# Use retry decorator dynamically
retry_decorator = retry(
stop=stop_after_attempt(Config.API_MAX_RETRIES),
wait=wait_exponential(multiplier=Config.API_RETRY_DELAY, min=2, max=10),
retry=retry_if_exception_type((Exception,)),
reraise=True
)
@retry_decorator
def _api_call():
def _do_api_call():
if use_responses:
# Responses API (v1/responses)
input_text = "\n\n".join(
f"[{m['role']}]: {m['content']}" for m in messages
)
return self.client.responses.create(
model=self.model,
input=input_text,
max_output_tokens=self.max_tokens,
)
else:
# Chat Completions API (v1/chat/completions) for older models
return self.client.chat.completions.create(
model=self.model,
messages=messages,
@ -157,27 +163,26 @@ class MetadataAnalyzer:
**api_params
)
return _api_call()
if TENACITY_AVAILABLE:
retry_decorator = retry(
stop=stop_after_attempt(Config.API_MAX_RETRIES),
wait=wait_exponential(multiplier=Config.API_RETRY_DELAY, min=2, max=10),
retry=retry_if_exception_type((Exception,)),
reraise=True
)
return retry_decorator(_do_api_call)()
else:
# Fallback: simple retry without exponential backoff
import time
last_error = None
for attempt in range(Config.API_MAX_RETRIES):
try:
return self.client.chat.completions.create(
model=self.model,
messages=messages,
timeout=Config.API_TIMEOUT,
**api_params
)
return _do_api_call()
except Exception as e:
last_error = e
if attempt < Config.API_MAX_RETRIES - 1:
wait_time = Config.API_RETRY_DELAY * (2 ** attempt)
logger.warning(f"API call failed (attempt {attempt + 1}/{Config.API_MAX_RETRIES}), retrying in {wait_time}s: {e}")
time.sleep(wait_time)
raise last_error
def analyze_content(self, content: str, filename: str, file_type: FileType) -> Dict[str, str]:
@ -212,13 +217,23 @@ class MetadataAnalyzer:
{"role": "user", "content": prompt}
])
# Parse response with detailed logging
# Parse response — handle both Responses API and Chat Completions formats
use_responses = self._use_responses_api()
logger.info(f"API Response for {filename}:")
logger.info(f" - Model used: {response.model}")
logger.info(f" - Finish reason: {response.choices[0].finish_reason}")
logger.info(f" - Tokens: prompt={response.usage.prompt_tokens}, completion={response.usage.completion_tokens}, total={response.usage.total_tokens}")
metadata_text = response.choices[0].message.content
if use_responses:
# Responses API format
metadata_text = response.output_text
total_tokens = (response.usage.input_tokens or 0) + (response.usage.output_tokens or 0)
logger.info(f" - Tokens: input={response.usage.input_tokens}, output={response.usage.output_tokens}, total={total_tokens}")
else:
# Chat Completions format
metadata_text = response.choices[0].message.content
total_tokens = response.usage.total_tokens
logger.info(f" - Finish reason: {response.choices[0].finish_reason}")
logger.info(f" - Tokens: prompt={response.usage.prompt_tokens}, completion={response.usage.completion_tokens}, total={total_tokens}")
logger.info(f" - Content length: {len(metadata_text) if metadata_text else 0} chars")
logger.info(f" - Content preview: {metadata_text[:200] if metadata_text else '(empty)'}")
@ -241,7 +256,8 @@ class MetadataAnalyzer:
}
# Add metadata about the generation
metadata['_tokens_used'] = response.usage.total_tokens
metadata['_tokens_used'] = total_tokens
metadata['_model'] = response.model
metadata['_confidence'] = 0.9 # Could calculate based on response
logger.info(f"Generated metadata for {filename} (tokens used: {metadata['_tokens_used']})")