feat(ai-config): wire admin UI to LLM service — endpoint/key/model from DB
- _get_runtime_config(): reads the active provider's endpoint, api_key, and main/mini model from app_settings (60s cache); falls back to env vars
- get_azure_client() is now async and accepts a cfg dict
- All generate_* methods call _get_runtime_config() per invocation, so DB changes take effect without a restart
- app_settings: _seed_from_env() backfills empty endpoint/api_key from env vars on first load, so the admin UI shows current values immediately

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
c98f6da6d2
commit
d92a099ade
2 changed files with 89 additions and 48 deletions
|
|
@ -32,8 +32,8 @@ DEFAULTS = {
|
|||
"id": "azure_openai",
|
||||
"name": "Azure OpenAI",
|
||||
"enabled": True,
|
||||
"endpoint": "",
|
||||
"api_key": "",
|
||||
"endpoint": "", # populated from env var on first load
|
||||
"api_key": "", # populated from env var on first load
|
||||
"models": [
|
||||
{"id": "gpt-5.4", "display_name": "GPT-5.4", "role": "main", "enabled": True},
|
||||
{"id": "gpt-5.4-mini", "display_name": "GPT-5.4 Mini", "role": "mini", "enabled": True},
|
||||
|
|
@ -43,6 +43,26 @@ DEFAULTS = {
|
|||
}
|
||||
|
||||
|
||||
def _seed_from_env(doc: dict) -> dict:
    """Backfill empty provider credentials and active model names from env vars.

    For every entry in ``doc["ai_providers"]`` whose ``endpoint`` or
    ``api_key`` is missing or falsy, the value is taken from
    ``AZURE_AI_ENDPOINT`` / ``AZURE_AI_API_KEY`` (empty string when the
    variable is unset). ``active_main_model`` and ``active_mini_model`` are
    filled the same way from ``AZURE_AI_MODEL_MAIN`` / ``AZURE_AI_MODEL_MINI``,
    defaulting to ``"gpt-5.4"`` and ``"gpt-5.4-mini"``.

    Args:
        doc: The settings document. It is mutated in place.

    Returns:
        The same ``doc`` object that was passed in.
    """
    import os

    # Read once; the values do not change between providers.
    env_endpoint = os.environ.get("AZURE_AI_ENDPOINT", "")
    env_api_key = os.environ.get("AZURE_AI_API_KEY", "")

    # NOTE(review): every provider with empty fields receives the Azure env
    # values, not only the Azure one -- confirm this is intended if other
    # providers are ever listed.
    # `or []` tolerates a document whose "ai_providers" key is present but None.
    for provider in doc.get("ai_providers") or []:
        if not provider.get("endpoint"):
            provider["endpoint"] = env_endpoint
        if not provider.get("api_key"):
            provider["api_key"] = env_api_key

    if not doc.get("active_main_model"):
        doc["active_main_model"] = os.environ.get("AZURE_AI_MODEL_MAIN", "gpt-5.4")
    if not doc.get("active_mini_model"):
        doc["active_mini_model"] = os.environ.get("AZURE_AI_MODEL_MINI", "gpt-5.4-mini")

    return doc
|
||||
|
||||
|
||||
async def get_settings() -> dict:
|
||||
global _cache, _cache_ts
|
||||
if _cache and (time.monotonic() - _cache_ts) < _CACHE_TTL:
|
||||
|
|
@ -60,6 +80,13 @@ async def get_settings() -> dict:
|
|||
await db.app_settings.update_one({"_id": "config"}, {"$set": missing})
|
||||
doc.update(missing)
|
||||
|
||||
# Backfill endpoint/api_key from env if still empty (first run after feature added)
|
||||
before = {p['id']: (p.get('endpoint'), p.get('api_key')) for p in doc.get('ai_providers', [])}
|
||||
_seed_from_env(doc)
|
||||
after = {p['id']: (p.get('endpoint'), p.get('api_key')) for p in doc.get('ai_providers', [])}
|
||||
if before != after:
|
||||
await db.app_settings.update_one({"_id": "config"}, {"$set": {"ai_providers": doc["ai_providers"]}})
|
||||
|
||||
_cache = doc
|
||||
_cache_ts = time.monotonic()
|
||||
return doc
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ def _require_env(key: str) -> str:
|
|||
return value
|
||||
|
||||
|
||||
# Env-var fallbacks (required at startup; DB overrides take effect within 60s)
|
||||
AZURE_AI_ENDPOINT = _require_env('AZURE_AI_ENDPOINT')
|
||||
AZURE_AI_API_KEY = _require_env('AZURE_AI_API_KEY')
|
||||
AZURE_MODEL_MAIN = os.environ.get('AZURE_AI_MODEL_MAIN', 'gpt-5.4')
|
||||
|
|
@ -44,40 +45,42 @@ MINI_FEATURES = frozenset({
|
|||
'audience_brief',
|
||||
})
|
||||
|
||||
DEFAULT_MODEL = AZURE_MODEL_MAIN
|
||||
|
||||
SUPPORTED_MODELS = {
|
||||
AZURE_MODEL_MAIN: 'azure',
|
||||
AZURE_MODEL_MINI: 'azure',
|
||||
}
|
||||
|
||||
# Legacy model IDs stored in the database — all map to the Azure main model
|
||||
MODEL_ALIASES = {
|
||||
'gemini-3.1-pro-preview': AZURE_MODEL_MAIN,
|
||||
'gemini-3-pro-preview': AZURE_MODEL_MAIN,
|
||||
'gpt-5.4-2026-03-05': AZURE_MODEL_MAIN,
|
||||
'gpt-5': AZURE_MODEL_MAIN,
|
||||
'gpt-5.2': AZURE_MODEL_MAIN,
|
||||
'gpt-4.1': AZURE_MODEL_MAIN,
|
||||
# Legacy model IDs stored in old documents — map to current main model
|
||||
_LEGACY_ALIASES = {
|
||||
'gemini-3.1-pro-preview', 'gemini-3-pro-preview',
|
||||
'gpt-5.4-2026-03-05', 'gpt-5', 'gpt-5.2', 'gpt-4.1',
|
||||
}
|
||||
|
||||
|
||||
async def _get_runtime_config() -> dict:
    """Return the active endpoint, api_key, main_model and mini_model.

    Values are read from the app settings returned by ``get_settings()``
    (described in this file as DB-backed with a 60s cache). Any field that
    is empty there falls back to the matching env-var global, and if loading
    the settings raises, the env-var values are used for all four fields.

    Returns:
        A dict with the keys ``endpoint``, ``api_key``, ``main_model`` and
        ``mini_model``.
    """
    try:
        # Imported inside the function rather than at module level.
        # NOTE(review): presumably to avoid an import cycle -- confirm.
        from app.models.app_settings import get_settings

        settings = await get_settings()
        active_id = settings.get('active_provider', 'azure_openai')
        # .get('id') so a single malformed provider entry cannot discard the
        # whole DB configuration through a KeyError.
        providers = {p.get('id'): p for p in settings.get('ai_providers') or []}
        provider = providers.get(active_id, {})
        endpoint = provider.get('endpoint') or AZURE_AI_ENDPOINT
        api_key = provider.get('api_key') or AZURE_AI_API_KEY
        main_model = settings.get('active_main_model') or AZURE_MODEL_MAIN
        mini_model = settings.get('active_mini_model') or AZURE_MODEL_MINI
    except Exception:
        # Deliberately best-effort: a settings failure must not block LLM
        # calls. Log it so the silent switch to env values is diagnosable.
        logger.warning(
            "Could not load AI runtime config from app settings; "
            "falling back to env vars",
            exc_info=True,
        )
        endpoint, api_key = AZURE_AI_ENDPOINT, AZURE_AI_API_KEY
        main_model, mini_model = AZURE_MODEL_MAIN, AZURE_MODEL_MINI

    return {
        'endpoint': endpoint,
        'api_key': api_key,
        'main_model': main_model,
        'mini_model': mini_model,
    }


async def get_azure_client(cfg: Optional[dict] = None) -> AsyncOpenAI:
    """Create a fresh Azure AI Foundry client using runtime config.

    A new client is built on every call. Per the documentation this function
    carried before it became async, that avoids event-loop mismatch issues in
    ASGI environments where requests may arrive on different event loops,
    and the overhead is negligible next to the LLM API call itself.

    Args:
        cfg: A dict with at least ``endpoint`` and ``api_key``, as returned
            by ``_get_runtime_config()``. When ``None``, the runtime config
            is loaded here.

    Returns:
        An ``AsyncOpenAI`` client with a 600 second timeout.
    """
    if cfg is None:
        cfg = await _get_runtime_config()
    # Normalise to exactly one trailing slash. The earlier documentation
    # states the base URL must end with /v1/ so the SDK appends operation
    # paths correctly (e.g. 'responses' -> .../v1/responses); this code only
    # guarantees the trailing slash, not the /v1 segment.
    base_url = cfg['endpoint'].rstrip('/') + '/'
    return AsyncOpenAI(base_url=base_url, api_key=cfg['api_key'], timeout=600.0)
|
||||
|
||||
|
||||
class LLMServiceError(Exception):
|
||||
|
|
@ -105,28 +108,33 @@ class LLMService:
|
|||
return result.strip()
|
||||
|
||||
@staticmethod
|
||||
def _resolve_model(model_name: Optional[str] = None) -> str:
|
||||
def _resolve_model(
|
||||
model_name: Optional[str] = None,
|
||||
main_model: Optional[str] = None,
|
||||
mini_model: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Resolve a model name, applying feature-based mini routing.
|
||||
|
||||
Resolution order:
|
||||
1. If model_name is one of the directly supported models, use it —
|
||||
but still override to mini when the current feature is a mini feature.
|
||||
2. If model_name is a legacy alias, resolve it, then apply mini routing.
|
||||
3. If model_name is None or unknown, auto-route by feature context.
|
||||
main_model / mini_model come from _get_runtime_config() so DB overrides
|
||||
take effect without a restart. Falls back to env-var globals when absent.
|
||||
"""
|
||||
# Determine base model from the explicit argument
|
||||
main = main_model or AZURE_MODEL_MAIN
|
||||
mini = mini_model or AZURE_MODEL_MINI
|
||||
|
||||
if model_name:
|
||||
resolved = MODEL_ALIASES.get(model_name, model_name)
|
||||
base = resolved if resolved in SUPPORTED_MODELS else DEFAULT_MODEL
|
||||
# Legacy aliases all collapse to the current main model
|
||||
base = mini if model_name == mini else (
|
||||
main if (model_name in _LEGACY_ALIASES or model_name == main) else main
|
||||
)
|
||||
else:
|
||||
base = DEFAULT_MODEL
|
||||
base = main
|
||||
|
||||
# Feature override: mini features always get the cheaper model
|
||||
try:
|
||||
from app.services.llm_usage_context import current_context
|
||||
ctx = current_context()
|
||||
if ctx.feature in MINI_FEATURES:
|
||||
return AZURE_MODEL_MINI
|
||||
return mini
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -273,7 +281,8 @@ class LLMService:
|
|||
raise
|
||||
pass
|
||||
|
||||
actual_model = LLMService._resolve_model(model_name)
|
||||
cfg = await _get_runtime_config()
|
||||
actual_model = LLMService._resolve_model(model_name, cfg['main_model'], cfg['mini_model'])
|
||||
_start_time = time.monotonic()
|
||||
|
||||
if system_prompt:
|
||||
|
|
@ -287,7 +296,8 @@ class LLMService:
|
|||
attempt_num = attempt + 1
|
||||
logger.debug(f"LLM generate_content attempt {attempt_num}/{max_retries} model={actual_model}")
|
||||
try:
|
||||
response = await get_azure_client().responses.create(**kwargs)
|
||||
client = await get_azure_client(cfg)
|
||||
response = await client.responses.create(**kwargs)
|
||||
result = LLMService._extract_responses_api_content(response)
|
||||
if attempt > 0:
|
||||
logger.info(f"LLM generate_content succeeded on attempt {attempt_num}/{max_retries}")
|
||||
|
|
@ -396,7 +406,8 @@ class LLMService:
|
|||
raise
|
||||
pass
|
||||
|
||||
actual_model = LLMService._resolve_model(model_name)
|
||||
cfg = await _get_runtime_config()
|
||||
actual_model = LLMService._resolve_model(model_name, cfg['main_model'], cfg['mini_model'])
|
||||
logger.info(f"generate_multimodal_content: {len(image_paths)} image(s), model={actual_model}")
|
||||
_start_time = time.monotonic()
|
||||
|
||||
|
|
@ -423,7 +434,8 @@ class LLMService:
|
|||
attempt_num = attempt + 1
|
||||
logger.debug(f"generate_multimodal_content attempt {attempt_num}/{max_retries}")
|
||||
try:
|
||||
response = await get_azure_client().responses.create(**kwargs)
|
||||
client = await get_azure_client(cfg)
|
||||
response = await client.responses.create(**kwargs)
|
||||
result = LLMService._extract_responses_api_content(response)
|
||||
if attempt > 0:
|
||||
logger.info(f"generate_multimodal_content succeeded on attempt {attempt_num}/{max_retries}")
|
||||
|
|
@ -510,7 +522,8 @@ class LLMService:
|
|||
)
|
||||
|
||||
# Multimodal path
|
||||
actual_model = LLMService._resolve_model(model_name)
|
||||
cfg = await _get_runtime_config()
|
||||
actual_model = LLMService._resolve_model(model_name, cfg['main_model'], cfg['mini_model'])
|
||||
max_retries = 3
|
||||
last_error = None
|
||||
_start_time = time.monotonic()
|
||||
|
|
@ -535,7 +548,8 @@ class LLMService:
|
|||
attempt_num = attempt + 1
|
||||
logger.debug(f"generate_contextual_response multimodal attempt {attempt_num}/{max_retries}")
|
||||
try:
|
||||
response = await get_azure_client().responses.create(**kwargs)
|
||||
client = await get_azure_client(cfg)
|
||||
response = await client.responses.create(**kwargs)
|
||||
result = LLMService._extract_responses_api_content(response)
|
||||
if attempt > 0:
|
||||
logger.info(f"generate_contextual_response succeeded on attempt {attempt_num}/{max_retries}")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue