feat(ai-config): wire admin UI to LLM service — endpoint/key/model from DB

- _get_runtime_config(): reads active provider endpoint, api_key, main/mini
  model from app_settings (60s cache), falls back to env vars
- get_azure_client() now async, accepts cfg dict
- All generate_* methods call _get_runtime_config() per invocation so DB
  changes take effect without restart
- app_settings: _seed_from_env() backfills empty endpoint/api_key (and the
  active main/mini model IDs) from env vars on first load so the admin UI
  shows current values immediately

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-05-25 13:10:40 +01:00
parent c98f6da6d2
commit d92a099ade
2 changed files with 89 additions and 48 deletions

View file

@ -32,8 +32,8 @@ DEFAULTS = {
"id": "azure_openai",
"name": "Azure OpenAI",
"enabled": True,
"endpoint": "",
"api_key": "",
"endpoint": "", # populated from env var on first load
"api_key": "", # populated from env var on first load
"models": [
{"id": "gpt-5.4", "display_name": "GPT-5.4", "role": "main", "enabled": True},
{"id": "gpt-5.4-mini", "display_name": "GPT-5.4 Mini", "role": "mini", "enabled": True},
@ -43,6 +43,26 @@ DEFAULTS = {
}
def _seed_from_env(doc: dict) -> dict:
"""Backfill endpoint/api_key from env vars when DB fields are still empty."""
import os
changed = False
for p in doc.get("ai_providers", []):
if not p.get("endpoint"):
p["endpoint"] = os.environ.get("AZURE_AI_ENDPOINT", "")
changed = True
if not p.get("api_key"):
p["api_key"] = os.environ.get("AZURE_AI_API_KEY", "")
changed = True
if not doc.get("active_main_model"):
doc["active_main_model"] = os.environ.get("AZURE_AI_MODEL_MAIN", "gpt-5.4")
changed = True
if not doc.get("active_mini_model"):
doc["active_mini_model"] = os.environ.get("AZURE_AI_MODEL_MINI", "gpt-5.4-mini")
changed = True
return doc if changed else doc
async def get_settings() -> dict:
global _cache, _cache_ts
if _cache and (time.monotonic() - _cache_ts) < _CACHE_TTL:
@ -60,6 +80,13 @@ async def get_settings() -> dict:
await db.app_settings.update_one({"_id": "config"}, {"$set": missing})
doc.update(missing)
# Backfill endpoint/api_key from env if still empty (first run after feature added)
before = {p['id']: (p.get('endpoint'), p.get('api_key')) for p in doc.get('ai_providers', [])}
_seed_from_env(doc)
after = {p['id']: (p.get('endpoint'), p.get('api_key')) for p in doc.get('ai_providers', [])}
if before != after:
await db.app_settings.update_one({"_id": "config"}, {"$set": {"ai_providers": doc["ai_providers"]}})
_cache = doc
_cache_ts = time.monotonic()
return doc

View file

@ -29,6 +29,7 @@ def _require_env(key: str) -> str:
return value
# Env-var fallbacks (required at startup; DB overrides take effect within 60s)
AZURE_AI_ENDPOINT = _require_env('AZURE_AI_ENDPOINT')
AZURE_AI_API_KEY = _require_env('AZURE_AI_API_KEY')
AZURE_MODEL_MAIN = os.environ.get('AZURE_AI_MODEL_MAIN', 'gpt-5.4')
@ -44,40 +45,42 @@ MINI_FEATURES = frozenset({
'audience_brief',
})
DEFAULT_MODEL = AZURE_MODEL_MAIN
SUPPORTED_MODELS = {
AZURE_MODEL_MAIN: 'azure',
AZURE_MODEL_MINI: 'azure',
}
# Legacy model IDs stored in the database — all map to the Azure main model
MODEL_ALIASES = {
'gemini-3.1-pro-preview': AZURE_MODEL_MAIN,
'gemini-3-pro-preview': AZURE_MODEL_MAIN,
'gpt-5.4-2026-03-05': AZURE_MODEL_MAIN,
'gpt-5': AZURE_MODEL_MAIN,
'gpt-5.2': AZURE_MODEL_MAIN,
'gpt-4.1': AZURE_MODEL_MAIN,
# Legacy model IDs stored in old documents — map to current main model
_LEGACY_ALIASES = {
'gemini-3.1-pro-preview', 'gemini-3-pro-preview',
'gpt-5.4-2026-03-05', 'gpt-5', 'gpt-5.2', 'gpt-4.1',
}
def get_azure_client() -> AsyncOpenAI:
"""Create a fresh Azure AI Foundry client for each call.
async def _get_runtime_config() -> dict:
"""Return active endpoint, api_key, main_model, mini_model.
Creating a new client per call avoids event-loop mismatch issues in ASGI
environments where requests may arrive on different event loops. The
overhead is negligible compared to the LLM API call itself.
The base URL must end with /v1/ so the SDK correctly appends operation
paths (e.g. 'responses' -> .../v1/responses).
Reads from the DB app_settings (60s cache) and falls back to env vars
when the DB field is empty or the DB is unreachable.
"""
base_url = AZURE_AI_ENDPOINT.rstrip('/') + '/'
return AsyncOpenAI(
base_url=base_url,
api_key=AZURE_AI_API_KEY,
timeout=600.0,
)
try:
from app.models.app_settings import get_settings
s = await get_settings()
active_id = s.get('active_provider', 'azure_openai')
providers = {p['id']: p for p in s.get('ai_providers', [])}
p = providers.get(active_id, {})
endpoint = p.get('endpoint') or AZURE_AI_ENDPOINT
api_key = p.get('api_key') or AZURE_AI_API_KEY
main_model = s.get('active_main_model') or AZURE_MODEL_MAIN
mini_model = s.get('active_mini_model') or AZURE_MODEL_MINI
except Exception:
endpoint, api_key = AZURE_AI_ENDPOINT, AZURE_AI_API_KEY
main_model, mini_model = AZURE_MODEL_MAIN, AZURE_MODEL_MINI
return {'endpoint': endpoint, 'api_key': api_key,
'main_model': main_model, 'mini_model': mini_model}
async def get_azure_client(cfg: Optional[dict] = None) -> AsyncOpenAI:
    """Build a new Azure AI Foundry client from the given runtime config.

    Args:
        cfg: Mapping with at least ``'endpoint'`` and ``'api_key'`` keys.
            When ``None``, the config is fetched via
            ``_get_runtime_config()``.

    Returns:
        A freshly constructed ``AsyncOpenAI`` client whose base URL is the
        configured endpoint normalised to end in exactly one ``/``, with a
        600-second timeout.
    """
    settings = cfg if cfg is not None else await _get_runtime_config()
    # Strip any trailing slashes first so exactly one is appended.
    endpoint = settings['endpoint'].rstrip('/')
    return AsyncOpenAI(
        base_url=endpoint + '/',
        api_key=settings['api_key'],
        timeout=600.0,
    )
class LLMServiceError(Exception):
@ -105,28 +108,33 @@ class LLMService:
return result.strip()
@staticmethod
def _resolve_model(model_name: Optional[str] = None) -> str:
def _resolve_model(
model_name: Optional[str] = None,
main_model: Optional[str] = None,
mini_model: Optional[str] = None,
) -> str:
"""Resolve a model name, applying feature-based mini routing.
Resolution order:
1. If model_name is one of the directly supported models, use it
but still override to mini when the current feature is a mini feature.
2. If model_name is a legacy alias, resolve it, then apply mini routing.
3. If model_name is None or unknown, auto-route by feature context.
main_model / mini_model come from _get_runtime_config() so DB overrides
take effect without a restart. Falls back to env-var globals when absent.
"""
# Determine base model from the explicit argument
main = main_model or AZURE_MODEL_MAIN
mini = mini_model or AZURE_MODEL_MINI
if model_name:
resolved = MODEL_ALIASES.get(model_name, model_name)
base = resolved if resolved in SUPPORTED_MODELS else DEFAULT_MODEL
# Legacy aliases all collapse to the current main model
base = mini if model_name == mini else (
main if (model_name in _LEGACY_ALIASES or model_name == main) else main
)
else:
base = DEFAULT_MODEL
base = main
# Feature override: mini features always get the cheaper model
try:
from app.services.llm_usage_context import current_context
ctx = current_context()
if ctx.feature in MINI_FEATURES:
return AZURE_MODEL_MINI
return mini
except Exception:
pass
@ -273,7 +281,8 @@ class LLMService:
raise
pass
actual_model = LLMService._resolve_model(model_name)
cfg = await _get_runtime_config()
actual_model = LLMService._resolve_model(model_name, cfg['main_model'], cfg['mini_model'])
_start_time = time.monotonic()
if system_prompt:
@ -287,7 +296,8 @@ class LLMService:
attempt_num = attempt + 1
logger.debug(f"LLM generate_content attempt {attempt_num}/{max_retries} model={actual_model}")
try:
response = await get_azure_client().responses.create(**kwargs)
client = await get_azure_client(cfg)
response = await client.responses.create(**kwargs)
result = LLMService._extract_responses_api_content(response)
if attempt > 0:
logger.info(f"LLM generate_content succeeded on attempt {attempt_num}/{max_retries}")
@ -396,7 +406,8 @@ class LLMService:
raise
pass
actual_model = LLMService._resolve_model(model_name)
cfg = await _get_runtime_config()
actual_model = LLMService._resolve_model(model_name, cfg['main_model'], cfg['mini_model'])
logger.info(f"generate_multimodal_content: {len(image_paths)} image(s), model={actual_model}")
_start_time = time.monotonic()
@ -423,7 +434,8 @@ class LLMService:
attempt_num = attempt + 1
logger.debug(f"generate_multimodal_content attempt {attempt_num}/{max_retries}")
try:
response = await get_azure_client().responses.create(**kwargs)
client = await get_azure_client(cfg)
response = await client.responses.create(**kwargs)
result = LLMService._extract_responses_api_content(response)
if attempt > 0:
logger.info(f"generate_multimodal_content succeeded on attempt {attempt_num}/{max_retries}")
@ -510,7 +522,8 @@ class LLMService:
)
# Multimodal path
actual_model = LLMService._resolve_model(model_name)
cfg = await _get_runtime_config()
actual_model = LLMService._resolve_model(model_name, cfg['main_model'], cfg['mini_model'])
max_retries = 3
last_error = None
_start_time = time.monotonic()
@ -535,7 +548,8 @@ class LLMService:
attempt_num = attempt + 1
logger.debug(f"generate_contextual_response multimodal attempt {attempt_num}/{max_retries}")
try:
response = await get_azure_client().responses.create(**kwargs)
client = await get_azure_client(cfg)
response = await client.responses.create(**kwargs)
result = LLMService._extract_responses_api_content(response)
if attempt > 0:
logger.info(f"generate_contextual_response succeeded on attempt {attempt_num}/{max_retries}")