From d92a099adeb7533013db94faa7ded246322cda18 Mon Sep 17 00:00:00 2001 From: Vadym Samoilenko Date: Mon, 25 May 2026 13:10:40 +0100 Subject: [PATCH] =?UTF-8?q?feat(ai-config):=20wire=20admin=20UI=20to=20LLM?= =?UTF-8?q?=20service=20=E2=80=94=20endpoint/key/model=20from=20DB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - _get_runtime_config(): reads active provider endpoint, api_key, main/mini model from app_settings (60s cache), falls back to env vars - get_azure_client() now async, accepts cfg dict - All generate_* methods call _get_runtime_config() per invocation so DB changes take effect without restart - app_settings: _seed_from_env() backfills empty endpoint/api_key from env vars on first load so the admin UI shows current values immediately Co-Authored-By: Claude Sonnet 4.6 --- backend/app/models/app_settings.py | 31 +++++++- backend/app/services/llm_service.py | 106 ++++++++++++++++------------ 2 files changed, 89 insertions(+), 48 deletions(-) diff --git a/backend/app/models/app_settings.py b/backend/app/models/app_settings.py index 0358fe45..dc7f197e 100644 --- a/backend/app/models/app_settings.py +++ b/backend/app/models/app_settings.py @@ -32,8 +32,8 @@ DEFAULTS = { "id": "azure_openai", "name": "Azure OpenAI", "enabled": True, - "endpoint": "", - "api_key": "", + "endpoint": "", # populated from env var on first load + "api_key": "", # populated from env var on first load "models": [ {"id": "gpt-5.4", "display_name": "GPT-5.4", "role": "main", "enabled": True}, {"id": "gpt-5.4-mini", "display_name": "GPT-5.4 Mini", "role": "mini", "enabled": True}, @@ -43,6 +43,26 @@ DEFAULTS = { } +def _seed_from_env(doc: dict) -> dict: + """Backfill endpoint/api_key from env vars when DB fields are still empty.""" + import os + changed = False + for p in doc.get("ai_providers", []): + if not p.get("endpoint"): + p["endpoint"] = os.environ.get("AZURE_AI_ENDPOINT", "") + changed = True + if not p.get("api_key"): + p["api_key"] = os.environ.get("AZURE_AI_API_KEY", "") + changed = True + if not doc.get("active_main_model"): + doc["active_main_model"] = os.environ.get("AZURE_AI_MODEL_MAIN", "gpt-5.4") + changed = True + if not doc.get("active_mini_model"): + doc["active_mini_model"] = os.environ.get("AZURE_AI_MODEL_MINI", "gpt-5.4-mini") + changed = True + return doc if changed else doc + + async def get_settings() -> dict: global _cache, _cache_ts if _cache and (time.monotonic() - _cache_ts) < _CACHE_TTL: @@ -60,6 +80,13 @@ async def get_settings() -> dict: await db.app_settings.update_one({"_id": "config"}, {"$set": missing}) doc.update(missing) + # Backfill endpoint/api_key from env if still empty (first run after feature added) + before = {p['id']: (p.get('endpoint'), p.get('api_key')) for p in doc.get('ai_providers', [])} + _seed_from_env(doc) + after = {p['id']: (p.get('endpoint'), p.get('api_key')) for p in doc.get('ai_providers', [])} + if before != after: + await db.app_settings.update_one({"_id": "config"}, {"$set": {"ai_providers": doc["ai_providers"]}}) + _cache = doc _cache_ts = time.monotonic() return doc diff --git a/backend/app/services/llm_service.py b/backend/app/services/llm_service.py index fc787570..75b576b9 100755 --- a/backend/app/services/llm_service.py +++ b/backend/app/services/llm_service.py @@ -29,6 +29,7 @@ def _require_env(key: str) -> str: return value +# Env-var fallbacks (required at startup; DB overrides take effect within 60s) AZURE_AI_ENDPOINT = _require_env('AZURE_AI_ENDPOINT') AZURE_AI_API_KEY = _require_env('AZURE_AI_API_KEY') AZURE_MODEL_MAIN = os.environ.get('AZURE_AI_MODEL_MAIN', 'gpt-5.4') @@ -44,40 +45,42 @@ MINI_FEATURES = frozenset({ 'audience_brief', }) -DEFAULT_MODEL = AZURE_MODEL_MAIN - -SUPPORTED_MODELS = { - AZURE_MODEL_MAIN: 'azure', - AZURE_MODEL_MINI: 'azure', -} - -# Legacy model IDs stored in the database — all map to the Azure main model -MODEL_ALIASES = { - 'gemini-3.1-pro-preview': AZURE_MODEL_MAIN, - 'gemini-3-pro-preview': AZURE_MODEL_MAIN, - 'gpt-5.4-2026-03-05': AZURE_MODEL_MAIN, - 'gpt-5': AZURE_MODEL_MAIN, - 'gpt-5.2': AZURE_MODEL_MAIN, - 'gpt-4.1': AZURE_MODEL_MAIN, +# Legacy model IDs stored in old documents — map to current main model +_LEGACY_ALIASES = { + 'gemini-3.1-pro-preview', 'gemini-3-pro-preview', + 'gpt-5.4-2026-03-05', 'gpt-5', 'gpt-5.2', 'gpt-4.1', } -def get_azure_client() -> AsyncOpenAI: - """Create a fresh Azure AI Foundry client for each call. +async def _get_runtime_config() -> dict: + """Return active endpoint, api_key, main_model, mini_model. - Creating a new client per call avoids event-loop mismatch issues in ASGI - environments where requests may arrive on different event loops. The - overhead is negligible compared to the LLM API call itself. - - The base URL must end with /v1/ so the SDK correctly appends operation - paths (e.g. 'responses' → .../v1/responses). + Reads from the DB app_settings (60s cache) and falls back to env vars + when the DB field is empty or the DB is unreachable. """ - base_url = AZURE_AI_ENDPOINT.rstrip('/') + '/' - return AsyncOpenAI( - base_url=base_url, - api_key=AZURE_AI_API_KEY, - timeout=600.0, - ) + try: + from app.models.app_settings import get_settings + s = await get_settings() + active_id = s.get('active_provider', 'azure_openai') + providers = {p['id']: p for p in s.get('ai_providers', [])} + p = providers.get(active_id, {}) + endpoint = p.get('endpoint') or AZURE_AI_ENDPOINT + api_key = p.get('api_key') or AZURE_AI_API_KEY + main_model = s.get('active_main_model') or AZURE_MODEL_MAIN + mini_model = s.get('active_mini_model') or AZURE_MODEL_MINI + except Exception: + endpoint, api_key = AZURE_AI_ENDPOINT, AZURE_AI_API_KEY + main_model, mini_model = AZURE_MODEL_MAIN, AZURE_MODEL_MINI + return {'endpoint': endpoint, 'api_key': api_key, + 'main_model': main_model, 'mini_model': mini_model} + + +async def get_azure_client(cfg: Optional[dict] = None) -> AsyncOpenAI: + """Create a fresh Azure AI Foundry client using runtime config (DB or env fallback).""" + if cfg is None: + cfg = await _get_runtime_config() + base_url = cfg['endpoint'].rstrip('/') + '/' + return AsyncOpenAI(base_url=base_url, api_key=cfg['api_key'], timeout=600.0) class LLMServiceError(Exception): @@ -105,28 +108,33 @@ class LLMService: return result.strip() @staticmethod - def _resolve_model(model_name: Optional[str] = None) -> str: + def _resolve_model( + model_name: Optional[str] = None, + main_model: Optional[str] = None, + mini_model: Optional[str] = None, + ) -> str: """Resolve a model name, applying feature-based mini routing. - Resolution order: - 1. If model_name is one of the directly supported models, use it — - but still override to mini when the current feature is a mini feature. - 2. If model_name is a legacy alias, resolve it, then apply mini routing. - 3. If model_name is None or unknown, auto-route by feature context. + main_model / mini_model come from _get_runtime_config() so DB overrides + take effect without a restart. Falls back to env-var globals when absent. """ - # Determine base model from the explicit argument + main = main_model or AZURE_MODEL_MAIN + mini = mini_model or AZURE_MODEL_MINI + if model_name: - resolved = MODEL_ALIASES.get(model_name, model_name) - base = resolved if resolved in SUPPORTED_MODELS else DEFAULT_MODEL + # Legacy aliases all collapse to the current main model + base = mini if model_name == mini else ( + main if (model_name in _LEGACY_ALIASES or model_name == main) else main + ) else: - base = DEFAULT_MODEL + base = main # Feature override: mini features always get the cheaper model try: from app.services.llm_usage_context import current_context ctx = current_context() if ctx.feature in MINI_FEATURES: - return AZURE_MODEL_MINI + return mini except Exception: pass @@ -273,7 +281,8 @@ class LLMService: raise pass - actual_model = LLMService._resolve_model(model_name) + cfg = await _get_runtime_config() + actual_model = LLMService._resolve_model(model_name, cfg['main_model'], cfg['mini_model']) _start_time = time.monotonic() if system_prompt: @@ -287,7 +296,8 @@ class LLMService: attempt_num = attempt + 1 logger.debug(f"LLM generate_content attempt {attempt_num}/{max_retries} model={actual_model}") try: - response = await get_azure_client().responses.create(**kwargs) + client = await get_azure_client(cfg) + response = await client.responses.create(**kwargs) result = LLMService._extract_responses_api_content(response) if attempt > 0: logger.info(f"LLM generate_content succeeded on attempt {attempt_num}/{max_retries}") @@ -396,7 +406,8 @@ class LLMService: raise pass - actual_model = LLMService._resolve_model(model_name) + cfg = await _get_runtime_config() + actual_model = LLMService._resolve_model(model_name, cfg['main_model'], cfg['mini_model']) logger.info(f"generate_multimodal_content: {len(image_paths)} image(s), model={actual_model}") _start_time = time.monotonic() @@ -423,7 +434,8 @@ class LLMService: attempt_num = attempt + 1 logger.debug(f"generate_multimodal_content attempt {attempt_num}/{max_retries}") try: - response = await get_azure_client().responses.create(**kwargs) + client = await get_azure_client(cfg) + response = await client.responses.create(**kwargs) result = LLMService._extract_responses_api_content(response) if attempt > 0: logger.info(f"generate_multimodal_content succeeded on attempt {attempt_num}/{max_retries}") @@ -510,7 +522,8 @@ class LLMService: ) # Multimodal path - actual_model = LLMService._resolve_model(model_name) + cfg = await _get_runtime_config() + actual_model = LLMService._resolve_model(model_name, cfg['main_model'], cfg['mini_model']) max_retries = 3 last_error = None _start_time = time.monotonic() @@ -535,7 +548,8 @@ class LLMService: attempt_num = attempt + 1 logger.debug(f"generate_contextual_response multimodal attempt {attempt_num}/{max_retries}") try: - response = await get_azure_client().responses.create(**kwargs) + client = await get_azure_client(cfg) + response = await client.responses.create(**kwargs) result = LLMService._extract_responses_api_content(response) if attempt > 0: logger.info(f"generate_contextual_response succeeded on attempt {attempt_num}/{max_retries}")