From d92a099adeb7533013db94faa7ded246322cda18 Mon Sep 17 00:00:00 2001
From: Vadym Samoilenko <vadymsamoilenko@oliver.agency>
Date: Mon, 25 May 2026 13:10:40 +0100
Subject: [PATCH] =?UTF-8?q?feat(ai-config):=20wire=20admin=20UI=20to=20LLM?=
 =?UTF-8?q?=20service=20=E2=80=94=20endpoint/key/model=20from=20DB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- _get_runtime_config(): reads active provider endpoint, api_key, main/mini
  model from app_settings (60s cache), falls back to env vars
- get_azure_client() now async, accepts cfg dict
- All generate_* methods call _get_runtime_config() per invocation so DB
  changes take effect without restart
- app_settings: _seed_from_env() backfills empty endpoint/api_key (and unset
  active main/mini model names) from env vars on first load so the admin UI
  shows current values immediately

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 backend/app/models/app_settings.py  |  31 +++++++-
 backend/app/services/llm_service.py | 106 ++++++++++++++++------------
 2 files changed, 89 insertions(+), 48 deletions(-)

diff --git a/backend/app/models/app_settings.py b/backend/app/models/app_settings.py
index 0358fe45..dc7f197e 100644
--- a/backend/app/models/app_settings.py
+++ b/backend/app/models/app_settings.py
@@ -32,8 +32,8 @@ DEFAULTS = {
             "id": "azure_openai",
             "name": "Azure OpenAI",
             "enabled": True,
-            "endpoint": "",
-            "api_key": "",
+            "endpoint": "",  # populated from env var on first load
+            "api_key": "",   # populated from env var on first load
             "models": [
                 {"id": "gpt-5.4",      "display_name": "GPT-5.4",      "role": "main", "enabled": True},
                 {"id": "gpt-5.4-mini", "display_name": "GPT-5.4 Mini", "role": "mini", "enabled": True},
@@ -43,6 +43,26 @@ DEFAULTS = {
 }
 
 
+def _seed_from_env(doc: dict) -> dict:
+    """Backfill empty endpoint/api_key/active-model fields of *doc* from env vars.
+
+    Mutates and returns *doc*. Provider fields are written only when the env var
+    is non-empty. NOTE(review): every provider receives the AZURE_* values.
+    """
+    import os
+    fallbacks = {"endpoint": os.environ.get("AZURE_AI_ENDPOINT", ""),
+                 "api_key": os.environ.get("AZURE_AI_API_KEY", "")}
+    for provider in doc.get("ai_providers", []):
+        for field, value in fallbacks.items():
+            if value and not provider.get(field):
+                provider[field] = value
+    if not doc.get("active_main_model"):
+        doc["active_main_model"] = os.environ.get("AZURE_AI_MODEL_MAIN", "gpt-5.4")
+    if not doc.get("active_mini_model"):
+        doc["active_mini_model"] = os.environ.get("AZURE_AI_MODEL_MINI", "gpt-5.4-mini")
+    return doc
+
+
 async def get_settings() -> dict:
     global _cache, _cache_ts
     if _cache and (time.monotonic() - _cache_ts) < _CACHE_TTL:
@@ -60,6 +80,13 @@ async def get_settings() -> dict:
             await db.app_settings.update_one({"_id": "config"}, {"$set": missing})
             doc.update(missing)
 
+    # Backfill endpoint/api_key from env if still empty (first run after feature added)
+    before = {p['id']: (p.get('endpoint'), p.get('api_key')) for p in doc.get('ai_providers', [])}
+    _seed_from_env(doc)
+    after = {p['id']: (p.get('endpoint'), p.get('api_key')) for p in doc.get('ai_providers', [])}
+    if before != after:
+        await db.app_settings.update_one({"_id": "config"}, {"$set": {"ai_providers": doc["ai_providers"]}})
+
     _cache = doc
     _cache_ts = time.monotonic()
     return doc
diff --git a/backend/app/services/llm_service.py b/backend/app/services/llm_service.py
index fc787570..75b576b9 100755
--- a/backend/app/services/llm_service.py
+++ b/backend/app/services/llm_service.py
@@ -29,6 +29,7 @@ def _require_env(key: str) -> str:
     return value
 
 
+# Env-var fallbacks (required at startup; DB overrides take effect within 60s)
 AZURE_AI_ENDPOINT = _require_env('AZURE_AI_ENDPOINT')
 AZURE_AI_API_KEY = _require_env('AZURE_AI_API_KEY')
 AZURE_MODEL_MAIN = os.environ.get('AZURE_AI_MODEL_MAIN', 'gpt-5.4')
@@ -44,40 +45,42 @@ MINI_FEATURES = frozenset({
     'audience_brief',
 })
 
-DEFAULT_MODEL = AZURE_MODEL_MAIN
-
-SUPPORTED_MODELS = {
-    AZURE_MODEL_MAIN: 'azure',
-    AZURE_MODEL_MINI: 'azure',
-}
-
-# Legacy model IDs stored in the database — all map to the Azure main model
-MODEL_ALIASES = {
-    'gemini-3.1-pro-preview': AZURE_MODEL_MAIN,
-    'gemini-3-pro-preview': AZURE_MODEL_MAIN,
-    'gpt-5.4-2026-03-05': AZURE_MODEL_MAIN,
-    'gpt-5': AZURE_MODEL_MAIN,
-    'gpt-5.2': AZURE_MODEL_MAIN,
-    'gpt-4.1': AZURE_MODEL_MAIN,
+# Legacy model IDs stored in old documents — map to current main model
+_LEGACY_ALIASES = {
+    'gemini-3.1-pro-preview', 'gemini-3-pro-preview',
+    'gpt-5.4-2026-03-05', 'gpt-5', 'gpt-5.2', 'gpt-4.1',
 }
 
 
-def get_azure_client() -> AsyncOpenAI:
-    """Create a fresh Azure AI Foundry client for each call.
+async def _get_runtime_config() -> dict:
+    """Return active endpoint, api_key, main_model, mini_model.
 
-    Creating a new client per call avoids event-loop mismatch issues in ASGI
-    environments where requests may arrive on different event loops. The
-    overhead is negligible compared to the LLM API call itself.
-
-    The base URL must end with /v1/ so the SDK correctly appends operation
-    paths (e.g. 'responses' → .../v1/responses).
+    Reads from the DB app_settings; empty DB fields fall back to env vars, and
+    any failure while loading settings is logged and falls back to env vars only.
     """
-    base_url = AZURE_AI_ENDPOINT.rstrip('/') + '/'
-    return AsyncOpenAI(
-        base_url=base_url,
-        api_key=AZURE_AI_API_KEY,
-        timeout=600.0,
-    )
+    # Start from the env-var values; DB values replace them only when non-empty.
+    cfg = {'endpoint': AZURE_AI_ENDPOINT, 'api_key': AZURE_AI_API_KEY,
+           'main_model': AZURE_MODEL_MAIN, 'mini_model': AZURE_MODEL_MINI}
+    try:
+        from app.models.app_settings import get_settings
+        s = await get_settings()
+        providers = {p['id']: p for p in s.get('ai_providers', [])}
+        active = providers.get(s.get('active_provider', 'azure_openai'), {})
+        db_values = {'endpoint': active.get('endpoint'), 'api_key': active.get('api_key'),
+                     'main_model': s.get('active_main_model'),
+                     'mini_model': s.get('active_mini_model')}
+        cfg.update({key: value for key, value in db_values.items() if value})
+    except Exception as exc:  # best-effort: a settings failure must not block LLM calls
+        logger.warning("LLM runtime config unavailable; using env-var fallbacks: %s", exc)
+    return cfg
+
+
+async def get_azure_client(cfg: Optional[dict] = None) -> AsyncOpenAI:
+    """Build a new AsyncOpenAI client from *cfg*, loading the runtime config when omitted."""
+    settings = await _get_runtime_config() if cfg is None else cfg
+    # Normalise the endpoint so it ends with exactly one trailing slash.
+    endpoint = settings['endpoint'].rstrip('/') + '/'
+    return AsyncOpenAI(base_url=endpoint, api_key=settings['api_key'], timeout=600.0)
 
 
 class LLMServiceError(Exception):
@@ -105,28 +108,33 @@ class LLMService:
         return result.strip()
 
     @staticmethod
-    def _resolve_model(model_name: Optional[str] = None) -> str:
+    def _resolve_model(
+        model_name: Optional[str] = None,
+        main_model: Optional[str] = None,
+        mini_model: Optional[str] = None,
+    ) -> str:
         """Resolve a model name, applying feature-based mini routing.
 
-        Resolution order:
-        1. If model_name is one of the directly supported models, use it —
-           but still override to mini when the current feature is a mini feature.
-        2. If model_name is a legacy alias, resolve it, then apply mini routing.
-        3. If model_name is None or unknown, auto-route by feature context.
+        main_model / mini_model come from _get_runtime_config() so DB overrides
+        take effect without a restart. Falls back to env-var globals when absent.
         """
-        # Determine base model from the explicit argument
+        main = main_model or AZURE_MODEL_MAIN
+        mini = mini_model or AZURE_MODEL_MINI
+
         if model_name:
-            resolved = MODEL_ALIASES.get(model_name, model_name)
-            base = resolved if resolved in SUPPORTED_MODELS else DEFAULT_MODEL
+            # Legacy aliases all collapse to the current main model
+            base = mini if model_name == mini else (
+                main if (model_name in _LEGACY_ALIASES or model_name == main) else main
+            )
         else:
-            base = DEFAULT_MODEL
+            base = main
 
         # Feature override: mini features always get the cheaper model
         try:
             from app.services.llm_usage_context import current_context
             ctx = current_context()
             if ctx.feature in MINI_FEATURES:
-                return AZURE_MODEL_MINI
+                return mini
         except Exception:
             pass
 
@@ -273,7 +281,8 @@ class LLMService:
                 raise
             pass
 
-        actual_model = LLMService._resolve_model(model_name)
+        cfg = await _get_runtime_config()
+        actual_model = LLMService._resolve_model(model_name, cfg['main_model'], cfg['mini_model'])
         _start_time = time.monotonic()
 
         if system_prompt:
@@ -287,7 +296,8 @@ class LLMService:
             attempt_num = attempt + 1
             logger.debug(f"LLM generate_content attempt {attempt_num}/{max_retries} model={actual_model}")
             try:
-                response = await get_azure_client().responses.create(**kwargs)
+                client = await get_azure_client(cfg)
+                response = await client.responses.create(**kwargs)
                 result = LLMService._extract_responses_api_content(response)
                 if attempt > 0:
                     logger.info(f"LLM generate_content succeeded on attempt {attempt_num}/{max_retries}")
@@ -396,7 +406,8 @@ class LLMService:
                 raise
             pass
 
-        actual_model = LLMService._resolve_model(model_name)
+        cfg = await _get_runtime_config()
+        actual_model = LLMService._resolve_model(model_name, cfg['main_model'], cfg['mini_model'])
         logger.info(f"generate_multimodal_content: {len(image_paths)} image(s), model={actual_model}")
         _start_time = time.monotonic()
 
@@ -423,7 +434,8 @@ class LLMService:
             attempt_num = attempt + 1
             logger.debug(f"generate_multimodal_content attempt {attempt_num}/{max_retries}")
             try:
-                response = await get_azure_client().responses.create(**kwargs)
+                client = await get_azure_client(cfg)
+                response = await client.responses.create(**kwargs)
                 result = LLMService._extract_responses_api_content(response)
                 if attempt > 0:
                     logger.info(f"generate_multimodal_content succeeded on attempt {attempt_num}/{max_retries}")
@@ -510,7 +522,8 @@ class LLMService:
             )
 
         # Multimodal path
-        actual_model = LLMService._resolve_model(model_name)
+        cfg = await _get_runtime_config()
+        actual_model = LLMService._resolve_model(model_name, cfg['main_model'], cfg['mini_model'])
         max_retries = 3
         last_error = None
         _start_time = time.monotonic()
@@ -535,7 +548,8 @@ class LLMService:
             attempt_num = attempt + 1
             logger.debug(f"generate_contextual_response multimodal attempt {attempt_num}/{max_retries}")
             try:
-                response = await get_azure_client().responses.create(**kwargs)
+                client = await get_azure_client(cfg)
+                response = await client.responses.create(**kwargs)
                 result = LLMService._extract_responses_api_content(response)
                 if attempt > 0:
                     logger.info(f"generate_contextual_response succeeded on attempt {attempt_num}/{max_retries}")