- Model renames: gpt-5.2 → gpt-5.4-2026-03-05, gemini-3-pro-preview → gemini-3.1-pro-preview; retire gpt-4.1 via alias fallback - New: llm_usage_context.py (ContextVar-based attribution), model_pricing.py (tiered pricing + 60s cache), usage_event.py (append-only telemetry), quota.py (user/FG quota enforcement with 80% warning) - Wire _record_usage into all 3 LLM methods; set_llm_context at every service entry point - Fix admin_required decorator (was sync, never awaited User.find_by_id); add active_required and with_user_context decorators - Inject user_id into ContextVar from JWT on every authenticated request - Add DB indexes for usage_events, model_pricing, users collections - Seed script for model pricing (gpt-5.4 single-tier, gemini-3.1 two-tier 200k threshold) - Fix parse_json_response NameError (logger undefined at module level) - 70 passing tests: conftest.py with sys.modules stubs, test_usage_infrastructure.py (52 tests), rewrite stale test_llm_service.py (18 tests) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
54 lines
1.7 KiB
Python
54 lines
1.7 KiB
Python
from contextvars import ContextVar
|
|
from dataclasses import dataclass, replace
|
|
from contextlib import contextmanager
|
|
from typing import Optional
|
|
|
|
|
|
@dataclass(frozen=True)
class LLMCallContext:
    """Immutable bundle of attribution metadata attached to an LLM call.

    Every field has a default, so ``LLMCallContext()`` is a valid "nothing
    known yet" value. Instances are frozen: derive a modified copy with
    ``dataclasses.replace`` rather than assigning to attributes.
    """

    # Identifiers are optional; None means "not set".
    user_id: Optional[str] = None
    focus_group_id: Optional[str] = None
    persona_id: Optional[str] = None

    # Label for the calling feature; "other" is the fallback when none is given.
    feature: str = "other"

    # Optional task identifier; None when not set.
    task_id: Optional[str] = None
|
|
|
|
|
|
# Module-private ContextVar holding the attribution metadata for the LLM call in flight.
# The default is a single shared LLMCallContext built with all-default fields; because the
# dataclass is frozen, updates install a new instance rather than mutating this one.
_ctx: ContextVar[LLMCallContext] = ContextVar("llm_call_context", default=LLMCallContext())
|
|
|
|
|
|
def current_context() -> LLMCallContext:
    """Return the LLMCallContext active in the current execution context.

    Falls back to the ContextVar's default (an all-defaults LLMCallContext)
    when nothing has been set.
    """
    active = _ctx.get()
    return active
|
|
|
|
|
|
def set_llm_context(**overrides) -> None:
    """Overwrite fields of the current LLM context, with no automatic restore.

    Intended for service entry points where values such as
    feature/focus_group_id/persona_id should stay in effect for the rest of
    the call tree, including nested awaits. The new value remains in the
    current context until something sets it again.

    In contrast to llm_context(), the reset token is discarded, so the
    previous value cannot be restored afterwards. Prefer llm_context() in
    re-entrant helpers.

    Args:
        **overrides: LLMCallContext field names mapped to their new values.
            Fields not named keep their current value. Names that are not
            LLMCallContext fields are rejected by dataclasses.replace.
    """
    # The dataclass is frozen, so build a modified copy and install that.
    updated = replace(_ctx.get(), **overrides)
    _ctx.set(updated)
|
|
|
|
|
|
@contextmanager
def llm_context(**overrides):
    """Temporarily apply LLM call attribution metadata for a ``with`` block.

    Example:
        with llm_context(user_id="abc", focus_group_id="xyz", feature="moderator"):
            await LLMService.generate_content(...)

    Nesting is additive: each level starts from the context that is current
    on entry and changes only the fields passed in, so an inner block
    extends the outer one instead of wiping it. On exit (normal or via an
    exception) the value that was current on entry is restored.

    asyncio tasks created while the block is active start with a copy of
    the current context, so they see these values.
    NOTE(review): the original docstring also claims safety across
    run_coroutine_threadsafe hops and "thread submission"; work handed to a
    plain executor does not get a copied context automatically - confirm
    for the call paths in use.

    Args:
        **overrides: LLMCallContext field names mapped to the values to use
            inside the block.
    """
    merged = replace(_ctx.get(), **overrides)
    restore_token = _ctx.set(merged)
    try:
        yield
    finally:
        # Put back exactly what was there before, even if the body raised.
        _ctx.reset(restore_token)
|