- Model renames: gpt-5.2 → gpt-5.4-2026-03-05, gemini-3-pro-preview → gemini-3.1-pro-preview; retire gpt-4.1 via alias fallback
- New: llm_usage_context.py (ContextVar-based attribution), model_pricing.py (tiered pricing + 60s cache), usage_event.py (append-only telemetry), quota.py (user/FG quota enforcement with 80% warning)
- Wire _record_usage into all 3 LLM methods; set_llm_context at every service entry point
- Fix admin_required decorator (was sync, never awaited User.find_by_id); add active_required and with_user_context decorators
- Inject user_id into ContextVar from JWT on every authenticated request
- Add DB indexes for usage_events, model_pricing, users collections
- Seed script for model pricing (gpt-5.4 single-tier, gemini-3.1 two-tier 200k threshold)
- Fix parse_json_response NameError (logger undefined at module level)
- 70 passing tests: conftest.py with sys.modules stubs, test_usage_infrastructure.py (52 tests), rewrite stale test_llm_service.py (18 tests)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
91 lines
3.2 KiB
Python
91 lines
3.2 KiB
Python
from app.db import get_db
|
|
from datetime import datetime, timezone
|
|
import logging
|
|
|
|
# Module-level logger; UsageEvent reports its swallowed (non-fatal) failures
# and partial-context events through it.
logger = logging.getLogger(__name__)
|
|
|
|
# Feature tags accepted on a usage event. UsageEvent.record() stores any tag
# that is not in this set as "other".
VALID_FEATURES = {
    # moderator / persona features
    "moderator",
    "persona_response",
    "persona_generate",
    "persona_modify",
    "persona_export",
    # analysis / content features
    "key_themes",
    "summary",
    "discussion_guide",
    "image_description",
    "conversation_decision",
    # fallback bucket
    "other",
}
|
|
|
|
|
|
class UsageEvent:
    """Append-only usage telemetry stored in the ``usage_events`` collection.

    Both methods are best-effort: any exception is logged as a warning and
    swallowed, so usage accounting cannot break the calling code path.
    """

    @staticmethod
    async def record(
        *,
        provider: str,
        model: str,
        prompt_tokens: int,
        completion_tokens: int,
        cached_tokens: int = 0,
        reasoning_tokens: int = 0,
        cost_usd: dict,
        price_snapshot_id=None,
        duration_ms: int = 0,
        retry_count: int = 0,
        status: str = "success",
        error: str | None = None,
        is_estimated: bool = False,
        estimate_method: str | None = None,
        user_id: str | None = None,
        focus_group_id: str | None = None,
        persona_id: str | None = None,
        feature: str = "other",
        task_id: str | None = None,
    ) -> None:
        """Append one usage event doc. Never raises — telemetry must not kill LLM calls.

        All arguments are keyword-only and are stored on the document under
        the same key as the parameter name, with these adjustments:

        * ``ts`` is set to the current UTC time.
        * ``feature`` is replaced by ``"other"`` when it is not in
          ``VALID_FEATURES``.
        * ``total_tokens`` is ``prompt_tokens + completion_tokens``; a
          ``None`` count is treated as 0 for the sum only.
        * ``error`` is stored as text truncated to 500 characters, or
          ``None`` when empty/absent. Non-string values (for example an
          exception instance) are converted with ``str()``.

        ``cost_usd`` is stored as given; ``sum_cost`` aggregates its
        ``total`` key, so callers are expected to provide one.

        An info line is logged when ``user_id`` or ``focus_group_id`` is
        missing, so partially-attributed events can be spotted.
        """
        try:
            if feature not in VALID_FEATURES:
                feature = "other"

            # str() keeps the event when a caller hands over an exception
            # object instead of text; slicing it directly would raise and the
            # failure event would be dropped by the handler below.
            error_text = str(error)[:500] if error else None

            # A provider may report no usage for one of the counts; do not
            # lose the whole event because the sum cannot be computed.
            total_tokens = (prompt_tokens or 0) + (completion_tokens or 0)

            doc = {
                "ts": datetime.now(timezone.utc),
                "user_id": user_id,
                "focus_group_id": focus_group_id,
                "persona_id": persona_id,
                "task_id": task_id,
                "feature": feature,
                "provider": provider,
                "model": model,
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "cached_tokens": cached_tokens,
                "reasoning_tokens": reasoning_tokens,
                "total_tokens": total_tokens,
                "cost_usd": cost_usd,
                "price_snapshot_id": price_snapshot_id,
                "duration_ms": duration_ms,
                "retry_count": retry_count,
                "status": status,
                "error": error_text,
                "is_estimated": is_estimated,
                "estimate_method": estimate_method,
            }

            db = await get_db()
            await db.usage_events.insert_one(doc)

            if not user_id or not focus_group_id:
                # Deferred %-formatting: the message is only built when the
                # record is actually emitted.
                logger.info(
                    "Usage event recorded with partial context: user_id=%s "
                    "focus_group_id=%s feature=%s model=%s",
                    user_id,
                    focus_group_id,
                    feature,
                    model,
                )
        except Exception:
            # Deliberate catch-all: recording is best-effort by contract.
            logger.warning("Failed to record usage event (non-fatal)", exc_info=True)

    @staticmethod
    async def sum_cost(match: dict) -> float:
        """Return total cost_usd.total for the given match filter.

        ``match`` is used verbatim as the ``$match`` stage of an aggregation
        over ``usage_events``; the matching documents' ``cost_usd.total``
        values are summed into a single group.

        Returns ``0.0`` when no document matches. On any error the failure is
        logged and ``0.0`` is returned as well.

        NOTE(review): callers cannot distinguish "no spend" from "lookup
        failed". If this feeds quota enforcement it fails open — confirm that
        is intended.
        """
        try:
            db = await get_db()
            pipeline = [
                {"$match": match},
                # _id=None collapses every matched event into one group.
                {"$group": {"_id": None, "total": {"$sum": "$cost_usd.total"}}},
            ]
            result = await db.usage_events.aggregate(pipeline).to_list(1)
            return result[0]["total"] if result else 0.0
        except Exception:
            logger.warning("Failed to sum usage costs (non-fatal)", exc_info=True)
            return 0.0
|