cohorta/backend/app/models/usage_event.py
Vadym Samoilenko 3e9ccafad2 Add LLM usage tracking infrastructure (Phases A-C)
- Model renames: gpt-5.2 → gpt-5.4-2026-03-05, gemini-3-pro-preview → gemini-3.1-pro-preview; retire gpt-4.1 via alias fallback
- New: llm_usage_context.py (ContextVar-based attribution), model_pricing.py (tiered pricing + 60s cache), usage_event.py (append-only telemetry), quota.py (user/FG quota enforcement with 80% warning)
- Wire _record_usage into all 3 LLM methods; set_llm_context at every service entry point
- Fix admin_required decorator (was sync, never awaited User.find_by_id); add active_required and with_user_context decorators
- Inject user_id into ContextVar from JWT on every authenticated request
- Add DB indexes for usage_events, model_pricing, users collections
- Seed script for model pricing (gpt-5.4 single-tier, gemini-3.1 two-tier 200k threshold)
- Fix parse_json_response NameError (logger undefined at module level)
- 70 passing tests: conftest.py with sys.modules stubs, test_usage_infrastructure.py (52 tests), rewrite stale test_llm_service.py (18 tests)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-24 18:08:27 +01:00

91 lines
3.2 KiB
Python

from app.db import get_db
from datetime import datetime, timezone
import logging
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Feature labels accepted on a usage event. UsageEvent.record stores any
# label that is not listed here as "other".
VALID_FEATURES = {
    "moderator",
    "persona_response",
    "persona_generate",
    "persona_modify",
    "persona_export",
    "key_themes",
    "summary",
    "discussion_guide",
    "image_description",
    "conversation_decision",
    "other",
}
class UsageEvent:
    """Append-only LLM usage telemetry kept in the ``usage_events`` collection.

    Both methods are best-effort: every failure is logged as a warning and
    swallowed, so a telemetry problem never propagates into the caller.
    """

    @staticmethod
    async def record(
        *,
        provider: str,
        model: str,
        prompt_tokens: int,
        completion_tokens: int,
        cached_tokens: int = 0,
        reasoning_tokens: int = 0,
        cost_usd: dict,
        price_snapshot_id=None,
        duration_ms: int = 0,
        retry_count: int = 0,
        status: str = "success",
        error: str | None = None,
        is_estimated: bool = False,
        estimate_method: str | None = None,
        user_id: str | None = None,
        focus_group_id: str | None = None,
        persona_id: str | None = None,
        feature: str = "other",
        task_id: str | None = None,
    ) -> None:
        """Append one usage event doc. Never raises — telemetry must not kill LLM calls.

        All arguments are keyword-only and are stored under keys of the same
        name, together with a UTC ``ts`` timestamp and a derived
        ``total_tokens`` (prompt + completion).

        Args:
            provider: LLM provider label.
            model: Model identifier.
            prompt_tokens: Prompt token count.
            completion_tokens: Completion token count.
            cached_tokens: Cached token count.
            reasoning_tokens: Reasoning token count.
            cost_usd: Cost breakdown; ``sum_cost`` aggregates its ``total``
                key, so that key should be numeric.
            price_snapshot_id: Identifier of the pricing record used, if any.
            duration_ms: Call duration in milliseconds.
            retry_count: Number of retries.
            status: Outcome label, ``"success"`` by default.
            error: Error text; stored truncated to 500 characters, or as
                ``None`` when empty/absent.
            is_estimated: Whether the figures are estimates.
            estimate_method: How the estimate was produced, if applicable.
            user_id: Attributed user, if known.
            focus_group_id: Attributed focus group, if known.
            persona_id: Attributed persona, if known.
            feature: One of ``VALID_FEATURES``; anything else is stored as
                ``"other"``.
            task_id: Attributed task, if known.
        """
        try:
            if feature not in VALID_FEATURES:
                feature = "other"

            doc = {
                "ts": datetime.now(timezone.utc),
                "user_id": user_id,
                "focus_group_id": focus_group_id,
                "persona_id": persona_id,
                "task_id": task_id,
                "feature": feature,
                "provider": provider,
                "model": model,
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "cached_tokens": cached_tokens,
                "reasoning_tokens": reasoning_tokens,
                # A missing (None) count is treated as 0 here so the event is
                # still stored instead of being dropped by a TypeError.
                "total_tokens": (prompt_tokens or 0) + (completion_tokens or 0),
                "cost_usd": cost_usd,
                "price_snapshot_id": price_snapshot_id,
                "duration_ms": duration_ms,
                "retry_count": retry_count,
                "status": status,
                # str() lets callers pass an exception object; slicing a
                # non-string would otherwise raise and lose the whole event.
                "error": str(error)[:500] if error else None,
                "is_estimated": is_estimated,
                "estimate_method": estimate_method,
            }

            db = await get_db()
            await db.usage_events.insert_one(doc)

            # Flag events that cannot be fully attributed.
            if not user_id or not focus_group_id:
                logger.info(
                    "Usage event recorded with partial context: user_id=%s "
                    "focus_group_id=%s feature=%s model=%s",
                    user_id,
                    focus_group_id,
                    feature,
                    model,
                )
        except Exception:
            # Deliberate catch-all: this is the telemetry boundary.
            logger.warning("Failed to record usage event (non-fatal)", exc_info=True)

    @staticmethod
    async def sum_cost(match: dict) -> float:
        """Return total cost_usd.total for the given match filter.

        Args:
            match: Filter document used as the ``$match`` stage.

        Returns:
            The summed ``cost_usd.total`` over matching events, or ``0.0``
            when nothing matches or the query fails (the failure is logged).

        NOTE(review): a database failure is indistinguishable from zero
        spend here — confirm that callers relying on this (e.g. quota
        checks) are meant to fail open.
        """
        try:
            db = await get_db()
            pipeline = [
                {"$match": match},
                # _id=None collapses every matching event into one group.
                {"$group": {"_id": None, "total": {"$sum": "$cost_usd.total"}}},
            ]
            result = await db.usage_events.aggregate(pipeline).to_list(1)
            return result[0]["total"] if result else 0.0
        except Exception:
            logger.warning("Failed to sum usage costs (non-fatal)", exc_info=True)
            return 0.0