- Model renames: gpt-5.2 → gpt-5.4-2026-03-05, gemini-3-pro-preview → gemini-3.1-pro-preview; retire gpt-4.1 via alias fallback - New: llm_usage_context.py (ContextVar-based attribution), model_pricing.py (tiered pricing + 60s cache), usage_event.py (append-only telemetry), quota.py (user/FG quota enforcement with 80% warning) - Wire _record_usage into all 3 LLM methods; set_llm_context at every service entry point - Fix admin_required decorator (was sync, never awaited User.find_by_id); add active_required and with_user_context decorators - Inject user_id into ContextVar from JWT on every authenticated request - Add DB indexes for usage_events, model_pricing, users collections - Seed script for model pricing (gpt-5.4 single-tier, gemini-3.1 two-tier 200k threshold) - Fix parse_json_response NameError (logger undefined at module level) - 70 passing tests: conftest.py with sys.modules stubs, test_usage_infrastructure.py (52 tests), rewrite stale test_llm_service.py (18 tests) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
101 lines
3 KiB
Python
101 lines
3 KiB
Python
#!/usr/bin/env python3
|
|
"""Seed model pricing for Semblance.
|
|
|
|
Run from the backend/ directory:
|
|
source venv/bin/activate
|
|
python scripts/seed_model_pricing.py
|
|
|
|
Idempotent — upserts on {model, effective_from}. Safe to re-run.
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from dotenv import load_dotenv
|
|
load_dotenv()
|
|
|
|
import pymongo
|
|
from datetime import datetime, timezone
|
|
|
|
def build_mongo_uri(env):
    """Return the MongoDB connection URI described by the mapping *env*.

    Resolution order:

    1. A non-empty ``MONGO_URI`` is returned unchanged.
    2. If both ``MONGO_USER`` and ``MONGO_PASS`` are non-empty, an
       authenticated URI targeting ``semblance_db`` with
       ``authSource=admin`` is built.
    3. Otherwise an unauthenticated ``mongodb://host:port`` URI is built.

    ``MONGO_HOST`` and ``MONGO_PORT`` default to ``localhost`` and ``27017``.

    Args:
        env: Any mapping with a ``.get`` method (e.g. ``os.environ`` or a
            plain ``dict``).

    Returns:
        The connection URI as a string.
    """
    # Function-scope import keeps this block independent of the file's
    # import header.
    from urllib.parse import quote_plus

    explicit_uri = env.get("MONGO_URI")
    if explicit_uri:
        return explicit_uri

    host = env.get("MONGO_HOST", "localhost")
    port = env.get("MONGO_PORT", "27017")
    user = env.get("MONGO_USER")
    password = env.get("MONGO_PASS")

    if user and password:
        # Credentials must be percent-encoded: a raw '@', ':', '/' or '%'
        # would otherwise be read as a URI delimiter and corrupt the URI.
        return (
            f"mongodb://{quote_plus(user)}:{quote_plus(password)}"
            f"@{host}:{port}/semblance_db?authSource=admin"
        )
    return f"mongodb://{host}:{port}"


# Raw settings as read from the environment (after load_dotenv()).
MONGO_URI = os.environ.get("MONGO_URI")
MONGO_USER = os.environ.get("MONGO_USER")
MONGO_PASS = os.environ.get("MONGO_PASS")
MONGO_HOST = os.environ.get("MONGO_HOST", "localhost")
MONGO_PORT = os.environ.get("MONGO_PORT", "27017")

# No explicit URI supplied: assemble one from the individual settings.
if not MONGO_URI:
    MONGO_URI = build_mongo_uri(os.environ)
|
|
|
|
# Start of the pricing window for every seeded row: project start, so that
# historical backfill is covered as well.
EFFECTIVE_FROM = datetime(2024, 1, 1, tzinfo=timezone.utc)


def _tier(threshold, input_rate, output_rate, cached_rate=None):
    """Build one pricing-tier dict.

    Rates are stored under ``*_per_mtok`` keys (presumably price per million
    tokens in the row's currency -- confirm against the pricing consumer).
    ``image_per_mtok`` is always ``None`` for the rows seeded here.
    """
    return {
        "threshold_input_tokens": threshold,
        "input_per_mtok": input_rate,
        "cached_input_per_mtok": cached_rate,
        "output_per_mtok": output_rate,
        "image_per_mtok": None,
    }


def _pricing_row(model, provider, tiers, notes):
    """Build one ``model_pricing`` document: USD, open-ended, from EFFECTIVE_FROM."""
    return {
        "model": model,
        "provider": provider,
        "currency": "USD",
        "tiers": tiers,
        "effective_from": EFFECTIVE_FROM,
        "effective_until": None,
        "notes": notes,
    }


# Documents upserted by main(): one per model.
PRICING_ROWS = [
    # Single tier, with a cached-input rate.
    _pricing_row(
        "gpt-5.4-2026-03-05",
        "openai",
        [_tier(0, 2.50, 15.00, cached_rate=0.25)],
        "gpt-5.4-2026-03-05 pricing as of 2026-04",
    ),
    # Two tiers: the second one starts at a 200k input-token threshold.
    _pricing_row(
        "gemini-3.1-pro-preview",
        "gemini",
        [
            _tier(0, 2.00, 12.00),
            _tier(200_000, 4.00, 18.00),
        ],
        "gemini-3.1-pro-preview pricing: $2/$12 (<200k ctx), $4/$18 (>=200k ctx)",
    ),
]
|
|
|
|
|
|
def main():
    """Seed the ``model_pricing`` collection of ``semblance_db``.

    Connects using the module-level ``MONGO_URI`` (5 s server-selection
    timeout), ensures the ``(model, effective_from)`` index exists, then
    upserts every entry of ``PRICING_ROWS`` keyed on
    ``{model, effective_from}`` and prints one status line per row.

    The client is closed even when index creation or an upsert raises, so a
    failed run does not leave the connection open.

    Raises:
        Whatever PyMongo raises on connection or write failure; errors are
        not caught here.
    """
    client = pymongo.MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
    try:
        db = client.semblance_db

        # NOTE(review): recent MongoDB servers reportedly ignore the
        # `background` option; kept unchanged for older deployments -- confirm.
        db.model_pricing.create_index(
            [("model", pymongo.ASCENDING), ("effective_from", pymongo.DESCENDING)],
            background=True,
        )

        for row in PRICING_ROWS:
            key = {"model": row["model"], "effective_from": row["effective_from"]}
            result = db.model_pricing.update_one(key, {"$set": row}, upsert=True)
            # upserted_id is only set when the upsert created a new document.
            action = "inserted" if result.upserted_id is not None else "updated"
            print(f"  {action}: {row['model']} (effective from {row['effective_from'].date()})")

        print(f"\nDone. {len(PRICING_ROWS)} pricing rows seeded.")
    finally:
        client.close()
|
|
|
|
|
|
# Script entry point: seed only when this file is executed directly, not when
# it is imported.
if __name__ == "__main__":
    main()
|