fix: pricing engine unit keys and google→vertex_ai provider alias

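- compute_total_cost: read the new unit keys token_input/token_output/char/second, falling back to the old input_tokens/output_tokens/chars/seconds keys for backward compatibility
- _PROVIDER_ALIAS: map google/gemini → vertex_ai-language-models before the model_prices lookup
- _infer_provider: gemini models → vertex_ai-language-models
- routes_public: also register POST /record as an alias of POST /usage/record (hidden from the OpenAPI schema)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>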
2026-04-27 14:41:14 +01:00 · 2026-04-27 14:41:14 +01:00 · 2822e9cb99
commit 2822e9cb99
parent 36ce10eb50
2 changed files with 13 additions and 5 deletions
--- a/backend/app/api/v1/routes_public.py
+++ b/backend/app/api/v1/routes_public.py
@@ -121,6 +121,7 @@ class RecordResponse(BaseModel):


@router.post("/usage/record", response_model=RecordResponse)
+@router.post("/record", response_model=RecordResponse, include_in_schema=False)
 async def record_usage(
    body: RecordRequest,
    request: Request,
@@ -324,7 +325,7 @@ async def health(db: AsyncIOMotorDatabase = Depends(get_db)):
 def _infer_provider(model: str) -> str:
    model_lower = model.lower()
    if "gemini" in model_lower:
-        return "google"
+        return "vertex_ai-language-models"
    if "gpt" in model_lower or "o1" in model_lower or "o3" in model_lower:
        return "openai"
    if "claude" in model_lower:
--- a/backend/app/services/pricing_engine.py
+++ b/backend/app/services/pricing_engine.py
@@ -114,6 +114,12 @@ async def compute_cost(
    return round(cost, 8), price_id


+_PROVIDER_ALIAS = {
+    "google": "vertex_ai-language-models",
+    "gemini": "vertex_ai-language-models",
+}
+
+
 async def compute_total_cost(
    db: AsyncIOMotorDatabase,
    provider: str,
@@ -125,6 +131,7 @@ async def compute_total_cost(
    if ts is None:
        ts = datetime.now(timezone.utc)
    ts_date = ts.date().isoformat()
+    provider = _PROVIDER_ALIAS.get(provider, provider)

    cursor = db.model_prices.find({
        "provider": provider,
@@ -149,13 +156,13 @@ async def compute_total_cost(
        ppu = rec["price_per_unit_usd"]
        price_id = str(rec["_id"])
        if u == "token_input":
-            total_cost += ppu * units.get("input_tokens", 0)
+            total_cost += ppu * units.get("token_input", units.get("input_tokens", 0))
        elif u == "token_output":
-            total_cost += ppu * units.get("output_tokens", 0)
+            total_cost += ppu * units.get("token_output", units.get("output_tokens", 0))
        elif u == "char":
-            total_cost += ppu * units.get("chars", 0)
+            total_cost += ppu * units.get("char", units.get("chars", 0))
        elif u == "second":
-            total_cost += ppu * units.get("seconds", 0)
+            total_cost += ppu * units.get("second", units.get("seconds", 0))
        elif u == "request":
            total_cost += ppu