gmal-scope-builder/backend/app/utils/claude_client.py
DJP e18976fdb2 Initial commit - GMAL Scope Builder
Dockerized web app (FastAPI + React + PostgreSQL) for scoping client ratecards
against the GMAL master asset database. Features:
- GMAL data ingestion from Excel (390 assets, 120 roles, 5 model types)
- AI-powered document parsing and asset extraction (Claude Opus 4.6)
- AI matching engine with parallel batching, confidence scoring, caveats
- Ratecard builder with hours x volume calculation
- Excel and PDF export
- GMAL browser and inline editor
- AI cost tracking per project (persisted to DB)
- Debug panel for AI call inspection
- Dark theme UI with gold (#FFC407) accent

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 17:35:14 -04:00

166 lines
4.8 KiB
Python

"""Anthropic Claude API client wrapper with token tracking and debug log."""
import json
import logging
import threading
from datetime import datetime
import anthropic
from app.config import settings
logger = logging.getLogger(__name__)
MODEL = "claude-opus-4-6"
# Cost per million tokens (USD)
INPUT_COST_PER_M = 3.0
OUTPUT_COST_PER_M = 15.0
# Thread-safe token tracking + debug log
_lock = threading.Lock()
_usage = {
"total_input_tokens": 0,
"total_output_tokens": 0,
"total_cost_usd": 0.0,
"call_count": 0,
}
_debug_log: list[dict] = [] # Last N AI interactions
MAX_DEBUG_LOG = 50
def get_client() -> anthropic.Anthropic:
    """Build an Anthropic SDK client using the configured API key.

    A new client object is constructed on every call; the key is read from
    ``settings.anthropic_api_key`` at call time.
    """
    api_key = settings.anthropic_api_key
    client = anthropic.Anthropic(api_key=api_key)
    return client
def get_usage_stats() -> dict:
    """Return a copy of the cumulative usage counters.

    The copy is taken while holding the module lock, so the returned dict
    can be read or modified by the caller without affecting the live
    counters.
    """
    with _lock:
        snapshot = _usage.copy()
    return snapshot
def get_debug_log() -> list[dict]:
    """Return a shallow copy of the debug log, oldest entry first.

    The list itself is copied under the module lock; the entry dicts inside
    it are the same objects held by the live log.
    """
    with _lock:
        entries = _debug_log.copy()
    return entries
def reset_usage_stats():
    """Zero every usage counter and empty the debug log.

    Both containers are mutated in place under the module lock, so existing
    references to them remain valid.
    """
    zeroed = {
        "total_input_tokens": 0,
        "total_output_tokens": 0,
        "total_cost_usd": 0.0,
        "call_count": 0,
    }
    with _lock:
        _usage.update(zeroed)
        del _debug_log[:]
def call_claude(
    system: str,
    user_message: str,
    tools: list[dict] | None = None,
    tool_choice: dict | None = None,
    max_tokens: int = 4096,
) -> "anthropic.types.Message":
    """Make a Claude API call, optionally with tool_use for structured output.

    Sends a single user message with the given system prompt, updates the
    module-wide usage counters, and records a summary of the interaction in
    the in-memory debug log (success or failure).

    Args:
        system: System prompt passed as ``system``.
        user_message: Content of the single ``user`` message.
        tools: Optional tool definitions passed through as ``tools``. Each
            definition must contain a ``"name"`` key (used for the debug log).
        tool_choice: Passed as ``tool_choice`` when ``tools`` is non-empty;
            defaults to ``{"type": "auto"}``. Ignored when ``tools`` is falsy.
        max_tokens: Value passed as ``max_tokens``.

    Returns:
        The response object returned by ``client.messages.create``, with an
        extra ``_usage_info`` attribute holding ``input_tokens``,
        ``output_tokens`` and ``cost_usd`` for this call so callers can
        persist usage per project.

    Raises:
        Exception: Any exception raised by the API call or while processing
            the response is logged, recorded in the debug log, and re-raised.
    """
    # Imported locally: the module only imports ``datetime`` itself.
    from datetime import timezone

    client = get_client()
    kwargs = {
        "model": MODEL,
        "max_tokens": max_tokens,
        "system": system,
        "messages": [{"role": "user", "content": user_message}],
    }
    if tools:
        kwargs["tools"] = tools
        kwargs["tool_choice"] = tool_choice or {"type": "auto"}

    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the stored ISO string keeps the same naive-UTC format.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    # Build debug entry (prompts are truncated to keep the log small).
    entry = {
        "timestamp": timestamp,
        "model": MODEL,
        "system_prompt": system[:500] + ("..." if len(system) > 500 else ""),
        "user_message_length": len(user_message),
        "user_message_preview": user_message[:1000] + ("..." if len(user_message) > 1000 else ""),
        "tools": [t["name"] for t in tools] if tools else [],
        "tool_choice": tool_choice,
        "status": "pending",
    }
    try:
        response = client.messages.create(**kwargs)

        # Summarize response content for the debug log.
        response_parts = []
        tool_use_count = 0
        for block in response.content:
            if block.type == "text":
                response_parts.append({"type": "text", "text": block.text[:1000]})
            elif block.type == "tool_use":
                tool_use_count += 1
                response_parts.append({
                    "type": "tool_use",
                    "tool": block.name,
                    "input_preview": json.dumps(block.input, default=str)[:2000],
                })
        entry["status"] = "success"
        entry["stop_reason"] = response.stop_reason
        entry["response_parts"] = response_parts
        entry["tool_results_count"] = tool_use_count

        # Track token usage; counts stay at zero if the response has none.
        inp = 0
        out = 0
        cost = 0.0
        usage = getattr(response, "usage", None)
        if usage:
            inp = usage.input_tokens or 0
            out = usage.output_tokens or 0
            cost = (inp / 1_000_000) * INPUT_COST_PER_M + (out / 1_000_000) * OUTPUT_COST_PER_M
        entry["input_tokens"] = inp
        entry["output_tokens"] = out
        entry["cost_usd"] = round(cost, 6)

        with _lock:
            _usage["total_input_tokens"] += inp
            _usage["total_output_tokens"] += out
            _usage["total_cost_usd"] += cost
            _usage["call_count"] += 1
            # Read the running total while still holding the lock so the
            # logged figure is consistent with this call's update.
            running_total = _usage["total_cost_usd"]

        # Attach usage to response for callers to save per-project.
        response._usage_info = {"input_tokens": inp, "output_tokens": out, "cost_usd": cost}
        logger.info(
            "Claude API call: %s in / %s out tokens, $%.4f this call, $%.4f total",
            inp,
            out,
            cost,
            running_total,
        )
        return response
    except Exception as e:
        entry["status"] = "error"
        entry["error"] = str(e)
        logger.error("Claude API error: %s", e)
        raise
    finally:
        # Record the interaction whether it succeeded or failed, then trim
        # the oldest entries so the log never exceeds MAX_DEBUG_LOG.
        with _lock:
            _debug_log.append(entry)
            overflow = len(_debug_log) - MAX_DEBUG_LOG
            if overflow > 0:
                del _debug_log[:overflow]
def extract_tool_result(response) -> dict | None:
"""Extract the first tool_use result from a Claude response."""
for block in response.content:
if block.type == "tool_use":
return block.input
return None
def extract_text(response) -> str:
    """Join the text of every ``text`` block in *response* with newlines.

    Non-text blocks are skipped; an empty string is returned when the
    response contains no text blocks.
    """
    return "\n".join(
        part.text for part in response.content if part.type == "text"
    )