gmal-scope-builder/backend/app/utils/claude_client.py
DJP 26d3435be0 Improve matching, upload UX, collapse fix, full catalog approach
- Upload now shows live stage progress (uploading -> extracting -> AI parsing -> done)
- Fix match group collapse: proper React state instead of DOM manipulation
- Replace pre-filter with full GMAL catalog sent to Claude (~3k tokens, <$0.01)
  - FTS and keyword matching missed too many semantic matches
  - Claude now sees all 243 assets and uses semantic understanding
- Improved system prompt with terminology bridges for better scoring
- Per-project AI cost tracking persisted to DB
- Parallel matching with cancel support
- Auto-select matches >= 80%, YOLO button for rest
- Debug panel for AI call inspection

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 19:22:08 -04:00

166 lines
4.7 KiB
Python

"""Anthropic Claude API client wrapper with token tracking and debug log."""
import json
import logging
import threading
from datetime import datetime
import anthropic
from app.config import settings
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Model identifier sent with every request issued through call_claude().
MODEL = "claude-opus-4-6"

# Cost per million tokens (USD)
# NOTE(review): these two rates drive every cost figure this module reports;
# confirm that 3.0 / 15.0 match the published pricing for MODEL — TODO confirm.
INPUT_COST_PER_M = 3.0
OUTPUT_COST_PER_M = 15.0

# Thread-safe token tracking + debug log
# _lock is acquired around reads and writes of _usage and _debug_log.
_lock = threading.Lock()

# Running totals, incremented after each successful call_claude() and zeroed
# by reset_usage_stats().
_usage = {
    "total_input_tokens": 0,
    "total_output_tokens": 0,
    "total_cost_usd": 0.0,
    "call_count": 0,
}

# One dict per call_claude() invocation (successful or failed), oldest first.
_debug_log: list[dict] = []  # Last N AI interactions

# Upper bound on len(_debug_log); the oldest entry is dropped beyond this.
MAX_DEBUG_LOG = 50
def get_client() -> anthropic.Anthropic:
    """Build an Anthropic SDK client from the configured API key.

    A new client object is constructed on every call; the key is read from
    ``settings.anthropic_api_key`` at call time.
    """
    api_key = settings.anthropic_api_key
    return anthropic.Anthropic(api_key=api_key)
def get_usage_stats() -> dict:
    """Return a snapshot of the cumulative usage counters.

    The returned dict is a copy taken while holding the module lock, so the
    caller may mutate it without affecting the live totals.
    """
    with _lock:
        snapshot = dict(_usage)
    return snapshot
def get_debug_log() -> list[dict]:
    """Return a copy of the debug log, oldest entry first.

    Only the list is copied (under the module lock); the entry dicts inside
    it are the same objects held by the live log.
    """
    with _lock:
        entries = _debug_log[:]
    return entries
def reset_usage_stats():
    """Zero every usage counter and empty the debug log.

    Both structures are reset in place while holding the module lock, so
    existing references to them stay valid.
    """
    zeroed = {
        "total_input_tokens": 0,
        "total_output_tokens": 0,
        "total_cost_usd": 0.0,
        "call_count": 0,
    }
    with _lock:
        _usage.update(zeroed)
        _debug_log.clear()
def _preview(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, adding "..." if anything was cut."""
    return text[:limit] + ("..." if len(text) > limit else "")


def call_claude(
    system: str,
    user_message: str,
    tools: list[dict] | None = None,
    tool_choice: dict | None = None,
    max_tokens: int = 4096,
) -> "anthropic.types.Message":
    """Make a Claude API call, optionally with tool_use for structured output.

    Sends a single-turn request (one user message plus a system prompt) to
    ``MODEL``, updates the module-wide usage counters, and records a summary
    of the interaction in the debug log whether the call succeeds or fails.

    Args:
        system: System prompt for the request.
        user_message: Content of the single user message.
        tools: Tool definitions to offer the model. When falsy, neither
            ``tools`` nor ``tool_choice`` is sent.
        tool_choice: Tool-choice directive; defaults to ``{"type": "auto"}``
            when ``tools`` is given and this is falsy.
        max_tokens: Value passed through as the request's ``max_tokens``.

    Returns:
        The response object returned by ``client.messages.create``. A
        ``_usage_info`` attribute (``input_tokens``, ``output_tokens``,
        ``cost_usd``) is attached so callers can persist per-project cost.
        Use ``extract_tool_result`` / ``extract_text`` to read its content.

    Raises:
        Exception: Anything raised while calling the API or summarising the
            response is logged, recorded in the debug entry, and re-raised
            unchanged.
    """
    # Function-scope import: the module only imports `datetime` from datetime.
    from datetime import timezone

    client = get_client()
    kwargs = {
        "model": MODEL,
        "max_tokens": max_tokens,
        "system": system,
        "messages": [{"role": "user", "content": user_message}],
    }
    if tools:
        kwargs["tools"] = tools
        kwargs["tool_choice"] = tool_choice or {"type": "auto"}

    # Build debug entry. The timestamp stays a naive UTC ISO string (same
    # format as before) but avoids datetime.utcnow(), deprecated in 3.12.
    entry = {
        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        "model": MODEL,
        "system_prompt": _preview(system, 500),
        "user_message_length": len(user_message),
        "user_message_preview": _preview(user_message, 1000),
        "tools": [t["name"] for t in tools] if tools else [],
        "tool_choice": tool_choice,
        "status": "pending",
    }

    try:
        response = client.messages.create(**kwargs)

        # Summarise response content for the debug log (truncated previews).
        response_parts = []
        tool_use_count = 0
        for block in response.content:
            if block.type == "text":
                response_parts.append({"type": "text", "text": block.text[:1000]})
            elif block.type == "tool_use":
                tool_use_count += 1
                response_parts.append({
                    "type": "tool_use",
                    "tool": block.name,
                    "input_preview": json.dumps(block.input, default=str)[:2000],
                })

        entry["status"] = "success"
        entry["stop_reason"] = response.stop_reason
        entry["response_parts"] = response_parts
        entry["tool_results_count"] = tool_use_count

        # Track token usage; a missing or empty usage object counts as zero.
        usage = getattr(response, "usage", None)
        inp = (usage.input_tokens or 0) if usage else 0
        out = (usage.output_tokens or 0) if usage else 0
        cost = (inp / 1_000_000) * INPUT_COST_PER_M + (out / 1_000_000) * OUTPUT_COST_PER_M

        entry["input_tokens"] = inp
        entry["output_tokens"] = out
        entry["cost_usd"] = round(cost, 6)

        with _lock:
            _usage["total_input_tokens"] += inp
            _usage["total_output_tokens"] += out
            _usage["total_cost_usd"] += cost
            _usage["call_count"] += 1
            # Read the running total while still holding the lock so the log
            # line below reports a value consistent with this update.
            running_total = _usage["total_cost_usd"]

        # Attach usage to response for callers to save per-project
        response._usage_info = {"input_tokens": inp, "output_tokens": out, "cost_usd": cost}

        logger.info(
            "Claude API call: %s in / %s out tokens, $%.4f this call, $%.4f total",
            inp,
            out,
            cost,
            running_total,
        )
        return response
    except Exception as e:
        entry["status"] = "error"
        entry["error"] = str(e)
        logger.error("Claude API error: %s", e)
        raise
    finally:
        # Always record the interaction, then trim the log to its size cap.
        with _lock:
            _debug_log.append(entry)
            overflow = len(_debug_log) - MAX_DEBUG_LOG
            if overflow > 0:
                del _debug_log[:overflow]
def extract_tool_result(response) -> dict | None:
"""Extract the first tool_use result from a Claude response."""
for block in response.content:
if block.type == "tool_use":
return block.input
return None
def extract_text(response) -> str:
    """Join the text of every ``text`` block in the response with newlines.

    Non-text blocks are skipped; a response with no text blocks yields "".
    """
    return "\n".join(
        blk.text for blk in response.content if blk.type == "text"
    )