- Upload now shows live stage progress (uploading -> extracting -> AI parsing -> done) - Fix match group collapse: proper React state instead of DOM manipulation - Replace pre-filter with full GMAL catalog sent to Claude (~3k tokens, <$0.01) - FTS and keyword matching missed too many semantic matches - Claude now sees all 243 assets and uses semantic understanding - Improved system prompt with terminology bridges for better scoring - Per-project AI cost tracking persisted to DB - Parallel matching with cancel support - Auto-select matches >= 80%, YOLO button for rest - Debug panel for AI call inspection Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
166 lines
4.7 KiB
Python
166 lines
4.7 KiB
Python
"""Anthropic Claude API client wrapper with token tracking and debug log."""
|
|
|
|
import json
|
|
import logging
|
|
import threading
|
|
from datetime import datetime
|
|
|
|
import anthropic
|
|
|
|
from app.config import settings
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Model identifier sent with every API call made through this module.
MODEL = "claude-opus-4-6"

# Cost per million tokens (USD), used to estimate per-call and cumulative spend.
# NOTE(review): confirm these rates match the published pricing for MODEL.
INPUT_COST_PER_M = 3.0
OUTPUT_COST_PER_M = 15.0

# Shared state below is guarded by _lock: cumulative usage counters plus a
# bounded debug log of recent AI interactions.
_lock = threading.Lock()
_usage = dict(
    total_input_tokens=0,
    total_output_tokens=0,
    total_cost_usd=0.0,
    call_count=0,
)
_debug_log: list[dict] = []  # Last N AI interactions (N = MAX_DEBUG_LOG)
MAX_DEBUG_LOG = 50
|
|
|
|
|
|
def get_client() -> anthropic.Anthropic:
    """Build a new Anthropic SDK client using the API key from app settings.

    A fresh client object is constructed on every call.
    """
    api_key = settings.anthropic_api_key
    return anthropic.Anthropic(api_key=api_key)
|
|
|
|
|
|
def get_usage_stats() -> dict:
    """Return a copy of the cumulative usage counters.

    The copy is taken while holding the module lock, so the returned dict is
    a consistent snapshot that callers may mutate freely.
    """
    with _lock:
        snapshot = dict(_usage)
    return snapshot
|
|
|
|
|
|
def get_debug_log() -> list[dict]:
    """Return a shallow copy of the debug log.

    The list itself is copied under the module lock; the entry dicts inside
    it are the same objects held by the log.
    """
    with _lock:
        entries = _debug_log[:]
    return entries
|
|
|
|
|
|
def reset_usage_stats():
    """Zero all cumulative usage counters and empty the debug log.

    Both structures are mutated in place under the module lock, so other
    references to them observe the reset.
    """
    with _lock:
        _usage.update(
            total_input_tokens=0,
            total_output_tokens=0,
            total_cost_usd=0.0,
            call_count=0,
        )
        del _debug_log[:]
|
|
|
|
|
|
def call_claude(
    system: str,
    user_message: str,
    tools: list[dict] | None = None,
    tool_choice: dict | None = None,
    max_tokens: int = 4096,
) -> "anthropic.types.Message":
    """Make a Claude API call, optionally with tool_use for structured output.

    Every call, successful or not, appends one entry to the module debug log
    (trimmed to MAX_DEBUG_LOG entries). Successful calls also add their token
    counts and estimated cost to the cumulative usage counters.

    Args:
        system: System prompt sent with the request.
        user_message: Content of the single user message.
        tools: Optional tool definitions. When truthy they are sent along
            with a tool_choice.
        tool_choice: Tool choice sent with ``tools``; falls back to
            ``{"type": "auto"}`` when omitted. Ignored when ``tools`` is
            empty or None.
        max_tokens: Maximum number of tokens to generate.

    Returns:
        The SDK response object (not a plain dict). A ``_usage_info``
        attribute holding ``input_tokens``, ``output_tokens`` and
        ``cost_usd`` for this call is attached so callers can persist
        per-project usage.

    Raises:
        Exception: Any error raised by the API call or while processing the
            response is logged, recorded in the debug entry and re-raised.
    """
    # Local import: `timezone` is not among this module's top-level imports.
    from datetime import timezone

    client = get_client()

    kwargs = {
        "model": MODEL,
        "max_tokens": max_tokens,
        "system": system,
        "messages": [{"role": "user", "content": user_message}],
    }
    if tools:
        kwargs["tools"] = tools
        kwargs["tool_choice"] = tool_choice or {"type": "auto"}

    # Build debug entry. The timestamp is naive UTC, the same format the
    # deprecated datetime.utcnow() produced.
    entry = {
        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        "model": MODEL,
        "system_prompt": system[:500] + ("..." if len(system) > 500 else ""),
        "user_message_length": len(user_message),
        "user_message_preview": user_message[:1000] + ("..." if len(user_message) > 1000 else ""),
        "tools": [t["name"] for t in tools] if tools else [],
        "tool_choice": tool_choice,
        "status": "pending",
    }

    try:
        response = client.messages.create(**kwargs)

        # Parse response content into truncated previews for the debug log.
        response_parts = []
        tool_results = []
        for block in response.content:
            if block.type == "text":
                response_parts.append({"type": "text", "text": block.text[:1000]})
            elif block.type == "tool_use":
                tool_data = block.input
                tool_results.append({"tool": block.name, "input": tool_data})
                response_parts.append({
                    "type": "tool_use",
                    "tool": block.name,
                    "input_preview": json.dumps(tool_data, default=str)[:2000],
                })

        entry["status"] = "success"
        entry["stop_reason"] = response.stop_reason
        entry["response_parts"] = response_parts
        entry["tool_results_count"] = len(tool_results)

        # Track token usage; counts stay at zero when the response carries
        # no usage information.
        inp = 0
        out = 0
        cost = 0.0
        if hasattr(response, "usage") and response.usage:
            inp = response.usage.input_tokens or 0
            out = response.usage.output_tokens or 0
            cost = (inp / 1_000_000) * INPUT_COST_PER_M + (out / 1_000_000) * OUTPUT_COST_PER_M

        entry["input_tokens"] = inp
        entry["output_tokens"] = out
        entry["cost_usd"] = round(cost, 6)

        with _lock:
            _usage["total_input_tokens"] += inp
            _usage["total_output_tokens"] += out
            _usage["total_cost_usd"] += cost
            _usage["call_count"] += 1
            # Capture the running total while still holding the lock so the
            # log line below cannot race with concurrent callers.
            running_total = _usage["total_cost_usd"]

        # Attach usage to response for callers to save per-project
        response._usage_info = {"input_tokens": inp, "output_tokens": out, "cost_usd": cost}

        logger.info(
            "Claude API call: %s in / %s out tokens, $%.4f this call, $%.4f total",
            inp,
            out,
            cost,
            running_total,
        )

        return response

    except Exception as e:
        entry["status"] = "error"
        entry["error"] = str(e)
        logger.error("Claude API error: %s", e)
        raise

    finally:
        # Record the entry whether the call succeeded or failed, dropping the
        # oldest entry once the log exceeds its cap.
        with _lock:
            _debug_log.append(entry)
            if len(_debug_log) > MAX_DEBUG_LOG:
                _debug_log.pop(0)
|
|
|
|
|
|
def extract_tool_result(response) -> dict | None:
|
|
"""Extract the first tool_use result from a Claude response."""
|
|
for block in response.content:
|
|
if block.type == "tool_use":
|
|
return block.input
|
|
return None
|
|
|
|
|
|
def extract_text(response) -> str:
    """Join the text of every text block in a response with newlines.

    Non-text blocks are skipped; an empty string is returned when the
    response contains no text blocks.
    """
    return "\n".join(
        part.text for part in response.content if part.type == "text"
    )
|