"""Anthropic Claude API client wrapper with token tracking and debug log."""

import json
import logging
import threading
from datetime import datetime, timezone

import anthropic

from app.config import settings

logger = logging.getLogger(__name__)

MODEL = "claude-opus-4-6"

# Cost per million tokens (USD)
# NOTE(review): verify these rates against the published pricing for MODEL;
# nothing in this module checks that they correspond to the model above.
INPUT_COST_PER_M = 3.0
OUTPUT_COST_PER_M = 15.0

# Thread-safe token tracking + debug log.
# Every read or write of _usage and _debug_log must hold _lock.
_lock = threading.Lock()
_usage = {
    "total_input_tokens": 0,
    "total_output_tokens": 0,
    "total_cost_usd": 0.0,
    "call_count": 0,
}
_debug_log: list[dict] = []  # Last N AI interactions
MAX_DEBUG_LOG = 50


def get_client() -> anthropic.Anthropic:
    """Build an Anthropic client using the API key from application settings."""
    return anthropic.Anthropic(api_key=settings.anthropic_api_key)


def get_usage_stats() -> dict:
    """Return a snapshot copy of the cumulative usage counters."""
    with _lock:
        return dict(_usage)


def get_debug_log() -> list[dict]:
    """Return a shallow copy of the retained debug entries, oldest first."""
    with _lock:
        return list(_debug_log)


def reset_usage_stats():
    """Zero every usage counter and clear the debug log."""
    with _lock:
        _usage["total_input_tokens"] = 0
        _usage["total_output_tokens"] = 0
        _usage["total_cost_usd"] = 0.0
        _usage["call_count"] = 0
        _debug_log.clear()


def _preview(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, with "..." appended if it was cut."""
    return text[:limit] + ("..." if len(text) > limit else "")


def call_claude(
    system: str,
    user_message: str,
    tools: list[dict] | None = None,
    tool_choice: dict | None = None,
    max_tokens: int = 4096,
) -> "anthropic.types.Message":
    """Make a Claude API call, optionally with tool_use for structured output.

    Args:
        system: System prompt sent with the request.
        user_message: Content of the single user message.
        tools: Tool definitions; when non-empty they are sent with the
            request together with a tool choice.
        tool_choice: Tool choice sent when ``tools`` is given; falls back to
            ``{"type": "auto"}`` when omitted.
        max_tokens: Maximum number of tokens to generate.

    Returns:
        The SDK response object, with an extra ``_usage_info`` attribute
        holding ``input_tokens``, ``output_tokens`` and ``cost_usd`` for this
        call.

    Raises:
        Exception: Any error raised while calling the API or processing the
            response is logged, recorded in the debug log, and re-raised.
    """
    client = get_client()

    kwargs = {
        "model": MODEL,
        "max_tokens": max_tokens,
        "system": system,
        "messages": [{"role": "user", "content": user_message}],
    }
    if tools:
        kwargs["tools"] = tools
        kwargs["tool_choice"] = tool_choice or {"type": "auto"}

    # Build debug entry
    entry = {
        # datetime.utcnow() is deprecated; take an aware UTC time and drop the
        # tzinfo so the stored ISO string keeps its original naive format.
        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        "model": MODEL,
        "system_prompt": _preview(system, 500),
        "user_message_length": len(user_message),
        "user_message_preview": _preview(user_message, 1000),
        "tools": [t["name"] for t in tools] if tools else [],
        "tool_choice": tool_choice,
        "status": "pending",
    }

    try:
        response = client.messages.create(**kwargs)

        # Parse response content into truncated previews for the debug log.
        response_parts = []
        tool_results_count = 0
        for block in response.content:
            if block.type == "text":
                response_parts.append({"type": "text", "text": block.text[:1000]})
            elif block.type == "tool_use":
                tool_results_count += 1
                response_parts.append({
                    "type": "tool_use",
                    "tool": block.name,
                    "input_preview": json.dumps(block.input, default=str)[:2000],
                })

        entry["status"] = "success"
        entry["stop_reason"] = response.stop_reason
        entry["response_parts"] = response_parts
        entry["tool_results_count"] = tool_results_count

        # Track token usage; a missing or empty usage object counts as zero.
        inp = 0
        out = 0
        cost = 0.0
        usage = getattr(response, "usage", None)
        if usage:
            inp = usage.input_tokens or 0
            out = usage.output_tokens or 0
            cost = (inp / 1_000_000) * INPUT_COST_PER_M + (out / 1_000_000) * OUTPUT_COST_PER_M

        entry["input_tokens"] = inp
        entry["output_tokens"] = out
        entry["cost_usd"] = round(cost, 6)

        with _lock:
            _usage["total_input_tokens"] += inp
            _usage["total_output_tokens"] += out
            _usage["total_cost_usd"] += cost
            _usage["call_count"] += 1
            # Capture the running total while still holding the lock so the
            # log line below cannot observe another thread's update.
            total_cost = _usage["total_cost_usd"]

        # Attach usage to response for callers to save per-project
        response._usage_info = {"input_tokens": inp, "output_tokens": out, "cost_usd": cost}

        logger.info(
            "Claude API call: %s in / %s out tokens, $%.4f this call, $%.4f total",
            inp,
            out,
            cost,
            total_cost,
        )
        return response

    except Exception as e:
        entry["status"] = "error"
        entry["error"] = str(e)
        logger.error("Claude API error: %s", e)
        raise

    finally:
        # Record the entry on success and failure alike, dropping the oldest
        # entry once the log exceeds its cap.
        with _lock:
            _debug_log.append(entry)
            if len(_debug_log) > MAX_DEBUG_LOG:
                _debug_log.pop(0)


def extract_tool_result(response) -> dict | None:
    """Extract the first tool_use result from a Claude response.

    Returns the ``input`` of the first content block whose type is
    ``"tool_use"``, or ``None`` when the response has no such block.
    """
    return next(
        (block.input for block in response.content if block.type == "tool_use"),
        None,
    )


def extract_text(response) -> str:
    """Extract text content from a Claude response.

    Joins the text of every ``"text"`` content block with newlines; returns
    an empty string when there are no text blocks.
    """
    return "\n".join(
        block.text for block in response.content if block.type == "text"
    )