gmal-scope-builder/backend/app/utils/claude_client.py

"""Anthropic Claude API client wrapper with token tracking and debug log."""

import json
import logging
import threading
from datetime import datetime

import anthropic

from app.config import settings

# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)

# Model identifier sent with every request made by call_claude().
MODEL = "claude-opus-4-6"

# Cost per million tokens (USD), used to estimate the cost of each call.
# NOTE(review): these rates are hard-coded and not derived from MODEL —
# confirm they match the published pricing for the model named above.
INPUT_COST_PER_M = 3.0
OUTPUT_COST_PER_M = 15.0

# Thread-safe token tracking + debug log.
# _lock is taken around updates to _usage and _debug_log and by the
# accessor functions below.
_lock = threading.Lock()
# Running totals accumulated by call_claude() until reset_usage_stats() runs.
_usage = {
    "total_input_tokens": 0,
    "total_output_tokens": 0,
    "total_cost_usd": 0.0,
    "call_count": 0,
}
# One summary dict per call_claude() invocation, oldest first.
_debug_log: list[dict] = []  # Last N AI interactions
# Maximum number of entries kept in _debug_log; the oldest is dropped first.
MAX_DEBUG_LOG = 50


def get_client() -> anthropic.Anthropic:
    """Build an Anthropic SDK client from the configured API key.

    A new client object is constructed on every call; nothing is cached.
    """
    api_key = settings.anthropic_api_key
    return anthropic.Anthropic(api_key=api_key)


def get_usage_stats() -> dict:
    """Return a snapshot copy of the cumulative usage counters.

    The copy is taken while holding the module lock, so the caller gets a
    consistent set of totals and cannot mutate the shared counters.
    """
    with _lock:
        snapshot = dict(_usage)
    return snapshot


def get_debug_log() -> list[dict]:
    """Return a shallow copy of the debug log, oldest entry first.

    The list itself is new, but the entry dicts are the same objects that
    are held in the module-level log.
    """
    with _lock:
        entries = _debug_log[:]
    return entries


def reset_usage_stats():
    """Zero every usage counter and empty the debug log under one lock hold."""
    zeroed = {
        "total_input_tokens": 0,
        "total_output_tokens": 0,
        "total_cost_usd": 0.0,
        "call_count": 0,
    }
    with _lock:
        # Mutate in place so other references to the shared objects stay valid.
        _usage.update(zeroed)
        del _debug_log[:]


def call_claude(
    system: str,
    user_message: str,
    tools: list[dict] | None = None,
    tool_choice: dict | None = None,
    max_tokens: int = 4096,
) -> "anthropic.types.Message":
    """Make a Claude API call, optionally with tool_use for structured output.

    Sends a single user message to ``MODEL``, adds the call's token usage and
    estimated cost to the module-wide totals, and appends a truncated summary
    of the exchange to the in-memory debug log (on success and on failure).

    Args:
        system: System prompt for the request.
        user_message: Content of the single user turn.
        tools: Optional tool definitions. Each must carry a ``"name"`` key;
            the names are collected before the request is sent, so a missing
            key raises ``KeyError`` without producing a debug-log entry.
        tool_choice: Optional tool-choice directive. Only sent when ``tools``
            is non-empty, in which case it defaults to ``{"type": "auto"}``.
        max_tokens: Upper bound on generated tokens for this request.

    Returns:
        The SDK response object (not a plain dict), with an extra
        ``_usage_info`` attribute holding ``input_tokens``, ``output_tokens``
        and ``cost_usd`` for this call so callers can persist them.

    Raises:
        Exception: Anything raised by the SDK call or while summarizing the
            response is logged, recorded with status ``"error"``, and
            re-raised unchanged.
    """
    # Local import keeps this block self-contained; the module only imports
    # the ``datetime`` class at file level.
    from datetime import timezone

    client = get_client()

    kwargs = {
        "model": MODEL,
        "max_tokens": max_tokens,
        "system": system,
        "messages": [{"role": "user", "content": user_message}],
    }
    if tools:
        kwargs["tools"] = tools
        kwargs["tool_choice"] = tool_choice or {"type": "auto"}

    # datetime.utcnow() is deprecated; dropping tzinfo keeps the exact same
    # naive-UTC ISO string that existing log consumers already receive.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    # Build debug entry (prompts are truncated to keep the log small)
    entry = {
        "timestamp": timestamp,
        "model": MODEL,
        "system_prompt": system[:500] + ("..." if len(system) > 500 else ""),
        "user_message_length": len(user_message),
        "user_message_preview": user_message[:1000] + ("..." if len(user_message) > 1000 else ""),
        "tools": [t["name"] for t in tools] if tools else [],
        "tool_choice": tool_choice,
        "status": "pending",
    }

    try:
        response = client.messages.create(**kwargs)

        # Summarize response content; only a count of tool_use blocks is
        # kept, so full tool payloads are not retained beyond the preview.
        response_parts = []
        tool_use_count = 0
        for block in response.content:
            if block.type == "text":
                response_parts.append({"type": "text", "text": block.text[:1000]})
            elif block.type == "tool_use":
                tool_use_count += 1
                response_parts.append({
                    "type": "tool_use",
                    "tool": block.name,
                    "input_preview": json.dumps(block.input, default=str)[:2000],
                })

        entry["status"] = "success"
        entry["stop_reason"] = response.stop_reason
        entry["response_parts"] = response_parts
        entry["tool_results_count"] = tool_use_count

        # Track token usage
        inp = 0
        out = 0
        cost = 0.0
        usage = getattr(response, "usage", None)
        if usage:
            inp = usage.input_tokens or 0
            out = usage.output_tokens or 0
            cost = (inp / 1_000_000) * INPUT_COST_PER_M + (out / 1_000_000) * OUTPUT_COST_PER_M

            entry["input_tokens"] = inp
            entry["output_tokens"] = out
            entry["cost_usd"] = round(cost, 6)

        with _lock:
            # Calls without usage data leave the totals (and call_count) untouched.
            if usage:
                _usage["total_input_tokens"] += inp
                _usage["total_output_tokens"] += out
                _usage["total_cost_usd"] += cost
                _usage["call_count"] += 1
            # Read the running total while still holding the lock so the log
            # line cannot race with a concurrent update or reset.
            running_total = _usage["total_cost_usd"]

        # Attach usage to response for callers to save per-project
        response._usage_info = {"input_tokens": inp, "output_tokens": out, "cost_usd": cost}

        logger.info(
            "Claude API call: %s in / %s out tokens, $%.4f this call, $%.4f total",
            inp,
            out,
            cost,
            running_total,
        )

        return response

    except Exception as e:
        entry["status"] = "error"
        entry["error"] = str(e)
        logger.error("Claude API error: %s", e)
        raise

    finally:
        # Always record the attempt, then trim the log to its size cap.
        with _lock:
            _debug_log.append(entry)
            if len(_debug_log) > MAX_DEBUG_LOG:
                _debug_log.pop(0)


def extract_tool_result(response) -> dict | None:
    """Extract the first tool_use result from a Claude response."""
    for block in response.content:
        if block.type == "tool_use":
            return block.input
    return None


def extract_text(response) -> str:
    """Join the text of every ``text`` content block with newlines.

    Non-text blocks are skipped; a response with no text blocks yields an
    empty string.
    """
    return "\n".join(
        part.text for part in response.content if part.type == "text"
    )