# salary-benchmark/app/services/claude_client.py

import json

import anthropic

from app.config import settings


def _collect_response_text(message) -> str:
    """Return the concatenated text of every text block in *message*.

    Blocks whose ``type`` is not ``"text"`` are skipped, so a reply that is
    empty or does not start with a text block yields ``""`` (or the later
    text) instead of raising ``IndexError`` / ``AttributeError``.
    """
    return "".join(
        block.text
        for block in message.content
        if getattr(block, "type", None) == "text"
    ).strip()


def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if present.

    Handles both the multi-line form (an opening fence line, optionally with
    a language tag, followed by the payload) and a fence written entirely on
    one line. Text that does not start with a fence is returned stripped.
    """
    text = text.strip()
    if not text.startswith("```"):
        return text

    _, newline, remainder = text.partition("\n")
    if not newline:
        # Single-line fence: there is no opening line to discard, so only
        # the leading backticks are removed.
        remainder = text[3:]

    remainder = remainder.rstrip()
    if remainder.endswith("```"):
        remainder = remainder[:-3]
    return remainder.strip()


async def analyze_salary_data(
    title: str, location: str, ranked_content: list[dict]
) -> dict:
    """Ask the model for a structured salary benchmark and parse its JSON reply.

    Args:
        title: Role title interpolated into the prompt.
        location: Location interpolated into the prompt.
        ranked_content: Source items; the ``"content"`` value of every item
            that has a truthy one is included in the prompt. Items without
            it are ignored.

    Returns:
        The object decoded from the model's JSON reply. The prompt requests
        the keys ``benchmarks``, ``confidence_score``, ``reasoning`` and
        ``sources_used``, but the reply is not validated against that shape.

    Raises:
        json.JSONDecodeError: If the reply (after removing any Markdown code
            fence) is not valid JSON, including when it contains no text.
    """
    sources_text = "\n\n---\n\n".join(
        item["content"] for item in ranked_content if item.get("content")
    )

    prompt = f"""You are a compensation analyst. Based on the following salary data sources
for the role "{title}" in "{location}", produce a structured benchmark.

[SOURCES]
{sources_text}
[/SOURCES]

Return ONLY valid JSON in this exact format:
{{
  "benchmarks": [
    {{"level": "junior", "salary": <int>}},
    {{"level": "mid", "salary": <int>}},
    {{"level": "senior", "salary": <int>}}
  ],
  "confidence_score": <float 0.0-1.0>,
  "reasoning": "<Brief explanation of how you derived these numbers>",
  "sources_used": ["<relevant source descriptions>"]
}}

Rules:
- Each salary value is a single annual USD integer representing the typical/median salary for that level
- Base your estimates on the provided data, not general knowledge
- If data is sparse, lower the confidence_score accordingly
- Return ONLY the JSON object, no markdown or explanation"""

    # The client is created per call; the context manager closes it on exit
    # so its HTTP connections are released promptly rather than at garbage
    # collection.
    async with anthropic.AsyncAnthropic(
        api_key=settings.anthropic_api_key
    ) as client:
        message = await client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}],
        )

    # The prompt asks for bare JSON, but the reply may still arrive wrapped
    # in a Markdown code fence, so strip one before decoding.
    response_text = _strip_code_fence(_collect_response_text(message))
    return json.loads(response_text)