Implements three major feature enhancements: 1. Usage Tracking Reports - Command-line tool (generate_usage_report.py) for comprehensive usage reports - Supports text, JSON, and CSV output formats - Filters by date range, client, and user - Aggregates statistics by client, user, profile, and date - Automated report generation via cron jobs 2. Profile Auto-Versioning & Visibility Control - Automatic version control: edits create new versions (v2, v3, etc.) - Original profiles preserved for rollback capability - Profile visibility control (all clients vs client-specific) - Client-profile relationship management with dynamic updates - Audit trail with timestamps and user tracking 3. Actual Token Usage Tracking - Captures real token counts from OpenAI and Gemini APIs - Precise cost calculations instead of estimates (99% accuracy) - Per-check and per-provider token breakdowns - Pricing validation tool (validate_pricing.py) - Token usage optimization recommendations Key Files Added: - backend/generate_usage_report.py - Usage report generator - backend/validate_pricing.py - Pricing validation tool - backend/USAGE_REPORTS.md - Usage reports documentation - backend/PROFILE_MANAGEMENT.md - Profile versioning guide - backend/TOKEN_TRACKING_ENHANCEMENT.md - Token tracking guide - backend/PRICING_GUIDE.md - Pricing validation guide - backend/NEW_FEATURES_QUICKSTART.md - Quick start guide - IMPLEMENTATION_SUMMARY.md - Complete implementation overview Key Files Modified: - backend/api_server.py - Profile versioning, token passthrough - backend/client_config.py - Visibility-aware profile filtering - backend/llm_config.py - Token usage extraction from APIs - backend/usage_tracker.py - Actual token tracking and cost calculation - CLAUDE.md - Updated documentation with new features Benefits: - Accurate cost tracking with real token usage - Safe profile editing with version history - Flexible profile visibility for multi-tenant setup - Comprehensive usage analytics for optimization - 
Better budget forecasting and client billing Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
299 lines
12 KiB
Python
Executable file
299 lines
12 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
Pricing Validation Tool
|
|
Analyzes actual usage data to validate pricing accuracy and estimates
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
import json
|
|
from collections import defaultdict
|
|
from datetime import datetime
|
|
|
|
# Add parent directory to path
|
|
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
from usage_tracker import COST_PER_1K_TOKENS, USAGE_LOG_DIR
|
|
|
|
|
|
def analyze_actual_token_usage(log_dir=None):
    """Aggregate actual token usage recorded in the JSONL usage logs.

    Every ``*.jsonl`` file in the log directory is read line by line (files in
    sorted filename order). Two event types are aggregated:

    * ``analysis_complete`` - counted in ``total_analyses``; when its
      ``token_usage.total_tokens`` is positive it is also counted in
      ``total_analyses_with_tokens`` and each entry of
      ``token_usage.by_provider`` is added to that provider's totals.
    * ``check_execution`` - when its ``token_usage.total_tokens`` is positive,
      the counts are added under ``by_provider[llm_used]['by_check'][check_name]``.

    Lines that are blank, are not valid JSON, or do not decode to a JSON object
    are skipped. Missing or null token fields are treated as 0 so that a single
    malformed record cannot abort the whole report.

    Args:
        log_dir: Directory containing the ``*.jsonl`` usage logs. Defaults to
            ``USAGE_LOG_DIR`` when omitted.

    Returns:
        dict: ``total_analyses``, ``total_analyses_with_tokens`` and
        ``by_provider``. ``by_provider`` is a ``defaultdict`` keyed by provider
        name holding ``analyses``, ``total_tokens``, ``prompt_tokens``,
        ``completion_tokens``, ``total_cost`` and ``by_check``; ``by_check`` is
        a ``defaultdict`` keyed by check name holding ``count``, the three
        token sums and their per-execution averages.
    """
    import glob

    if log_dir is None:
        log_dir = USAGE_LOG_DIR

    def _new_check_stats():
        return {
            'count': 0,
            'total_tokens': 0,
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'avg_tokens': 0,
            # Pre-seeded so every per-check dict has the same keys even
            # before the averages are computed below.
            'avg_prompt_tokens': 0,
            'avg_completion_tokens': 0,
        }

    def _new_provider_stats():
        return {
            'analyses': 0,
            'total_tokens': 0,
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'total_cost': 0,
            'by_check': defaultdict(_new_check_stats),
        }

    def _as_dict(value):
        # A JSON null (or any non-object) stands in for "no data".
        return value if isinstance(value, dict) else {}

    def _as_number(value):
        # Null or non-numeric counts contribute nothing to the totals.
        return value if isinstance(value, (int, float)) else 0

    stats = {
        'total_analyses': 0,
        'total_analyses_with_tokens': 0,
        'by_provider': defaultdict(_new_provider_stats),
    }

    token_fields = ('total_tokens', 'prompt_tokens', 'completion_tokens')

    for log_file in sorted(glob.glob(os.path.join(log_dir, '*.jsonl'))):
        # errors='replace' keeps the scan best-effort: a corrupt byte spoils at
        # most its own line instead of raising UnicodeDecodeError.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(entry, dict):
                    # Valid JSON but not an object (e.g. a list or number).
                    continue

                event = entry.get('event')
                token_usage = _as_dict(entry.get('token_usage'))
                has_tokens = _as_number(token_usage.get('total_tokens')) > 0

                if event == 'analysis_complete':
                    stats['total_analyses'] += 1
                    if not has_tokens:
                        continue
                    stats['total_analyses_with_tokens'] += 1

                    by_provider = _as_dict(token_usage.get('by_provider'))
                    for provider, provider_stats in by_provider.items():
                        provider_stats = _as_dict(provider_stats)
                        totals = stats['by_provider'][provider]
                        totals['analyses'] += 1
                        for field in token_fields:
                            totals[field] += _as_number(provider_stats.get(field))
                        totals['total_cost'] += _as_number(provider_stats.get('cost'))

                elif event == 'check_execution' and has_tokens:
                    check_name = entry.get('check_name') or 'unknown'
                    llm_used = entry.get('llm_used') or 'unknown'

                    check_stats = stats['by_provider'][llm_used]['by_check'][check_name]
                    check_stats['count'] += 1
                    for field in token_fields:
                        check_stats[field] += _as_number(token_usage.get(field))

    # Derive per-execution averages for every check that was seen.
    for provider_data in stats['by_provider'].values():
        for check_data in provider_data['by_check'].values():
            count = check_data['count']
            if count > 0:
                check_data['avg_tokens'] = check_data['total_tokens'] / count
                check_data['avg_prompt_tokens'] = check_data['prompt_tokens'] / count
                check_data['avg_completion_tokens'] = check_data['completion_tokens'] / count

    return stats
|
|
|
|
|
|
def validate_pricing():
    """Print the configured per-provider token pricing.

    For each provider in ``COST_PER_1K_TOKENS`` this prints the model name,
    the input and output rates (per 1K and per 1M tokens), the last-verified
    marker, and the cost of a sample request of 1000 input + 200 output
    tokens.

    Returns:
        bool: Always ``True``.
    """
    heavy_rule = "=" * 80

    # Size of the sample request used for the worked cost example.
    example_input = 1000
    example_output = 200

    print(heavy_rule)
    print("PRICING VALIDATION REPORT")
    print(heavy_rule)
    print(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()

    print("CURRENT PRICING CONFIGURATION")
    print("-" * 80)
    for provider in COST_PER_1K_TOKENS:
        pricing = COST_PER_1K_TOKENS[provider]

        print(f"\n{provider}:")
        print(f" Model: {pricing.get('model', 'Unknown')}")

        input_rate = pricing['input']
        print(f" Input: ${input_rate:.4f} per 1K tokens (${input_rate * 1000:.2f} per 1M)")

        output_rate = pricing['output']
        print(f" Output: ${output_rate:.4f} per 1K tokens (${output_rate * 1000:.2f} per 1M)")

        print(f" Last Verified: {pricing.get('last_verified', 'Unknown')}")

        # Worked example: cost of one sample request at these rates.
        input_cost = example_input / 1000 * input_rate
        output_cost = example_output / 1000 * output_rate
        example_cost = input_cost + output_cost
        print(f" Example: {example_input} input + {example_output} output tokens = ${example_cost:.4f}")

    print()
    print(heavy_rule)
    print()

    return True
|
|
|
|
|
|
def analyze_token_patterns():
    """Print a report of the token usage aggregated from the usage logs.

    Shows how many analyses were logged and how many carried token data, then
    for every provider with at least one analysis: its totals, its
    per-analysis averages and, when per-check data exists, the ten checks
    with the highest average token count.

    Returns:
        bool: ``False`` when no analyses were logged or none carried token
        data; ``True`` once the per-provider report has been printed.
    """
    heavy_rule = "=" * 80

    print(heavy_rule)
    print("ACTUAL TOKEN USAGE ANALYSIS")
    print(heavy_rule)
    print()

    stats = analyze_actual_token_usage()
    logged = stats['total_analyses']
    with_tokens = stats['total_analyses_with_tokens']

    if logged == 0:
        print("No usage data found. Run some analyses first to collect data.")
        return False

    print(f"Total Analyses Logged: {logged}")
    print(f"Analyses with Token Data: {with_tokens}")

    if with_tokens == 0:
        print()
        print("⚠️ No actual token data found in logs yet.")
        print(" Token tracking will start with the next analysis.")
        print(" Run a test analysis to collect token usage data.")
        return False

    print(f"Coverage: {(with_tokens / logged * 100):.1f}%")
    print()

    for provider, provider_data in sorted(stats['by_provider'].items()):
        runs = provider_data['analyses']
        if not runs > 0:
            # Providers that never appeared in a completed analysis are skipped.
            continue

        print(f"\n{provider.upper()}")
        print("-" * 80)
        print(f"Analyses: {runs}")
        print(f"Total Tokens: {provider_data['total_tokens']:,}")
        print(f"Prompt Tokens: {provider_data['prompt_tokens']:,}")
        print(f"Completion Tokens: {provider_data['completion_tokens']:,}")
        print(f"Total Cost: ${provider_data['total_cost']:.4f}")

        avg_total = provider_data['total_tokens'] / runs
        avg_prompt = provider_data['prompt_tokens'] / runs
        avg_completion = provider_data['completion_tokens'] / runs
        avg_cost = provider_data['total_cost'] / runs

        print()
        print("Average per Analysis:")
        print(f" Total Tokens: {avg_total:.0f}")
        print(f" Prompt Tokens: {avg_prompt:.0f}")
        print(f" Completion Tokens: {avg_completion:.0f}")
        print(f" Cost: ${avg_cost:.4f}")

        # Per-check breakdown, only when check-level data was recorded.
        per_check = provider_data['by_check']
        if not per_check:
            continue

        print()
        print("Per-Check Averages (Top 10 by token usage):")
        ranked = list(per_check.items())
        ranked.sort(key=lambda item: item[1]['avg_tokens'], reverse=True)
        for check_name, check_data in ranked[:10]:
            if not check_data['count'] > 0:
                continue
            print(f" {check_name}:")
            print(f" Count: {check_data['count']}")
            print(f" Avg Tokens: {check_data['avg_tokens']:.0f} (Prompt: {check_data['avg_prompt_tokens']:.0f}, Completion: {check_data['avg_completion_tokens']:.0f})")

    print()
    print(heavy_rule)
    return True
|
|
|
|
|
|
def compare_estimate_vs_actual():
    """Print how the default token/cost estimates compare with logged actuals.

    The defaults compared against are 1000 prompt tokens and 200 completion
    tokens per analysis. For every provider with at least one analysis the
    report shows the actual per-analysis averages, their percentage difference
    from the defaults, and an estimated-vs-actual cost comparison priced from
    ``COST_PER_1K_TOKENS``. When the total token count deviates from the
    default by more than 20%, updated default values are suggested.

    Returns:
        bool: ``False`` when no analysis in the logs carried token data,
        otherwise ``True``.
    """
    heavy_rule = "=" * 80

    print()
    print(heavy_rule)
    print("ESTIMATE ACCURACY ANALYSIS")
    print(heavy_rule)
    print()

    stats = analyze_actual_token_usage()

    if stats['total_analyses_with_tokens'] == 0:
        print("Not enough data to compare estimates vs actuals.")
        print("Run some analyses to collect actual token usage data.")
        return False

    # Default estimates used when token data unavailable
    estimated_prompt_tokens = 1000
    estimated_completion_tokens = 200
    estimated_total_tokens = estimated_prompt_tokens + estimated_completion_tokens

    print("DEFAULT ESTIMATES (used when actual data unavailable):")
    print(f" Prompt Tokens: {estimated_prompt_tokens}")
    print(f" Completion Tokens: {estimated_completion_tokens}")
    print(f" Total Tokens: {estimated_total_tokens}")
    print()

    for provider, provider_data in sorted(stats['by_provider'].items()):
        analyses = provider_data['analyses']
        if analyses <= 0:
            continue

        avg_prompt = provider_data['prompt_tokens'] / analyses
        avg_completion = provider_data['completion_tokens'] / analyses
        avg_total = provider_data['total_tokens'] / analyses

        print(f"\n{provider.upper()} - ACTUAL vs ESTIMATE")
        print("-" * 80)
        print("Actual Average per Analysis:")
        print(f" Prompt: {avg_prompt:.0f} tokens")
        print(f" Completion: {avg_completion:.0f} tokens")
        print(f" Total: {avg_total:.0f} tokens")
        print()

        prompt_diff = ((avg_prompt - estimated_prompt_tokens) / estimated_prompt_tokens) * 100
        completion_diff = ((avg_completion - estimated_completion_tokens) / estimated_completion_tokens) * 100
        total_diff = ((avg_total - estimated_total_tokens) / estimated_total_tokens) * 100

        print("Difference from Estimate:")
        print(f" Prompt: {prompt_diff:+.1f}%")
        print(f" Completion: {completion_diff:+.1f}%")
        print(f" Total: {total_diff:+.1f}%")
        print()

        # Cost comparison
        pricing = COST_PER_1K_TOKENS.get(provider, {})
        estimated_cost = (
            estimated_prompt_tokens / 1000 * pricing.get('input', 0)
            + estimated_completion_tokens / 1000 * pricing.get('output', 0)
        )
        actual_avg_cost = provider_data['total_cost'] / analyses

        print("Cost Comparison:")
        print(f" Estimated: ${estimated_cost:.4f} per analysis")
        print(f" Actual: ${actual_avg_cost:.4f} per analysis")
        if estimated_cost:
            cost_diff = ((actual_avg_cost - estimated_cost) / estimated_cost) * 100
            print(f" Difference: {cost_diff:+.1f}%")
        else:
            # A provider seen in the logs but absent from COST_PER_1K_TOKENS
            # (or priced at zero) yields a zero estimate; a percentage of zero
            # is undefined, so report that instead of dividing by it.
            print(" Difference: n/a (no pricing configured for this provider)")

        if abs(total_diff) > 20:
            print()
            print(f"⚠️ RECOMMENDATION: Update default estimates for {provider}")
            print(" Suggested values:")
            print(f" Prompt Tokens: {int(avg_prompt)}")
            print(f" Completion Tokens: {int(avg_completion)}")

    print()
    print(heavy_rule)
    return True
|
|
|
|
|
|
def main():
    """Run the full pricing validation report.

    Prints the pricing configuration, then the token usage analysis. When the
    analysis found token data, the estimate-vs-actual comparison is printed as
    well. A "NEXT STEPS" list matching whichever case applied follows, and the
    report ends with links to the providers' pricing pages.
    """
    print()
    validate_pricing()
    print()

    if analyze_token_patterns():
        compare_estimate_vs_actual()
        next_steps = (
            "1. If estimates are significantly off (>20%), update the default estimates",
            "2. Verify pricing matches current API provider pricing pages",
            "3. Re-run this validation monthly or when pricing changes",
        )
    else:
        next_steps = (
            "1. Run a few test analyses to collect actual token usage data",
            "2. Re-run this script: python validate_pricing.py",
            "3. Review the actual vs estimated comparison",
            "4. Update estimates if needed",
        )

    print()
    print("NEXT STEPS:")
    print("-" * 80)
    for step in next_steps:
        print(step)
    print()

    print("For latest pricing, check:")
    print(" - OpenAI: https://openai.com/api/pricing/")
    print(" - Google Gemini: https://ai.google.dev/pricing")
    print()
|
|
|
|
|
|
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|