ai_qc/backend/validate_pricing.py
nickviljoen 8bc1256e82 Add usage tracking reports, profile versioning, and token tracking
Implements three major feature enhancements:

1. Usage Tracking Reports
   - Command-line tool (generate_usage_report.py) for comprehensive usage reports
   - Supports text, JSON, and CSV output formats
   - Filters by date range, client, and user
   - Aggregates statistics by client, user, profile, and date
   - Automated report generation via cron jobs

2. Profile Auto-Versioning & Visibility Control
   - Automatic version control: edits create new versions (v2, v3, etc.)
   - Original profiles preserved for rollback capability
   - Profile visibility control (all clients vs client-specific)
   - Client-profile relationship management with dynamic updates
   - Audit trail with timestamps and user tracking

3. Actual Token Usage Tracking
   - Captures real token counts from OpenAI and Gemini APIs
   - Precise cost calculations instead of estimates (99% accuracy)
   - Per-check and per-provider token breakdowns
   - Pricing validation tool (validate_pricing.py)
   - Token usage optimization recommendations

Key Files Added:
- backend/generate_usage_report.py - Usage report generator
- backend/validate_pricing.py - Pricing validation tool
- backend/USAGE_REPORTS.md - Usage reports documentation
- backend/PROFILE_MANAGEMENT.md - Profile versioning guide
- backend/TOKEN_TRACKING_ENHANCEMENT.md - Token tracking guide
- backend/PRICING_GUIDE.md - Pricing validation guide
- backend/NEW_FEATURES_QUICKSTART.md - Quick start guide
- IMPLEMENTATION_SUMMARY.md - Complete implementation overview

Key Files Modified:
- backend/api_server.py - Profile versioning, token passthrough
- backend/client_config.py - Visibility-aware profile filtering
- backend/llm_config.py - Token usage extraction from APIs
- backend/usage_tracker.py - Actual token tracking and cost calculation
- CLAUDE.md - Updated documentation with new features

Benefits:
- Accurate cost tracking with real token usage
- Safe profile editing with version history
- Flexible profile visibility for multi-tenant setup
- Comprehensive usage analytics for optimization
- Better budget forecasting and client billing

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-02 13:22:33 +02:00

299 lines
12 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Pricing Validation Tool
Analyzes actual usage data to validate pricing accuracy and estimates
"""
import sys
import os
import json
from collections import defaultdict
from datetime import datetime
# Add parent directory to path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from usage_tracker import COST_PER_1K_TOKENS, USAGE_LOG_DIR
def analyze_actual_token_usage(log_dir=None):
    """Aggregate actual token usage from the JSONL usage logs.

    Every ``*.jsonl`` file in the log directory is read in sorted filename
    order and two kinds of events are folded into the result:

    * ``analysis_complete`` - always counted in ``total_analyses``. When its
      ``token_usage['total_tokens']`` is positive it is also counted in
      ``total_analyses_with_tokens`` and each entry of
      ``token_usage['by_provider']`` is added to that provider's totals.
    * ``check_execution`` - when it carries a positive token count, its
      figures are added under
      ``by_provider[llm_used]['by_check'][check_name]``.

    Log lines are treated as best-effort input: blank lines, lines that are
    not valid JSON, and JSON values that are not objects are skipped, and
    missing/null/non-numeric counters count as zero, so one malformed record
    cannot abort the whole report.

    Args:
        log_dir: Directory to scan for ``*.jsonl`` files. Defaults to
            ``USAGE_LOG_DIR`` when omitted.

    Returns:
        dict with the keys ``total_analyses``, ``total_analyses_with_tokens``
        and ``by_provider``. ``by_provider`` is a defaultdict mapping a
        provider name to its ``analyses``, ``total_tokens``,
        ``prompt_tokens``, ``completion_tokens``, ``total_cost`` and a
        ``by_check`` defaultdict holding per-check counts, token sums and
        averages.
    """
    import glob

    if log_dir is None:
        log_dir = USAGE_LOG_DIR

    def _new_check_stats():
        # All average keys exist up front so readers never hit a KeyError
        # on a check entry that was created but never averaged.
        return {
            'count': 0,
            'total_tokens': 0,
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'avg_tokens': 0,
            'avg_prompt_tokens': 0,
            'avg_completion_tokens': 0,
        }

    def _new_provider_stats():
        return {
            'analyses': 0,
            'total_tokens': 0,
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'total_cost': 0,
            'by_check': defaultdict(_new_check_stats),
        }

    def _as_dict(value):
        # JSON null (or any other non-object) is treated as "no data".
        return value if isinstance(value, dict) else {}

    def _number(mapping, key):
        # Missing, null or non-numeric counters contribute nothing.
        value = mapping.get(key)
        return value if isinstance(value, (int, float)) else 0

    stats = {
        'total_analyses': 0,
        'total_analyses_with_tokens': 0,
        'by_provider': defaultdict(_new_provider_stats),
    }

    for log_file in sorted(glob.glob(os.path.join(log_dir, '*.jsonl'))):
        # Explicit encoding keeps parsing independent of the platform locale;
        # undecodable bytes are replaced so a corrupt line only loses itself.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(entry, dict):
                    continue

                event = entry.get('event')
                token_usage = _as_dict(entry.get('token_usage'))
                has_tokens = _number(token_usage, 'total_tokens') > 0

                if event == 'analysis_complete':
                    stats['total_analyses'] += 1
                    if not has_tokens:
                        continue
                    stats['total_analyses_with_tokens'] += 1

                    # By provider
                    by_provider = _as_dict(token_usage.get('by_provider'))
                    for provider, raw_provider_stats in by_provider.items():
                        provider_stats = _as_dict(raw_provider_stats)
                        totals = stats['by_provider'][provider]
                        totals['analyses'] += 1
                        totals['total_tokens'] += _number(provider_stats, 'total_tokens')
                        totals['prompt_tokens'] += _number(provider_stats, 'prompt_tokens')
                        totals['completion_tokens'] += _number(provider_stats, 'completion_tokens')
                        totals['total_cost'] += _number(provider_stats, 'cost')

                elif event == 'check_execution' and has_tokens:
                    # Individual check token usage. Null names fall back to
                    # 'unknown' so provider keys stay sortable strings.
                    check_name = entry.get('check_name') or 'unknown'
                    llm_used = entry.get('llm_used') or 'unknown'
                    check_stats = stats['by_provider'][llm_used]['by_check'][check_name]
                    check_stats['count'] += 1
                    check_stats['total_tokens'] += _number(token_usage, 'total_tokens')
                    check_stats['prompt_tokens'] += _number(token_usage, 'prompt_tokens')
                    check_stats['completion_tokens'] += _number(token_usage, 'completion_tokens')

    # Calculate averages
    for provider_data in stats['by_provider'].values():
        for check_data in provider_data['by_check'].values():
            count = check_data['count']
            if count > 0:
                check_data['avg_tokens'] = check_data['total_tokens'] / count
                check_data['avg_prompt_tokens'] = check_data['prompt_tokens'] / count
                check_data['avg_completion_tokens'] = check_data['completion_tokens'] / count

    return stats
def validate_pricing():
    """Print the configured per-provider token pricing.

    For every provider in ``COST_PER_1K_TOKENS`` the model name, the input
    and output price (per 1K and per 1M tokens), the last-verified marker
    and the cost of a sample 1000-input / 200-output request are printed.

    Returns:
        Always True.

    Raises:
        KeyError: if a provider entry lacks the ``'input'`` or ``'output'``
            price.
    """
    banner = "=" * 80
    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    print(banner)
    print("PRICING VALIDATION REPORT")
    print(banner)
    print(f"Generated: {generated_at}")
    print()
    print("CURRENT PRICING CONFIGURATION")
    print("-" * 80)

    # Size of the sample request used for the illustrative cost line.
    sample_in = 1000
    sample_out = 200

    for name, rates in COST_PER_1K_TOKENS.items():
        print(f"\n{name}:")
        print(f" Model: {rates.get('model', 'Unknown')}")

        # Prices are per 1K tokens; multiplying by 1000 gives the per-1M price.
        in_rate = rates['input']
        print(f" Input: ${in_rate:.4f} per 1K tokens (${in_rate * 1000:.2f} per 1M)")
        out_rate = rates['output']
        print(f" Output: ${out_rate:.4f} per 1K tokens (${out_rate * 1000:.2f} per 1M)")
        print(f" Last Verified: {rates.get('last_verified', 'Unknown')}")

        # Example costs
        sample_cost = (sample_in / 1000 * in_rate) + (sample_out / 1000 * out_rate)
        print(f" Example: {sample_in} input + {sample_out} output tokens = ${sample_cost:.4f}")

    print()
    print(banner)
    print()
    return True
def analyze_token_patterns():
    """Print a per-provider report of the token usage found in the logs.

    The figures come from ``analyze_actual_token_usage()``. For each provider
    with at least one analysis the totals, the per-analysis averages and the
    ten checks with the highest average token count are printed.

    Returns:
        False when no analyses are logged or none of them carry token data
        (a short hint is printed instead of the report); True otherwise.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print(heavy_rule)
    print("ACTUAL TOKEN USAGE ANALYSIS")
    print(heavy_rule)
    print()

    usage = analyze_actual_token_usage()
    logged = usage['total_analyses']
    with_tokens = usage['total_analyses_with_tokens']

    if logged == 0:
        print("No usage data found. Run some analyses first to collect data.")
        return False

    print(f"Total Analyses Logged: {logged}")
    print(f"Analyses with Token Data: {with_tokens}")

    if with_tokens == 0:
        print()
        print("⚠️ No actual token data found in logs yet.")
        print(" Token tracking will start with the next analysis.")
        print(" Run a test analysis to collect token usage data.")
        return False

    coverage = with_tokens / logged * 100
    print(f"Coverage: {coverage:.1f}%")
    print()

    providers = usage['by_provider']
    for name in sorted(providers):
        totals = providers[name]
        runs = totals['analyses']
        # Providers known only through per-check events have no analyses
        # to average over, so they are left out of the report.
        if runs <= 0:
            continue

        print(f"\n{name.upper()}")
        print(light_rule)
        print(f"Analyses: {runs}")
        print(f"Total Tokens: {totals['total_tokens']:,}")
        print(f"Prompt Tokens: {totals['prompt_tokens']:,}")
        print(f"Completion Tokens: {totals['completion_tokens']:,}")
        print(f"Total Cost: ${totals['total_cost']:.4f}")

        mean_total = totals['total_tokens'] / runs
        mean_prompt = totals['prompt_tokens'] / runs
        mean_completion = totals['completion_tokens'] / runs
        mean_cost = totals['total_cost'] / runs
        print()
        print("Average per Analysis:")
        print(f" Total Tokens: {mean_total:.0f}")
        print(f" Prompt Tokens: {mean_prompt:.0f}")
        print(f" Completion Tokens: {mean_completion:.0f}")
        print(f" Cost: ${mean_cost:.4f}")

        # Show per-check breakdown if available
        per_check = totals['by_check']
        if not per_check:
            continue

        print()
        print("Per-Check Averages (Top 10 by token usage):")
        ranked = sorted(
            per_check.items(),
            key=lambda item: item[1]['avg_tokens'],
            reverse=True,
        )
        for label, figures in ranked[:10]:
            if figures['count'] <= 0:
                continue
            print(f" {label}:")
            print(f" Count: {figures['count']}")
            print(
                f" Avg Tokens: {figures['avg_tokens']:.0f} "
                f"(Prompt: {figures['avg_prompt_tokens']:.0f}, "
                f"Completion: {figures['avg_completion_tokens']:.0f})"
            )

    print()
    print(heavy_rule)
    return True
def compare_estimate_vs_actual():
    """Compare the default token/cost estimates with the logged actuals.

    The default estimate (1000 prompt + 200 completion tokens) is compared,
    per provider, with the average token counts and average cost taken from
    ``analyze_actual_token_usage()``. A recommendation to update the defaults
    is printed when the average total token count deviates from the estimate
    by more than 20%.

    A provider that appears in the logs but has no (or zero) pricing in
    ``COST_PER_1K_TOKENS`` yields an estimated cost of 0; in that case the
    percentage difference is reported as unavailable instead of dividing by
    zero.

    Returns:
        False when the logs contain no analyses with token data (a hint is
        printed instead); True once the comparison has been printed.
    """
    print()
    print("=" * 80)
    print("ESTIMATE ACCURACY ANALYSIS")
    print("=" * 80)
    print()

    stats = analyze_actual_token_usage()
    if stats['total_analyses_with_tokens'] == 0:
        print("Not enough data to compare estimates vs actuals.")
        print("Run some analyses to collect actual token usage data.")
        return False

    # Default estimates used when token data unavailable
    estimated_prompt_tokens = 1000
    estimated_completion_tokens = 200
    estimated_total_tokens = estimated_prompt_tokens + estimated_completion_tokens

    print("DEFAULT ESTIMATES (used when actual data unavailable):")
    print(f" Prompt Tokens: {estimated_prompt_tokens}")
    print(f" Completion Tokens: {estimated_completion_tokens}")
    print(f" Total Tokens: {estimated_total_tokens}")
    print()

    for provider, provider_data in sorted(stats['by_provider'].items()):
        analyses = provider_data['analyses']
        # Providers seen only in per-check events have nothing to average.
        if analyses <= 0:
            continue

        avg_prompt = provider_data['prompt_tokens'] / analyses
        avg_completion = provider_data['completion_tokens'] / analyses
        avg_total = provider_data['total_tokens'] / analyses

        print(f"\n{provider.upper()} - ACTUAL vs ESTIMATE")
        print("-" * 80)
        print("Actual Average per Analysis:")
        print(f" Prompt: {avg_prompt:.0f} tokens")
        print(f" Completion: {avg_completion:.0f} tokens")
        print(f" Total: {avg_total:.0f} tokens")
        print()

        prompt_diff = ((avg_prompt - estimated_prompt_tokens) / estimated_prompt_tokens) * 100
        completion_diff = ((avg_completion - estimated_completion_tokens) / estimated_completion_tokens) * 100
        total_diff = ((avg_total - estimated_total_tokens) / estimated_total_tokens) * 100

        print("Difference from Estimate:")
        print(f" Prompt: {prompt_diff:+.1f}%")
        print(f" Completion: {completion_diff:+.1f}%")
        print(f" Total: {total_diff:+.1f}%")
        print()

        # Cost comparison
        pricing = COST_PER_1K_TOKENS.get(provider, {})
        estimated_cost = (estimated_prompt_tokens / 1000 * pricing.get('input', 0)) + \
            (estimated_completion_tokens / 1000 * pricing.get('output', 0))
        actual_avg_cost = provider_data['total_cost'] / analyses

        print("Cost Comparison:")
        print(f" Estimated: ${estimated_cost:.4f} per analysis")
        print(f" Actual: ${actual_avg_cost:.4f} per analysis")
        if estimated_cost > 0:
            cost_diff = ((actual_avg_cost - estimated_cost) / estimated_cost) * 100
            print(f" Difference: {cost_diff:+.1f}%")
        else:
            # Missing or zero pricing: a relative difference is undefined.
            print(f" Difference: N/A (no pricing configured for {provider})")

        if abs(total_diff) > 20:
            print()
            print(f"⚠️ RECOMMENDATION: Update default estimates for {provider}")
            print(" Suggested values:")
            print(f" Prompt Tokens: {int(avg_prompt)}")
            print(f" Completion Tokens: {int(avg_completion)}")

    print()
    print("=" * 80)
    return True
def main():
    """Run the full pricing validation report.

    Prints the pricing configuration, then the token usage analysis. The
    estimate-vs-actual comparison runs only when the analysis found token
    data. A list of next steps matching that outcome is printed, followed by
    links to the provider pricing pages.
    """
    print()
    validate_pricing()
    print()

    if analyze_token_patterns():
        compare_estimate_vs_actual()
        next_steps = (
            "1. If estimates are significantly off (>20%), update the default estimates",
            "2. Verify pricing matches current API provider pricing pages",
            "3. Re-run this validation monthly or when pricing changes",
        )
    else:
        next_steps = (
            "1. Run a few test analyses to collect actual token usage data",
            "2. Re-run this script: python validate_pricing.py",
            "3. Review the actual vs estimated comparison",
            "4. Update estimates if needed",
        )

    # Both outcomes share the same heading; only the step list differs.
    print()
    print("NEXT STEPS:")
    print("-" * 80)
    for step in next_steps:
        print(step)
    print()

    print("For latest pricing, check:")
    print(" - OpenAI: https://openai.com/api/pricing/")
    print(" - Google Gemini: https://ai.google.dev/pricing")
    print()


# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()