#!/usr/bin/env python3
"""
Pricing Validation Tool

Analyzes actual usage data to validate pricing accuracy and estimates.
"""

import glob
import json
import os
import sys
from collections import defaultdict
from datetime import datetime

# Add this script's own directory to the import path before importing
# usage_tracker (presumably a sibling module -- confirm).
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from usage_tracker import COST_PER_1K_TOKENS, USAGE_LOG_DIR


def _new_check_stats():
    """Return a zeroed accumulator for one (provider, check) pair."""
    return {
        'count': 0,
        'total_tokens': 0,
        'prompt_tokens': 0,
        'completion_tokens': 0,
        'avg_tokens': 0,
        'avg_prompt_tokens': 0,
        'avg_completion_tokens': 0,
    }


def _new_provider_stats():
    """Return a zeroed accumulator for one provider."""
    return {
        'analyses': 0,
        'total_tokens': 0,
        'prompt_tokens': 0,
        'completion_tokens': 0,
        'total_cost': 0,
        'by_check': defaultdict(_new_check_stats),
    }


def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _number(mapping, key):
    """Return mapping[key], treating a missing or null value as 0."""
    return mapping.get(key) or 0


def _record_analysis(stats, entry):
    """Fold one 'analysis_complete' log entry into *stats*."""
    stats['total_analyses'] += 1

    token_usage = _as_dict(entry.get('token_usage'))
    if _number(token_usage, 'total_tokens') <= 0:
        return

    stats['total_analyses_with_tokens'] += 1
    for provider, raw_provider_stats in _as_dict(token_usage.get('by_provider')).items():
        provider_stats = _as_dict(raw_provider_stats)
        totals = stats['by_provider'][provider]
        totals['analyses'] += 1
        totals['total_tokens'] += _number(provider_stats, 'total_tokens')
        totals['prompt_tokens'] += _number(provider_stats, 'prompt_tokens')
        totals['completion_tokens'] += _number(provider_stats, 'completion_tokens')
        totals['total_cost'] += _number(provider_stats, 'cost')


def _record_check(stats, entry):
    """Fold one 'check_execution' log entry into *stats*."""
    token_usage = _as_dict(entry.get('token_usage'))
    if _number(token_usage, 'total_tokens') <= 0:
        return

    check_name = entry.get('check_name', 'unknown')
    llm_used = entry.get('llm_used', 'unknown')
    check_stats = stats['by_provider'][llm_used]['by_check'][check_name]
    check_stats['count'] += 1
    check_stats['total_tokens'] += _number(token_usage, 'total_tokens')
    check_stats['prompt_tokens'] += _number(token_usage, 'prompt_tokens')
    check_stats['completion_tokens'] += _number(token_usage, 'completion_tokens')


def analyze_actual_token_usage():
    """Analyze actual token usage from logs to validate estimates.

    Reads every ``*.jsonl`` file in ``USAGE_LOG_DIR`` (in sorted filename
    order) and aggregates two kinds of entries, selected by their ``event``
    key:

    * ``analysis_complete`` -- counted in ``total_analyses``; when its
      ``token_usage.total_tokens`` is positive it is also counted in
      ``total_analyses_with_tokens`` and its ``token_usage.by_provider``
      figures are summed per provider.
    * ``check_execution`` -- when its ``token_usage.total_tokens`` is
      positive, summed per provider (``llm_used``) and check (``check_name``).

    Lines that are not valid JSON, or that decode to something other than a
    JSON object, are skipped. Missing or null counters are treated as 0.

    Returns:
        dict with keys ``total_analyses``, ``total_analyses_with_tokens`` and
        ``by_provider`` (provider name -> totals, including a ``by_check``
        mapping with per-check totals and averages).
    """
    stats = {
        'total_analyses': 0,
        'total_analyses_with_tokens': 0,
        'by_provider': defaultdict(_new_provider_stats),
    }

    handlers = {
        'analysis_complete': _record_analysis,
        'check_execution': _record_check,
    }

    for log_file in sorted(glob.glob(os.path.join(USAGE_LOG_DIR, '*.jsonl'))):
        # errors='replace' keeps one badly-encoded line from aborting the run;
        # such a line either still parses or is skipped as invalid JSON.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                try:
                    entry = json.loads(line.strip())
                except json.JSONDecodeError:
                    continue
                if not isinstance(entry, dict):
                    continue
                handler = handlers.get(entry.get('event'))
                if handler is not None:
                    handler(stats, entry)

    # Calculate averages
    for provider_data in stats['by_provider'].values():
        for check_data in provider_data['by_check'].values():
            count = check_data['count']
            if count > 0:
                check_data['avg_tokens'] = check_data['total_tokens'] / count
                check_data['avg_prompt_tokens'] = check_data['prompt_tokens'] / count
                check_data['avg_completion_tokens'] = check_data['completion_tokens'] / count

    return stats


def validate_pricing():
    """Print the current pricing configuration.

    For every provider in ``COST_PER_1K_TOKENS`` prints the model, the input
    and output price per 1K and per 1M tokens, the last-verified value, and
    the cost of an example request (1000 input + 200 output tokens).

    Returns:
        True, always.
    """
    print("=" * 80)
    print("PRICING VALIDATION REPORT")
    print("=" * 80)
    print(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print()

    print("CURRENT PRICING CONFIGURATION")
    print("-" * 80)

    # Example request size used to illustrate each provider's pricing.
    example_input = 1000
    example_output = 200

    for provider, pricing in COST_PER_1K_TOKENS.items():
        input_price = pricing['input']
        output_price = pricing['output']

        print(f"\n{provider}:")
        print(f" Model: {pricing.get('model', 'Unknown')}")
        print(f" Input: ${input_price:.4f} per 1K tokens (${input_price * 1000:.2f} per 1M)")
        print(f" Output: ${output_price:.4f} per 1K tokens (${output_price * 1000:.2f} per 1M)")
        print(f" Last Verified: {pricing.get('last_verified', 'Unknown')}")

        # Example costs
        example_cost = (example_input / 1000 * input_price) + (example_output / 1000 * output_price)
        print(f" Example: {example_input} input + {example_output} output tokens = ${example_cost:.4f}")

    print()
    print("=" * 80)
    print()

    return True


def analyze_token_patterns():
    """Print actual token usage patterns aggregated from the logs.

    Prints overall counts and coverage, then for each provider with at least
    one analysis: totals, per-analysis averages, and the ten checks with the
    highest average token usage.

    Returns:
        True if token data was found and reported, False if the logs contain
        no analyses or no analyses with token data.
    """
    print("=" * 80)
    print("ACTUAL TOKEN USAGE ANALYSIS")
    print("=" * 80)
    print()

    stats = analyze_actual_token_usage()
    total_analyses = stats['total_analyses']
    with_tokens = stats['total_analyses_with_tokens']

    if total_analyses == 0:
        print("No usage data found. Run some analyses first to collect data.")
        return False

    print(f"Total Analyses Logged: {total_analyses}")
    print(f"Analyses with Token Data: {with_tokens}")

    if with_tokens == 0:
        print()
        print("⚠️ No actual token data found in logs yet.")
        print(" Token tracking will start with the next analysis.")
        print(" Run a test analysis to collect token usage data.")
        return False

    coverage = with_tokens / total_analyses * 100
    print(f"Coverage: {coverage:.1f}%")
    print()

    for provider, provider_data in sorted(stats['by_provider'].items()):
        analyses = provider_data['analyses']
        # Providers seen only in check_execution entries have no analyses.
        if analyses <= 0:
            continue

        print(f"\n{provider.upper()}")
        print("-" * 80)
        print(f"Analyses: {analyses}")
        print(f"Total Tokens: {provider_data['total_tokens']:,}")
        print(f"Prompt Tokens: {provider_data['prompt_tokens']:,}")
        print(f"Completion Tokens: {provider_data['completion_tokens']:,}")
        print(f"Total Cost: ${provider_data['total_cost']:.4f}")

        avg_total = provider_data['total_tokens'] / analyses
        avg_prompt = provider_data['prompt_tokens'] / analyses
        avg_completion = provider_data['completion_tokens'] / analyses
        avg_cost = provider_data['total_cost'] / analyses

        print()
        print("Average per Analysis:")
        print(f" Total Tokens: {avg_total:.0f}")
        print(f" Prompt Tokens: {avg_prompt:.0f}")
        print(f" Completion Tokens: {avg_completion:.0f}")
        print(f" Cost: ${avg_cost:.4f}")

        # Show per-check breakdown if available
        if provider_data['by_check']:
            print()
            print("Per-Check Averages (Top 10 by token usage):")

            sorted_checks = sorted(
                provider_data['by_check'].items(),
                key=lambda item: item[1]['avg_tokens'],
                reverse=True,
            )

            for check_name, check_data in sorted_checks[:10]:
                if check_data['count'] > 0:
                    print(f" {check_name}:")
                    print(f" Count: {check_data['count']}")
                    print(
                        f" Avg Tokens: {check_data['avg_tokens']:.0f} "
                        f"(Prompt: {check_data['avg_prompt_tokens']:.0f}, "
                        f"Completion: {check_data['avg_completion_tokens']:.0f})"
                    )

    print()
    print("=" * 80)

    return True


def compare_estimate_vs_actual():
    """Print a comparison of the default estimates against logged actuals.

    The default estimate is 1000 prompt + 200 completion tokens per analysis.
    For each provider with at least one analysis, prints the actual averages,
    their percentage difference from the estimate, the estimated vs. actual
    cost per analysis, and a recommendation when the total token count is
    more than 20% off.

    Returns:
        True if a comparison was printed, False if no analysis in the logs
        carries token data.
    """
    print()
    print("=" * 80)
    print("ESTIMATE ACCURACY ANALYSIS")
    print("=" * 80)
    print()

    stats = analyze_actual_token_usage()

    if stats['total_analyses_with_tokens'] == 0:
        print("Not enough data to compare estimates vs actuals.")
        print("Run some analyses to collect actual token usage data.")
        return False

    # Default estimates used when token data unavailable
    estimated_prompt_tokens = 1000
    estimated_completion_tokens = 200
    estimated_total_tokens = estimated_prompt_tokens + estimated_completion_tokens

    print("DEFAULT ESTIMATES (used when actual data unavailable):")
    print(f" Prompt Tokens: {estimated_prompt_tokens}")
    print(f" Completion Tokens: {estimated_completion_tokens}")
    print(f" Total Tokens: {estimated_total_tokens}")
    print()

    for provider, provider_data in sorted(stats['by_provider'].items()):
        analyses = provider_data['analyses']
        if analyses <= 0:
            continue

        avg_prompt = provider_data['prompt_tokens'] / analyses
        avg_completion = provider_data['completion_tokens'] / analyses
        avg_total = provider_data['total_tokens'] / analyses

        print(f"\n{provider.upper()} - ACTUAL vs ESTIMATE")
        print("-" * 80)
        print("Actual Average per Analysis:")
        print(f" Prompt: {avg_prompt:.0f} tokens")
        print(f" Completion: {avg_completion:.0f} tokens")
        print(f" Total: {avg_total:.0f} tokens")
        print()

        prompt_diff = ((avg_prompt - estimated_prompt_tokens) / estimated_prompt_tokens) * 100
        completion_diff = ((avg_completion - estimated_completion_tokens) / estimated_completion_tokens) * 100
        total_diff = ((avg_total - estimated_total_tokens) / estimated_total_tokens) * 100

        print("Difference from Estimate:")
        print(f" Prompt: {prompt_diff:+.1f}%")
        print(f" Completion: {completion_diff:+.1f}%")
        print(f" Total: {total_diff:+.1f}%")
        print()

        # Cost comparison
        pricing = COST_PER_1K_TOKENS.get(provider, {})
        estimated_cost = (
            (estimated_prompt_tokens / 1000 * pricing.get('input', 0))
            + (estimated_completion_tokens / 1000 * pricing.get('output', 0))
        )
        actual_avg_cost = provider_data['total_cost'] / analyses

        print("Cost Comparison:")
        print(f" Estimated: ${estimated_cost:.4f} per analysis")
        print(f" Actual: ${actual_avg_cost:.4f} per analysis")

        # A provider that appears in the logs but has no (or zero) pricing
        # configured yields an estimated cost of 0; a percentage difference
        # is undefined in that case, so report it instead of dividing.
        if estimated_cost > 0:
            cost_diff = ((actual_avg_cost - estimated_cost) / estimated_cost) * 100
            print(f" Difference: {cost_diff:+.1f}%")
        else:
            print(" Difference: n/a (estimated cost is $0; check pricing configuration)")

        if abs(total_diff) > 20:
            print()
            print(f"⚠️ RECOMMENDATION: Update default estimates for {provider}")
            print(" Suggested values:")
            print(f" Prompt Tokens: {int(avg_prompt)}")
            print(f" Completion Tokens: {int(avg_completion)}")

    print()
    print("=" * 80)

    return True


def main():
    """Run the full report: pricing config, usage analysis, and next steps."""
    print()
    validate_pricing()
    print()

    has_data = analyze_token_patterns()

    if has_data:
        compare_estimate_vs_actual()
        print()
        print("NEXT STEPS:")
        print("-" * 80)
        print("1. If estimates are significantly off (>20%), update the default estimates")
        print("2. Verify pricing matches current API provider pricing pages")
        print("3. Re-run this validation monthly or when pricing changes")
        print()
    else:
        print()
        print("NEXT STEPS:")
        print("-" * 80)
        print("1. Run a few test analyses to collect actual token usage data")
        print("2. Re-run this script: python validate_pricing.py")
        print("3. Review the actual vs estimated comparison")
        print("4. Update estimates if needed")
        print()

    print("For latest pricing, check:")
    print(" - OpenAI: https://openai.com/api/pricing/")
    print(" - Google Gemini: https://ai.google.dev/pricing")
    print()


if __name__ == '__main__':
    main()