- rag_test_app: OpenAI Assistants benchmark tool - TEST_TO_RUN: Barclays test configs (Internal Banners, Social Posts, Display Banners, PPC) - Added report.xlsx + report.csv export alongside HTML report Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
481 lines
No EOL
18 KiB
Python
481 lines
No EOL
18 KiB
Python
#!/usr/bin/env python3
|
|
import argparse
|
|
import os
|
|
import sys
|
|
import json
|
|
import time
|
|
from datetime import datetime
|
|
from main import RAGTester
|
|
from rich.console import Console
|
|
from typing import Dict, Any, List
|
|
|
|
# Shared rich console; every function in this module prints through it.
console = Console()
|
|
|
|
def load_config(config_path: str) -> Dict[str, Any]:
    """
    Load configuration from a JSON file.

    Args:
        config_path: Path to the JSON configuration file.

    Returns:
        The parsed configuration as a dictionary.

    On any failure (unreadable file, invalid JSON, or a top-level JSON value
    that is not an object) the error is printed and the process exits with
    status 1 via ``sys.exit``.
    """
    try:
        # Decode explicitly instead of relying on the platform's locale
        # encoding; "utf-8-sig" also accepts files saved with a leading BOM.
        with open(config_path, 'r', encoding='utf-8-sig') as f:
            config = json.load(f)

        # process_single_config reads settings with config.get(...), so a
        # list or scalar at the top level must be rejected here rather than
        # failing later with an AttributeError.
        if not isinstance(config, dict):
            raise ValueError("top-level JSON value must be an object")

        console.print(f"[green]Loaded configuration from {config_path}[/green]")
        return config
    except Exception as e:
        console.print(f"[bold red]Error loading config file: {str(e)}[/bold red]")
        sys.exit(1)
|
|
|
|
def process_single_config(config_path: str, args: argparse.Namespace) -> bool:
    """
    Process a single configuration file.

    Settings are merged from ``args`` and the JSON config at ``config_path``.
    Options without a built-in default (API key, assistant ID, documents,
    questions file, batch size) and the boolean flags take the command-line
    value when it is truthy and fall back to the config otherwise. Options
    with a built-in default (output dir, question count, iterations, model,
    prompt type, parallel workers) use the config value whenever the
    command-line value equals that default.

    Args:
        config_path: Path to the JSON configuration file to run.
        args: Parsed command-line arguments.

    Returns:
        True if successful, False otherwise. Failures are printed to the
        console rather than raised, so a caller iterating over several
        configs can continue with the next one.
    """
    console.print(f"\n[bold cyan]{'='*60}[/bold cyan]")
    console.print(f"[bold cyan]Processing config: {os.path.basename(config_path)}[/bold cyan]")
    console.print(f"[bold cyan]{'='*60}[/bold cyan]\n")

    try:
        # Load config file
        if not os.path.exists(config_path):
            console.print(f"[bold red]Error: Config file not found: {config_path}[/bold red]")
            return False

        try:
            config = load_config(config_path)
        except SystemExit:
            # load_config prints its own error and then calls sys.exit(1).
            # SystemExit is not an Exception subclass, so without this guard
            # one unreadable config would terminate an entire batch run.
            return False

        # Merge command-line arguments with config file (command-line takes precedence)
        api_key = args.api_key or config.get("api_key") or os.environ.get("OPENAI_API_KEY")
        assistant_id = args.assistant_id or config.get("assistant_id")
        document_path = args.document or config.get("document")
        documents = args.documents or config.get("documents")
        output_dir = args.output_dir if args.output_dir != "results" else config.get("output_dir", "results")
        num_questions = args.num_questions if args.num_questions != 20 else config.get("num_questions", 20)
        iterations = args.iterations if args.iterations != 3 else config.get("iterations", 3)
        questions_file = args.questions_file or config.get("questions_file")
        generate_only = args.generate_only or config.get("generate_only", False)
        verbose = args.verbose or config.get("verbose", False)
        model = args.model if args.model != "gpt-4o" else config.get("model", "gpt-4o")
        prompt_type = args.prompt_type if args.prompt_type != "task-based" else config.get("prompt_type", "task-based")
        parallel = args.parallel if args.parallel != 5 else config.get("parallel", 5)
        batch_size = args.batch_size or config.get("batch_size")

        # Create unique timestamped output directory
        config_name = os.path.splitext(os.path.basename(config_path))[0]
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_dir = os.path.join(output_dir, f"{config_name}_{timestamp}")

        # Check for required parameters
        if not api_key:
            console.print("[bold red]Error: No OpenAI API key provided.[/bold red]")
            console.print("Please provide an API key using --api-key, config file, or set the OPENAI_API_KEY environment variable.")
            return False

        if not assistant_id:
            console.print("[bold red]Error: No assistant ID provided.[/bold red]")
            console.print("Please provide an assistant ID using --assistant-id or in the config file.")
            return False

        # Check if at least one document source is provided
        if not documents and not document_path:
            console.print("[bold red]Error: No documents provided.[/bold red]")
            console.print("Please provide documents using --documents, --document, or in the config file.")
            return False

        # Check if document paths exist. A missing entry in the list is only
        # warned about; a missing single --document path is a hard error.
        if documents:
            for doc in documents:
                if not os.path.exists(doc):
                    console.print(f"[bold red]Warning: Document not found: {doc}[/bold red]")
        elif document_path and not os.path.exists(document_path):
            console.print(f"[bold red]Error: Document or directory not found: {document_path}[/bold red]")
            return False

        # Print startup banner
        console.print("\n[bold green]╔══════════════════════════════════════════════╗[/bold green]")
        console.print("[bold green]║ RAG Testing Application ║[/bold green]")
        console.print("[bold green]╚══════════════════════════════════════════════╝[/bold green]\n")

        if verbose:
            console.print("[bold yellow]Verbose mode enabled[/bold yellow]")

        # Create tester; the document list takes precedence over the single path
        tester = RAGTester(
            api_key=api_key,
            assistant_id=assistant_id,
            document_paths=documents,
            document_path=None if documents else document_path,
            output_dir=output_dir,
            verbose=verbose,
            model=model,
            prompt_type=prompt_type
        )

        # Load or generate questions
        if questions_file:
            if not os.path.exists(questions_file):
                console.print(f"[bold red]Error: Questions file not found: {questions_file}[/bold red]")
                return False

            console.print(f"Loading questions from {questions_file}")
            tester.load_questions_from_file(questions_file)
        else:
            console.print(f"Generating {num_questions} test questions")
            tester.generate_test_questions(num_questions)

        # Exit if generate-only
        if generate_only:
            console.print("[green]Questions generated and saved. Exiting.[/green]")
            return True

        # Run tests with parallelization
        tester.run_tests(iterations=iterations, max_workers=parallel, batch_size=batch_size)

        # Evaluate results (the return value is not used here)
        tester.evaluate_results()

        # Generate report
        tester.generate_report()

        console.print(f"[bold green]Testing complete! Results saved to {output_dir}[/bold green]")
        return True

    except Exception as e:
        console.print(f"[bold red]Error processing config {config_path}: {str(e)}[/bold red]")
        import traceback
        console.print(f"[red]{traceback.format_exc()}[/red]")
        return False
|
|
|
|
def create_config_template(output_path: str) -> None:
    """
    Write a starter JSON configuration file to ``output_path``.

    Missing parent directories are created first. On success the file
    location and a usage hint are printed; on any failure the error is
    printed and the process exits with status 1.
    """
    starter_settings = {
        "assistant_id": "asst_YourAssistantIdHere",
        "documents": [
            "/path/to/your/document1.txt",
            "/path/to/your/document2.txt"
        ],
        "api_key": "YOUR_OPENAI_API_KEY",
        "output_dir": "results",
        "num_questions": 20,
        "iterations": 3,
        # An empty questions_file means new questions are generated
        "questions_file": "",
        "generate_only": False,
        "verbose": True,
        "model": "gpt-4o",
        # One of: "task-based", "content-based", "scenario-based"
        "prompt_type": "task-based",
        "parallel": 10,
        "batch_size": 30
    }

    try:
        # Make sure the destination folder is there before writing
        parent_dir = os.path.dirname(os.path.abspath(output_path))
        os.makedirs(parent_dir, exist_ok=True)

        # Pretty-print with two-space indentation
        rendered = json.dumps(starter_settings, indent=2)
        with open(output_path, 'w') as handle:
            handle.write(rendered)

        for message in (
            f"[green]Created configuration template at: {output_path}[/green]",
            "[cyan]Edit this file with your settings and then run:[/cyan]",
            f"[cyan] python cli.py --config {output_path}[/cyan]",
        ):
            console.print(message)
    except Exception as e:
        console.print(f"[bold red]Error creating configuration template: {str(e)}[/bold red]")
        sys.exit(1)
|
|
|
|
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the RAG Testing CLI."""
    parser = argparse.ArgumentParser(
        description="Test OpenAI assistants with RAG capabilities",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # Add config group
    config_group = parser.add_argument_group('Configuration')

    config_group.add_argument(
        "--config",
        type=str,
        help="Path to a JSON configuration file"
    )

    config_group.add_argument(
        "--config-dir",
        type=str,
        help="Path to a directory containing multiple JSON configuration files. All configs will be run in sequence."
    )

    config_group.add_argument(
        "--create-config",
        type=str,
        metavar="OUTPUT_PATH",
        help="Create a configuration template file at the specified path and exit"
    )

    parser.add_argument(
        "--api-key",
        type=str,
        help="OpenAI API key. If not provided, will use OPENAI_API_KEY environment variable."
    )

    parser.add_argument(
        "--assistant-id",
        type=str,
        help="ID of the OpenAI assistant to test"
    )

    parser.add_argument(
        "--document",
        type=str,
        help="Path to the document or directory of documents to use for testing"
    )

    parser.add_argument(
        "--documents",
        nargs='+',
        help="List of document paths to use for testing (takes precedence over --document)"
    )

    parser.add_argument(
        "--output-dir",
        type=str,
        default="results",
        help="Directory to save test results"
    )

    parser.add_argument(
        "--num-questions",
        type=int,
        default=20,
        help="Number of test questions to generate"
    )

    parser.add_argument(
        "--iterations",
        type=int,
        default=3,
        help="Number of times to test each question"
    )

    parser.add_argument(
        "--questions-file",
        type=str,
        help="Path to a JSON file with pre-generated questions"
    )

    parser.add_argument(
        "--generate-only",
        action="store_true",
        help="Only generate questions, don't run tests"
    )

    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose output for debugging"
    )

    parser.add_argument(
        "--model",
        type=str,
        default="gpt-4o",
        help="OpenAI model to use for question generation and evaluation"
    )

    parser.add_argument(
        "--prompt-type",
        type=str,
        choices=["task-based", "content-based", "scenario-based"],
        default="task-based",
        help="Type of prompts to generate: 'task-based' (real user tasks like 'create a banner'), "
             "'content-based' (knowledge questions about documents), "
             "'scenario-based' (realistic business scenarios). Default: task-based"
    )

    parser.add_argument(
        "--parallel",
        type=int,
        default=5,
        help="Number of parallel workers for running tests (default: 5)"
    )

    parser.add_argument(
        "--batch-size",
        type=int,
        help="Number of questions to process in a batch (defaults to same as --parallel if not specified)"
    )

    return parser


def _run_config_dir(args: argparse.Namespace) -> None:
    """Run every ``*.json`` config in ``args.config_dir`` in sorted order, then exit.

    Always terminates the process: status 0 when every config succeeded,
    status 1 when the directory is invalid, empty, or any config failed.
    """
    if not os.path.exists(args.config_dir):
        console.print(f"[bold red]Error: Config directory not found: {args.config_dir}[/bold red]")
        sys.exit(1)

    if not os.path.isdir(args.config_dir):
        console.print(f"[bold red]Error: {args.config_dir} is not a directory[/bold red]")
        sys.exit(1)

    # Find all JSON config files in the directory
    config_files = sorted(
        os.path.join(args.config_dir, f)
        for f in os.listdir(args.config_dir)
        if f.endswith('.json')
    )

    if not config_files:
        console.print(f"[bold red]Error: No JSON config files found in {args.config_dir}[/bold red]")
        sys.exit(1)

    console.print(f"\n[bold magenta]{'='*60}[/bold magenta]")
    console.print("[bold magenta]BATCH PROCESSING MODE[/bold magenta]")
    console.print(f"[bold magenta]Found {len(config_files)} configuration file(s)[/bold magenta]")
    console.print(f"[bold magenta]{'='*60}[/bold magenta]\n")

    for config_file in config_files:
        console.print(f" • {os.path.basename(config_file)}")

    console.print()

    # Process each config file
    successful = 0
    failed = 0
    start_time = time.time()

    for idx, config_file in enumerate(config_files, 1):
        console.print(f"\n[bold yellow]>>> Processing {idx}/{len(config_files)}[/bold yellow]")
        if process_single_config(config_file, args):
            successful += 1
        else:
            failed += 1
            console.print(f"[bold red]Failed to process {os.path.basename(config_file)}[/bold red]")

    # Print summary
    elapsed_time = time.time() - start_time
    console.print(f"\n[bold magenta]{'='*60}[/bold magenta]")
    console.print("[bold magenta]BATCH PROCESSING COMPLETE[/bold magenta]")
    console.print(f"[bold magenta]{'='*60}[/bold magenta]")
    console.print(f"[bold green]✓ Successful: {successful}[/bold green]")
    if failed > 0:
        console.print(f"[bold red]✗ Failed: {failed}[/bold red]")
    console.print(f"[cyan]Total time: {elapsed_time/60:.1f} minutes[/cyan]")
    console.print()

    sys.exit(0 if failed == 0 else 1)


def _run_from_cli_args(args: argparse.Namespace) -> None:
    """Run one test session configured purely from command-line arguments.

    Exits with status 1 on a missing API key, assistant ID, document source
    or questions file, and with status 0 after question generation when
    ``--generate-only`` is set.
    """
    # No config file in this mode: values come straight from the command
    # line, with the API key falling back to the environment.
    api_key = args.api_key or os.environ.get("OPENAI_API_KEY")
    assistant_id = args.assistant_id
    document_path = args.document
    documents = args.documents
    questions_file = args.questions_file
    # A batch size of 0 is treated as "not specified".
    batch_size = args.batch_size or None

    # Create unique timestamped output directory; with no config file to
    # name the run after, the fixed prefix 'test' is used.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = os.path.join(args.output_dir, f"test_{timestamp}")

    # Check for required parameters
    if not api_key:
        console.print("[bold red]Error: No OpenAI API key provided.[/bold red]")
        console.print("Please provide an API key using --api-key, config file, or set the OPENAI_API_KEY environment variable.")
        sys.exit(1)

    if not assistant_id:
        console.print("[bold red]Error: No assistant ID provided.[/bold red]")
        console.print("Please provide an assistant ID using --assistant-id or in the config file.")
        sys.exit(1)

    # Check if at least one document source is provided
    if not documents and not document_path:
        console.print("[bold red]Error: No documents provided.[/bold red]")
        console.print("Please provide documents using --documents, --document, or in the config file.")
        sys.exit(1)

    # Check if document paths exist. A missing entry in the list is only
    # warned about; a missing single --document path is a hard error.
    if documents:
        for doc in documents:
            if not os.path.exists(doc):
                console.print(f"[bold red]Warning: Document not found: {doc}[/bold red]")
    elif document_path and not os.path.exists(document_path):
        console.print(f"[bold red]Error: Document or directory not found: {document_path}[/bold red]")
        sys.exit(1)

    # Print startup banner
    console.print("\n[bold green]╔══════════════════════════════════════════════╗[/bold green]")
    console.print("[bold green]║ RAG Testing Application ║[/bold green]")
    console.print("[bold green]╚══════════════════════════════════════════════╝[/bold green]\n")

    if args.verbose:
        console.print("[bold yellow]Verbose mode enabled[/bold yellow]")

    # Create tester. prompt_type is forwarded so that --prompt-type takes
    # effect in this mode too, matching process_single_config.
    tester = RAGTester(
        api_key=api_key,
        assistant_id=assistant_id,
        document_paths=documents,
        document_path=None if documents else document_path,
        output_dir=output_dir,
        verbose=args.verbose,
        model=args.model,
        prompt_type=args.prompt_type
    )

    # Load or generate questions
    if questions_file:
        if not os.path.exists(questions_file):
            console.print(f"[bold red]Error: Questions file not found: {questions_file}[/bold red]")
            sys.exit(1)

        console.print(f"Loading questions from {questions_file}")
        tester.load_questions_from_file(questions_file)
    else:
        console.print(f"Generating {args.num_questions} test questions")
        tester.generate_test_questions(args.num_questions)

    # Exit if generate-only
    if args.generate_only:
        console.print("[green]Questions generated and saved. Exiting.[/green]")
        sys.exit(0)

    # Run tests with parallelization
    tester.run_tests(iterations=args.iterations, max_workers=args.parallel, batch_size=batch_size)

    # Evaluate results (the return value is not used here)
    tester.evaluate_results()

    # Generate report
    tester.generate_report()

    console.print(f"[bold green]Testing complete! Results saved to {output_dir}[/bold green]")


def main():
    """Main entry point for the RAG Testing CLI.

    Dispatches to one of four modes, checked in this order:
    ``--create-config`` (write a template and exit), ``--config-dir``
    (batch over a directory of configs), ``--config`` (single config file),
    and otherwise a run configured purely from command-line arguments.
    """
    args = _build_arg_parser().parse_args()

    # Check if we need to create a config template
    if args.create_config:
        create_config_template(args.create_config)
        sys.exit(0)

    # Handle batch processing of multiple config files (always exits)
    if args.config_dir:
        _run_config_dir(args)

    # Single config file mode
    if args.config:
        if not os.path.exists(args.config):
            console.print(f"[bold red]Error: Config file not found: {args.config}[/bold red]")
            sys.exit(1)

        success = process_single_config(args.config, args)
        sys.exit(0 if success else 1)

    # Original command-line mode (no config file)
    _run_from_cli_args(args)
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()