barclays-rag-test/rag_test_app/cli.py
Vadym Samoilenko ed040ea497 init: add RAG test app with Excel/CSV export
- rag_test_app: OpenAI Assistants benchmark tool
- TEST_TO_RUN: Barclays test configs (Internal Banners, Social Posts, Display Banners, PPC)
- Added report.xlsx + report.csv export alongside HTML report

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-10 13:29:14 +01:00

481 lines
No EOL
18 KiB
Python

#!/usr/bin/env python3
import argparse
import os
import sys
import json
import time
from datetime import datetime
from main import RAGTester
from rich.console import Console
from typing import Dict, Any, List
# Shared rich console; every function in this module prints its
# status, warning, and error messages through it.
console = Console()
def load_config(config_path: str) -> Dict[str, Any]:
    """
    Load configuration from a JSON file.

    The file is decoded as UTF-8 (a leading byte-order mark is tolerated)
    and must contain a JSON object at the top level, because callers read
    individual settings from the result with ``config.get(...)``.

    Args:
        config_path: Path to the JSON configuration file.

    Returns:
        The parsed configuration as a dictionary.

    Raises:
        SystemExit: With exit status 1 when the file cannot be read, is not
            valid JSON, or does not hold a JSON object. The problem is
            printed to the console before exiting.
    """
    try:
        # "utf-8-sig" decodes plain UTF-8 and additionally strips a BOM,
        # which json.load would otherwise reject. Being explicit also
        # avoids depending on the platform's locale encoding.
        with open(config_path, 'r', encoding='utf-8-sig') as f:
            config = json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing or unreadable file.
        # ValueError: covers json.JSONDecodeError and UnicodeDecodeError.
        console.print(f"[bold red]Error loading config file: {str(e)}[/bold red]")
        sys.exit(1)

    if not isinstance(config, dict):
        # A list/string/number at the top level would only fail later with
        # an opaque AttributeError on ``config.get``; reject it up front.
        console.print(
            "[bold red]Error loading config file: expected a JSON object at the top level, "
            f"got {type(config).__name__}[/bold red]"
        )
        sys.exit(1)

    console.print(f"[green]Loaded configuration from {config_path}[/green]")
    return config
def _cli_or_config(cli_value: Any, cli_default: Any, config: Dict[str, Any], key: str) -> Any:
    """Return the CLI value unless it equals the parser default, else the config value (or that default)."""
    if cli_value != cli_default:
        return cli_value
    return config.get(key, cli_default)


def process_single_config(config_path: str, args: argparse.Namespace) -> bool:
    """
    Process a single configuration file.

    Loads the JSON config, merges it with the command-line arguments
    (command line takes precedence), validates the required settings,
    then loads or generates questions, runs the tests, evaluates them and
    writes the report into a fresh ``<config name>_<timestamp>`` directory
    under the chosen output directory.

    Args:
        config_path: Path to the JSON configuration file.
        args: Parsed command-line arguments.

    Returns:
        True if successful, False otherwise. Failures are reported on the
        console instead of being raised, so a batch run can continue with
        the next configuration file.
    """
    console.print(f"\n[bold cyan]{'='*60}[/bold cyan]")
    console.print(f"[bold cyan]Processing config: {os.path.basename(config_path)}[/bold cyan]")
    console.print(f"[bold cyan]{'='*60}[/bold cyan]\n")
    try:
        # Load config file
        if not os.path.exists(config_path):
            console.print(f"[bold red]Error: Config file not found: {config_path}[/bold red]")
            return False
        try:
            config = load_config(config_path)
        except SystemExit:
            # load_config reports the error and then calls sys.exit().
            # SystemExit is not an Exception subclass, so without this guard
            # it would bypass the handler at the bottom and abort an entire
            # --config-dir batch because of one unreadable file.
            return False

        # Merge command-line arguments with config file (command-line takes precedence).
        # NOTE: for options that have a parser default, a CLI value equal to
        # that default is indistinguishable from "not given", so the config
        # file value wins in that case.
        api_key = args.api_key or config.get("api_key") or os.environ.get("OPENAI_API_KEY")
        assistant_id = args.assistant_id or config.get("assistant_id")
        document_path = args.document or config.get("document")
        documents = args.documents or config.get("documents")
        output_dir = _cli_or_config(args.output_dir, "results", config, "output_dir")
        num_questions = _cli_or_config(args.num_questions, 20, config, "num_questions")
        iterations = _cli_or_config(args.iterations, 3, config, "iterations")
        questions_file = args.questions_file or config.get("questions_file")
        generate_only = args.generate_only or config.get("generate_only", False)
        verbose = args.verbose or config.get("verbose", False)
        model = _cli_or_config(args.model, "gpt-4o", config, "model")
        prompt_type = _cli_or_config(args.prompt_type, "task-based", config, "prompt_type")
        parallel = _cli_or_config(args.parallel, 5, config, "parallel")
        batch_size = args.batch_size or config.get("batch_size")

        # A config may give "documents" as one path string; wrap it so the
        # loop below does not iterate over its individual characters.
        if isinstance(documents, str):
            documents = [documents]

        # Create unique timestamped output directory
        config_name = os.path.splitext(os.path.basename(config_path))[0]
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_dir = os.path.join(output_dir, f"{config_name}_{timestamp}")

        # Check for required parameters
        if not api_key:
            console.print("[bold red]Error: No OpenAI API key provided.[/bold red]")
            console.print("Please provide an API key using --api-key, config file, or set the OPENAI_API_KEY environment variable.")
            return False
        if not assistant_id:
            console.print("[bold red]Error: No assistant ID provided.[/bold red]")
            console.print("Please provide an assistant ID using --assistant-id or in the config file.")
            return False

        # Check if at least one document source is provided
        if not documents and not document_path:
            console.print("[bold red]Error: No documents provided.[/bold red]")
            console.print("Please provide documents using --documents, --document, or in the config file.")
            return False

        # Check if document paths exist. A missing entry in the list is only
        # a warning; a missing single document/directory is fatal.
        if documents:
            for doc in documents:
                if not os.path.exists(doc):
                    console.print(f"[bold red]Warning: Document not found: {doc}[/bold red]")
        elif document_path and not os.path.exists(document_path):
            console.print(f"[bold red]Error: Document or directory not found: {document_path}[/bold red]")
            return False

        # Print startup banner
        console.print("\n[bold green]╔══════════════════════════════════════════════╗[/bold green]")
        console.print("[bold green]║ RAG Testing Application ║[/bold green]")
        console.print("[bold green]╚══════════════════════════════════════════════╝[/bold green]\n")
        if verbose:
            console.print("[bold yellow]Verbose mode enabled[/bold yellow]")

        # Create tester; the document list takes precedence over the single path
        tester = RAGTester(
            api_key=api_key,
            assistant_id=assistant_id,
            document_paths=documents,
            document_path=None if documents else document_path,
            output_dir=output_dir,
            verbose=verbose,
            model=model,
            prompt_type=prompt_type
        )

        # Load or generate questions
        if questions_file:
            if not os.path.exists(questions_file):
                console.print(f"[bold red]Error: Questions file not found: {questions_file}[/bold red]")
                return False
            console.print(f"Loading questions from {questions_file}")
            tester.load_questions_from_file(questions_file)
        else:
            console.print(f"Generating {num_questions} test questions")
            tester.generate_test_questions(num_questions)

        # Exit if generate-only
        if generate_only:
            console.print("[green]Questions generated and saved. Exiting.[/green]")
            return True

        # Run tests with parallelization
        tester.run_tests(iterations=iterations, max_workers=parallel, batch_size=batch_size)
        # Evaluate results (the return value is not needed here)
        tester.evaluate_results()
        # Generate report
        tester.generate_report()
        console.print(f"[bold green]Testing complete! Results saved to {output_dir}[/bold green]")
        return True
    except Exception as e:
        # Deliberately broad: this is the per-config boundary, and any
        # failure must be reported as False rather than end a batch run.
        console.print(f"[bold red]Error processing config {config_path}: {str(e)}[/bold red]")
        import traceback
        console.print(f"[red]{traceback.format_exc()}[/red]")
        return False
def create_config_template(output_path: str) -> None:
    """
    Write a starter JSON configuration file to ``output_path``.

    Missing parent directories are created first. After the file is
    written, a short hint on how to run the CLI with it is printed. Any
    failure is reported on the console and ends the process with exit
    status 1.
    """
    starter_config = {
        "assistant_id": "asst_YourAssistantIdHere",
        "documents": [
            "/path/to/your/document1.txt",
            "/path/to/your/document2.txt"
        ],
        "api_key": "YOUR_OPENAI_API_KEY",
        "output_dir": "results",
        "num_questions": 20,
        "iterations": 3,
        # An empty questions_file means new questions are generated
        "questions_file": "",
        "generate_only": False,
        "verbose": True,
        "model": "gpt-4o",
        # One of: "task-based", "content-based", "scenario-based"
        "prompt_type": "task-based",
        "parallel": 10,
        "batch_size": 30
    }

    try:
        # The parent directory has to exist before the file can be opened
        parent_dir = os.path.dirname(os.path.abspath(output_path))
        os.makedirs(parent_dir, exist_ok=True)

        # Two-space indentation keeps the template easy to edit by hand
        with open(output_path, 'w') as out_file:
            json.dump(starter_config, out_file, indent=2)

        follow_up = (
            f"[green]Created configuration template at: {output_path}[/green]",
            "[cyan]Edit this file with your settings and then run:[/cyan]",
            f"[cyan] python cli.py --config {output_path}[/cyan]",
        )
        for message in follow_up:
            console.print(message)
    except Exception as problem:
        console.print(f"[bold red]Error creating configuration template: {str(problem)}[/bold red]")
        sys.exit(1)
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the argument parser describing every option of the CLI."""
    parser = argparse.ArgumentParser(
        description="Test OpenAI assistants with RAG capabilities",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    # Add config group
    config_group = parser.add_argument_group('Configuration')
    config_group.add_argument(
        "--config",
        type=str,
        help="Path to a JSON configuration file"
    )
    config_group.add_argument(
        "--config-dir",
        type=str,
        help="Path to a directory containing multiple JSON configuration files. All configs will be run in sequence."
    )
    config_group.add_argument(
        "--create-config",
        type=str,
        metavar="OUTPUT_PATH",
        help="Create a configuration template file at the specified path and exit"
    )
    parser.add_argument(
        "--api-key",
        type=str,
        help="OpenAI API key. If not provided, will use OPENAI_API_KEY environment variable."
    )
    parser.add_argument(
        "--assistant-id",
        type=str,
        help="ID of the OpenAI assistant to test"
    )
    parser.add_argument(
        "--document",
        type=str,
        help="Path to the document or directory of documents to use for testing"
    )
    parser.add_argument(
        "--documents",
        nargs='+',
        help="List of document paths to use for testing (takes precedence over --document)"
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="results",
        help="Directory to save test results"
    )
    parser.add_argument(
        "--num-questions",
        type=int,
        default=20,
        help="Number of test questions to generate"
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=3,
        help="Number of times to test each question"
    )
    parser.add_argument(
        "--questions-file",
        type=str,
        help="Path to a JSON file with pre-generated questions"
    )
    parser.add_argument(
        "--generate-only",
        action="store_true",
        help="Only generate questions, don't run tests"
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose output for debugging"
    )
    parser.add_argument(
        "--model",
        type=str,
        default="gpt-4o",
        help="OpenAI model to use for question generation and evaluation"
    )
    parser.add_argument(
        "--prompt-type",
        type=str,
        choices=["task-based", "content-based", "scenario-based"],
        default="task-based",
        help="Type of prompts to generate: 'task-based' (real user tasks like 'create a banner'), "
             "'content-based' (knowledge questions about documents), "
             "'scenario-based' (realistic business scenarios). Default: task-based"
    )
    parser.add_argument(
        "--parallel",
        type=int,
        default=5,
        help="Number of parallel workers for running tests (default: 5)"
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        help="Number of questions to process in a batch (defaults to same as --parallel if not specified)"
    )
    return parser


def _run_config_dir(config_dir: str, args: argparse.Namespace) -> int:
    """Run every ``*.json`` file in *config_dir* in sorted order and return the process exit status."""
    if not os.path.exists(config_dir):
        console.print(f"[bold red]Error: Config directory not found: {config_dir}[/bold red]")
        return 1
    if not os.path.isdir(config_dir):
        console.print(f"[bold red]Error: {config_dir} is not a directory[/bold red]")
        return 1

    # Find all JSON config files in the directory. Sub-directories whose
    # name happens to end in ".json" are skipped; they cannot be loaded.
    config_files = sorted(
        path
        for path in (os.path.join(config_dir, name) for name in os.listdir(config_dir))
        if path.endswith('.json') and os.path.isfile(path)
    )
    if not config_files:
        console.print(f"[bold red]Error: No JSON config files found in {config_dir}[/bold red]")
        return 1

    console.print(f"\n[bold magenta]{'='*60}[/bold magenta]")
    console.print("[bold magenta]BATCH PROCESSING MODE[/bold magenta]")
    console.print(f"[bold magenta]Found {len(config_files)} configuration file(s)[/bold magenta]")
    console.print(f"[bold magenta]{'='*60}[/bold magenta]\n")
    for config_file in config_files:
        console.print(f"{os.path.basename(config_file)}")
    console.print()

    # Process each config file; a failure is counted and the run continues
    successful = 0
    failed = 0
    start_time = time.time()
    for idx, config_file in enumerate(config_files, 1):
        console.print(f"\n[bold yellow]>>> Processing {idx}/{len(config_files)}[/bold yellow]")
        if process_single_config(config_file, args):
            successful += 1
        else:
            failed += 1
            console.print(f"[bold red]Failed to process {os.path.basename(config_file)}[/bold red]")

    # Print summary
    elapsed_time = time.time() - start_time
    console.print(f"\n[bold magenta]{'='*60}[/bold magenta]")
    console.print("[bold magenta]BATCH PROCESSING COMPLETE[/bold magenta]")
    console.print(f"[bold magenta]{'='*60}[/bold magenta]")
    console.print(f"[bold green]✓ Successful: {successful}[/bold green]")
    if failed > 0:
        console.print(f"[bold red]✗ Failed: {failed}[/bold red]")
    console.print(f"[cyan]Total time: {elapsed_time/60:.1f} minutes[/cyan]")
    console.print()
    return 0 if failed == 0 else 1


def main() -> None:
    """
    Main entry point for the RAG Testing CLI.

    Modes are checked in this order, and the first that applies wins:

    1. ``--create-config``: write a template config and exit.
    2. ``--config-dir``: run every JSON config in the directory and exit
       with status 0 only if all of them succeeded.
    3. ``--config``: run one config file and exit with its status.
    4. Otherwise: run once using only the command-line options and the
       ``OPENAI_API_KEY`` environment variable.

    Raises:
        SystemExit: Every mode that ends early or fails exits through
            ``sys.exit``; the direct mode returns normally on success.
    """
    args = _build_arg_parser().parse_args()

    # Check if we need to create a config template
    if args.create_config:
        create_config_template(args.create_config)
        sys.exit(0)

    # Handle batch processing of multiple config files
    if args.config_dir:
        sys.exit(_run_config_dir(args.config_dir, args))

    # Single config file mode
    if args.config:
        if not os.path.exists(args.config):
            console.print(f"[bold red]Error: Config file not found: {args.config}[/bold red]")
            sys.exit(1)
        success = process_single_config(args.config, args)
        sys.exit(0 if success else 1)

    # Original command-line mode (no config file): every setting comes
    # straight from the parsed arguments.
    api_key = args.api_key or os.environ.get("OPENAI_API_KEY")
    assistant_id = args.assistant_id
    document_path = args.document
    documents = args.documents
    num_questions = args.num_questions
    iterations = args.iterations
    questions_file = args.questions_file
    generate_only = args.generate_only
    verbose = args.verbose
    model = args.model
    prompt_type = args.prompt_type
    parallel = args.parallel
    # A batch size of 0 is treated the same as "not specified"
    batch_size = args.batch_size or None

    # Create unique timestamped output directory; with no config file to
    # name the run after, the prefix is the fixed string "test".
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = os.path.join(args.output_dir, f"test_{timestamp}")

    # Check for required parameters
    if not api_key:
        console.print("[bold red]Error: No OpenAI API key provided.[/bold red]")
        console.print("Please provide an API key using --api-key, config file, or set the OPENAI_API_KEY environment variable.")
        sys.exit(1)
    if not assistant_id:
        console.print("[bold red]Error: No assistant ID provided.[/bold red]")
        console.print("Please provide an assistant ID using --assistant-id or in the config file.")
        sys.exit(1)

    # Check if at least one document source is provided
    if not documents and not document_path:
        console.print("[bold red]Error: No documents provided.[/bold red]")
        console.print("Please provide documents using --documents, --document, or in the config file.")
        sys.exit(1)

    # Check if document paths exist. A missing entry in the list is only a
    # warning; a missing single document/directory is fatal.
    if documents:
        for doc in documents:
            if not os.path.exists(doc):
                console.print(f"[bold red]Warning: Document not found: {doc}[/bold red]")
    elif document_path and not os.path.exists(document_path):
        console.print(f"[bold red]Error: Document or directory not found: {document_path}[/bold red]")
        sys.exit(1)

    # Print startup banner
    console.print("\n[bold green]╔══════════════════════════════════════════════╗[/bold green]")
    console.print("[bold green]║ RAG Testing Application ║[/bold green]")
    console.print("[bold green]╚══════════════════════════════════════════════╝[/bold green]\n")
    if verbose:
        console.print("[bold yellow]Verbose mode enabled[/bold yellow]")

    # Create tester. prompt_type is forwarded here as it is in
    # process_single_config; otherwise --prompt-type would be silently
    # ignored in this mode.
    tester = RAGTester(
        api_key=api_key,
        assistant_id=assistant_id,
        document_paths=documents,
        document_path=None if documents else document_path,
        output_dir=output_dir,
        verbose=verbose,
        model=model,
        prompt_type=prompt_type
    )

    # Load or generate questions
    if questions_file:
        if not os.path.exists(questions_file):
            console.print(f"[bold red]Error: Questions file not found: {questions_file}[/bold red]")
            sys.exit(1)
        console.print(f"Loading questions from {questions_file}")
        tester.load_questions_from_file(questions_file)
    else:
        console.print(f"Generating {num_questions} test questions")
        tester.generate_test_questions(num_questions)

    # Exit if generate-only
    if generate_only:
        console.print("[green]Questions generated and saved. Exiting.[/green]")
        sys.exit(0)

    # Run tests with parallelization
    tester.run_tests(iterations=iterations, max_workers=parallel, batch_size=batch_size)
    # Evaluate results (the return value is not needed here)
    tester.evaluate_results()
    # Generate report
    tester.generate_report()
    console.print(f"[bold green]Testing complete! Results saved to {output_dir}[/bold green]")
# Run the CLI only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()