# master_adapt_detect/cli.py

#!/usr/bin/env python3
"""
CLI Module - Command Line Interface for Master Image Detection
"""

import argparse
import multiprocessing
import sys
from logging_config import setup_dual_logging, log_system_info, log_exception, DualLogger
from gemini_detector import ImageDetector
from openai_detector import OpenAIImageDetector
from vector_detector import VectorImageDetector
from hybrid_detector import HybridImageDetector
from cost_calculator import cost_calculator


def _positive_int(value):
    """Argparse ``type`` callable that accepts only integers >= 1.

    Args:
        value: Raw command-line token to convert.

    Returns:
        The parsed integer.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not an integer or is
            smaller than 1. argparse turns this into a usage error.
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {number}")
    return number


def parse_arguments(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            behaviour existing callers rely on.

    Returns:
        argparse.Namespace holding one attribute per option defined below.

    Raises:
        SystemExit: Raised by argparse on invalid arguments (exit code 2)
            or after printing ``--help``.
    """
    parser = argparse.ArgumentParser(
        description="Detect master images in layout images using LLM APIs (Gemini, OpenAI), Vector Embeddings, or Hybrid mode",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --test                                    # Test with 1 layout (Gemini LLM)
  %(prog)s --test --openai                          # Test with 1 layout (OpenAI o3)
  %(prog)s --file 6786505.jpg --openai --split      # Process specific file with OpenAI and splitting
  %(prog)s --limit 10                                # Process first 10 layouts (Gemini LLM)
  %(prog)s --limit 10 --openai                      # Process first 10 layouts (OpenAI o3)
  %(prog)s --all                                    # Process all layouts (Gemini LLM)
  %(prog)s --all --openai                           # Process all layouts (OpenAI o3)
  %(prog)s --test --vector-mode                     # Test with 1 layout (Vector embeddings)
  %(prog)s --limit 10 --vector-mode                 # Process first 10 layouts (Vector embeddings)
  %(prog)s --all --vector-mode                      # Process all layouts (Vector embeddings)
  %(prog)s --test --vector-mode --splitting-mode grid  # Test with grid-based image splitting
  %(prog)s --limit 5 --vector-mode --splitting-mode auto  # Auto-detect splitting for 5 layouts
  %(prog)s --all --vector-mode --similarity-threshold 0.8  # Process all with higher similarity threshold
  %(prog)s --all --output my_results                # Process all with custom output name
  %(prog)s --limit 50 --output batch_50             # Process 50 layouts with custom name
  %(prog)s --all --no-greyscale                     # Process all without greyscale conversion (Gemini only)
  %(prog)s --all --no-contrast                      # Process all without contrast enhancement (Gemini only)
  %(prog)s --all --contrast-factor 2.0              # Process all with higher contrast factor (Gemini only)
  %(prog)s --test --refinement-mode                 # Test with CEN refinement (1 layout)
  %(prog)s --limit 5 --refinement-mode              # Process 5 layouts with CEN refinement
  %(prog)s --all --refinement-mode --output refined_results  # Full run with CEN refinement
  %(prog)s --test --one-at-a-time                   # Test one-at-a-time mode (1 layout, 41 concurrent API calls)
  %(prog)s --test --openai --one-at-a-time          # Test OpenAI one-at-a-time mode
  %(prog)s --all --one-at-a-time --refinement-mode  # Full run with one-at-a-time and CEN refinement
  %(prog)s --test --one-at-a-time --concurrent-workers 8   # Test with 8 concurrent workers (faster)
  %(prog)s --all --openai --one-at-a-time --concurrent-workers 3   # OpenAI full run with 3 concurrent workers
  %(prog)s --test --openai --one-at-a-time --panel-aware-refinement  # Test with panel counting and OpenCV refinement
  %(prog)s --all --openai --one-at-a-time --panel-aware-refinement   # Full run with panel-aware match refinement
  %(prog)s --test --openai --one-at-a-time --refinement-mode --panel-aware-refinement  # Test with both CEN and panel-aware refinement
  %(prog)s --test --hybrid                              # Test with hybrid mode (OpenAI panel counting + local analysis)
  %(prog)s --limit 10 --hybrid                         # Process first 10 layouts with hybrid mode
  %(prog)s --all --hybrid                              # Process all layouts with hybrid mode
  %(prog)s --test --hybrid --panel-threshold 3         # Test hybrid mode with custom panel threshold
  %(prog)s --all --hybrid --refinement-mode            # Full run with hybrid mode and CEN refinement
  %(prog)s --test --hybrid --enable-greyscale          # Test hybrid mode with greyscale override
  %(prog)s --limit 5 --hybrid --enable-contrast        # Test hybrid mode with contrast enhancement override
  %(prog)s --test --hybrid --concurrent-workers 8      # Test hybrid mode with 8 concurrent workers (both paths)
  %(prog)s --all --hybrid --concurrent-workers 3       # Full hybrid run with 3 concurrent workers (both paths)
  %(prog)s --test --hybrid --openai-workers 10 --local-workers 8  # Test with separate worker counts
  %(prog)s --all --hybrid --local-workers 12           # Full run with 12 local workers (OpenAI auto-detects)
  %(prog)s --test --hybrid --split-advanced            # Test with advanced edge detection splitting
  %(prog)s --limit 10 --hybrid --split-advanced --percentile 15 --min-gap 3  # Advanced splitting with custom parameters
  %(prog)s --test --hybrid --vector-mode               # Test hybrid mode with vector similarity instead of inlier analysis
  %(prog)s --limit 10 --hybrid --vector-mode --similarity-threshold 0.8  # Hybrid with higher similarity threshold
  %(prog)s --all --hybrid --vector-mode --split-simple  # Full run with vector similarity and simple splitting
  %(prog)s --all --hybrid --split-simple --fallback-one-at-a-time  # Hybrid with fallback to OpenAI one-at-a-time when needed
  %(prog)s --test --hybrid --parallel-layouts          # Test hybrid mode with parallel layout processing
  %(prog)s --limit 10 --hybrid --parallel-layouts --layout-workers 4  # Process 10 layouts with 4 parallel workers
  %(prog)s --all --hybrid --parallel-layouts --layout-workers 6 --max-concurrent-layouts 4  # Full run with controlled parallelism
  %(prog)s --test --hybrid --enable-cost-tracking      # Test with detailed cost tracking enabled
  %(prog)s --limit 10 --openai --enable-cost-tracking --cost-report  # OpenAI mode with cost tracking and report generation
  %(prog)s --all --hybrid --enable-cost-tracking --cost-estimate 300  # Full run with cost tracking and monthly estimate
        """
    )

    # Exactly one of these selects which layouts get processed.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--test', action='store_true',
                      help='Test mode: process only 1 layout image')
    # A positive-int validator is used because callers test the value for
    # truthiness: a limit of 0 would otherwise be indistinguishable from
    # "not given" and fall through to processing every layout.
    group.add_argument('--limit', type=_positive_int, metavar='N',
                      help='Process first N layout images (N must be a positive integer)')
    group.add_argument('--all', action='store_true',
                      help='Process all layout images')
    group.add_argument('--file', type=str, metavar='FILENAME',
                      help='Process a specific layout file (e.g., --file 6786505.jpg)')

    parser.add_argument('--output', type=str, default=None, metavar='NAME',
                       help='Output filename (without .json extension). Default: auto-generated based on mode')

    # Image processing options
    parser.add_argument('--no-greyscale', action='store_true',
                       help='Disable greyscale conversion (enabled by default for Gemini/OpenAI, disabled for hybrid)')
    parser.add_argument('--no-contrast', action='store_true',
                       help='Disable contrast enhancement (enabled by default for Gemini/OpenAI, disabled for hybrid)')
    parser.add_argument('--contrast-factor', type=float, default=1.5, metavar='FACTOR',
                       help='Contrast enhancement factor (default: 1.5)')

    # Hybrid mode image processing overrides
    parser.add_argument('--enable-greyscale', action='store_true',
                       help='Enable greyscale conversion for hybrid mode (disabled by default)')
    parser.add_argument('--enable-contrast', action='store_true',
                       help='Enable contrast enhancement for hybrid mode (disabled by default)')

    # Operating mode options (--openai and --hybrid are mutually exclusive;
    # neither one given means the Gemini path).
    provider_group = parser.add_mutually_exclusive_group()
    provider_group.add_argument('--openai', action='store_true',
                       help='Use OpenAI o3 model instead of Gemini (requires OPENAI_API_KEY)')
    provider_group.add_argument('--hybrid', action='store_true',
                       help='Use hybrid mode: OpenAI panel counting + local analysis (≤2 panels) or full OpenAI (≥3 panels)')

    # Vector mode option (can be combined with hybrid mode)
    parser.add_argument('--vector-mode', action='store_true',
                       help='Use Google Vertex AI vector embeddings for similarity matching. Can be combined with --hybrid to replace inlier analysis with vector similarity.')

    parser.add_argument('--similarity-threshold', type=float, default=0.75, metavar='THRESHOLD',
                       help='Similarity threshold for vector mode (0.0-1.0, default: 0.75)')
    parser.add_argument('--no-truncation', action='store_true',
                       help='Disable truncation of match results in hybrid mode (keeps all matches instead of limiting to panel count)')
    parser.add_argument('--splitting-mode', type=str, default='none',
                       choices=['none', 'auto', 'grid'], metavar='MODE',
                       help='Image splitting mode for vector mode: none, auto, grid (default: none)')
    parser.add_argument('--min-crop-size', type=int, default=200, metavar='PIXELS',
                       help='Minimum crop size in pixels for splitting (default: 200)')
    parser.add_argument('--crop-padding', type=int, default=20, metavar='PIXELS',
                       help='Padding around detected crops in pixels (default: 20)')
    parser.add_argument('--refinement-mode', action='store_true',
                       help='Enable CEN refinement mode (after initial detection, refines CEN vs non-CEN matches)')
    parser.add_argument('--one-at-a-time', action='store_true',
                       help='Process masters one at a time using separate processes (makes 41 separate API calls per layout for exact matching)')
    parser.add_argument('--concurrent-workers', type=int, default=None, metavar='N',
                       help='Number of concurrent processes (auto-detects optimal values: OpenAI=total_masters, Local=CPU_cores if not specified)')
    parser.add_argument('--openai-workers', type=int, default=None, metavar='N',
                       help='Number of concurrent processes for OpenAI analysis (default: total number of master images)')
    parser.add_argument('--local-workers', type=int, default=None, metavar='N',
                       help='Number of concurrent processes for local inlier analysis (default: number of CPU cores)')
    parser.add_argument('--panel-aware-refinement', action='store_true',
                       help='Enable panel-aware refinement: count panels with OpenAI o3, then use OpenCV inlier analysis to select best matches (only works with --openai --one-at-a-time)')
    parser.add_argument('--split', action='store_true',
                       help='Enable panel splitting mode: split multi-panel layouts into individual images using multiple CV methods, then match each split to masters')
    parser.add_argument('--split-advanced', action='store_true',
                       help='Enable advanced panel splitting mode: use edge detection and gutter analysis for more accurate splitting')
    parser.add_argument('--split-simple', action='store_true',
                       help='Enable simple panel splitting mode: evenly split layout into panels based on OpenAI analysis count (hybrid mode only)')
    parser.add_argument('--percentile', type=float, default=10, metavar='THRESHOLD',
                       help='Percentile threshold (0-100) for detecting gutters in advanced splitting; lower = stricter (default: 10)')
    parser.add_argument('--min-gap', type=int, default=5, metavar='PIXELS',
                       help='Minimum consecutive low-energy columns needed to mark a gutter in advanced splitting (default: 5)')

    # Hybrid mode specific options
    parser.add_argument('--panel-threshold', type=int, default=2, metavar='N',
                       help='Panel threshold for hybrid mode: ≤N panels use local analysis, >N panels use OpenAI (default: 2)')
    parser.add_argument('--inlier-threshold', type=float, default=0.65, metavar='THRESHOLD',
                       help='Inlier confidence threshold for hybrid local analysis (default: 0.65)')
    parser.add_argument('--inlier-ratio-threshold', type=float, default=0.4, metavar='THRESHOLD',
                       help='Minimum inlier ratio for confident matches (default: 0.4)')
    parser.add_argument('--fallback-one-at-a-time', action='store_true',
                       help='Enable fallback to OpenAI one-at-a-time method when matched masters < detected panels. Uses multiprocessing with number of workers equal to number of masters.')

    # Parallel processing options
    parser.add_argument('--parallel-layouts', action='store_true',
                       help='Enable parallel layout processing with serial inlier analysis coordination')
    parser.add_argument('--layout-workers', type=int, default=None, metavar='N',
                       help='Number of concurrent layout workers for parallel processing (default: auto-detect based on CPU cores)')
    parser.add_argument('--max-concurrent-layouts', type=int, default=None, metavar='N',
                       help='Maximum layouts processing simultaneously (default: same as layout-workers)')

    # Cost tracking options
    parser.add_argument('--enable-cost-tracking', action='store_true',
                       help='Enable detailed cost tracking and reporting for OpenAI API usage')
    parser.add_argument('--cost-report', action='store_true',
                       help='Generate detailed cost report after processing')
    # No default is configured here: the attribute stays None unless the
    # flag is given, so the help text must not advertise a default value.
    parser.add_argument('--cost-estimate', type=int, metavar='LAYOUTS',
                       help='Estimate monthly cost based on the specified number of layouts per month (e.g., 300); no estimate is produced when omitted')

    # argv=None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)


def main():
    """Main execution function"""
    # Initialize dual logging first
    logger = setup_dual_logging()
    dual_logger = DualLogger(logger)

    # Log system information
    log_system_info(logger)

    try:
        args = parse_arguments()

        # Initialize cost tracking if enabled
        if args.enable_cost_tracking:
            cost_calculator.enable_tracking = True
            dual_logger.print("Cost tracking enabled")

        # Determine processing parameters
        if args.test:
            limit = 1
            default_output = "test_results"
            dual_logger.print("Running in TEST mode - processing 1 layout image...")
        elif args.limit:
            limit = args.limit
            default_output = f"batch_{limit}_results"
            dual_logger.print(f"Processing first {limit} layout images...")
        elif args.file:
            # Process specific file
            specific_file = args.file
            if not specific_file.endswith('.jpg'):
                specific_file += '.jpg'
            limit = 1
            default_output = f"file_{specific_file[:-4]}_results"
            dual_logger.print(f"Processing specific file: {specific_file}...")
        else:  # args.all
            limit = None
            specific_file = None
            default_output = "full_results"
            dual_logger.print("Processing ALL layout images...")

        # Set specific_file to None for other modes
        if not args.file:
            specific_file = None

        # Determine which detector to use
        if args.hybrid:
            analysis_method = "vector similarity" if args.vector_mode else "local analysis"
            dual_logger.print(f"Using HYBRID mode with OpenAI panel counting + {analysis_method}")

            # Validate hybrid mode arguments
            if args.panel_aware_refinement:
                dual_logger.warning("--panel-aware-refinement is not needed in hybrid mode (panel analysis is built-in), ignoring...")
            if args.one_at_a_time:
                dual_logger.warning("--one-at-a-time is handled automatically in hybrid mode, ignoring...")
            if args.openai:
                dual_logger.error("--hybrid cannot be used with --openai")
                return 1
            if args.split and args.split_advanced:
                dual_logger.error("Cannot use both --split and --split-advanced at the same time")
                return 1
            if args.split_simple and not args.hybrid:
                dual_logger.error("--split-simple can only be used with --hybrid mode")
                return 1
            if args.split_simple and (args.split or args.split_advanced):
                dual_logger.error("--split-simple cannot be used with --split or --split-advanced")
                return 1

            # Handle image processing options for hybrid mode
            if args.enable_greyscale:
                enable_greyscale = True
                dual_logger.print("Greyscale processing enabled (override)")
            else:
                enable_greyscale = False
                dual_logger.print("Greyscale processing disabled (default for hybrid mode)")

            if args.enable_contrast:
                enable_contrast_enhancement = True
                dual_logger.print("Contrast enhancement enabled (override)")
            else:
                enable_contrast_enhancement = False
                dual_logger.print("Contrast enhancement disabled (default for hybrid mode)")

            # Determine worker counts for hybrid mode
            if args.concurrent_workers is not None:
                openai_workers = args.concurrent_workers
                local_workers = args.concurrent_workers
            else:
                openai_workers = args.openai_workers
                local_workers = args.local_workers

            # Initialize hybrid detector
            detector = HybridImageDetector(
                panel_threshold=args.panel_threshold,
                inlier_threshold=args.inlier_threshold,
                inlier_ratio_threshold=args.inlier_ratio_threshold,
                enable_greyscale=enable_greyscale,
                enable_contrast_enhancement=enable_contrast_enhancement,
                contrast_factor=args.contrast_factor,
                refinement_mode=args.refinement_mode,
                openai_workers=openai_workers,
                local_workers=local_workers,
                split_mode=args.split,
                split_advanced=args.split_advanced,
                split_simple=args.split_simple,
                percentile=args.percentile,
                min_gap=args.min_gap,
                vector_mode=args.vector_mode,
                similarity_threshold=args.similarity_threshold,
                fallback_one_at_a_time=args.fallback_one_at_a_time,
                parallel_layouts=args.parallel_layouts,
                layout_workers=args.layout_workers,
                max_concurrent_layouts=args.max_concurrent_layouts,
                no_truncation=args.no_truncation
            )

            # Add hybrid mode suffix to default output name
            if not args.output:
                default_output += "_hybrid"
                if args.panel_threshold != 2:
                    default_output += f"_threshold{args.panel_threshold}"
                if args.refinement_mode:
                    default_output += "_refined"
                if args.split:
                    default_output += "_split"
                if args.split_advanced:
                    default_output += "_split_advanced"
                if args.split_simple:
                    default_output += "_split_simple"
                if args.vector_mode:
                    default_output += "_vector"
                if args.fallback_one_at_a_time:
                    default_output += "_fallback"
                if args.parallel_layouts:
                    default_output += "_parallel"

            output_name = args.output if args.output else default_output

            dual_logger.print(f"Results will be saved as: {output_name}.json")
            analysis_method = "vector similarity" if args.vector_mode else "local analysis"
            dual_logger.print(f"Panel threshold: ≤{args.panel_threshold} panels → {analysis_method}, ≥{args.panel_threshold + 1} panels → split + {analysis_method}")
            dual_logger.print(f"Inlier threshold: {args.inlier_threshold}")
            dual_logger.print(f"CEN refinement: {'enabled' if args.refinement_mode else 'disabled'}")
            dual_logger.print(f"Vector mode: {'enabled' if args.vector_mode else 'disabled'}")
            if args.vector_mode:
                dual_logger.print(f"Similarity threshold: {args.similarity_threshold}")
            dual_logger.print(f"Fallback one-at-a-time: {'enabled' if args.fallback_one_at_a_time else 'disabled'}")
            dual_logger.print(f"Parallel layouts: {'enabled' if args.parallel_layouts else 'disabled'}")
            if args.parallel_layouts:
                dual_logger.print(f"Layout workers: {detector.layout_workers}")
                dual_logger.print(f"Max concurrent layouts: {detector.max_concurrent_layouts}")
            dual_logger.print(f"OpenAI workers: {detector.openai_workers}")
            dual_logger.print(f"Local workers: {detector.local_workers}")
            dual_logger.print("-" * 60)

            try:
                # Use parallel processing if requested
                if args.parallel_layouts:
                    results = detector.process_all_layouts_hybrid_parallel(limit=limit, specific_file=specific_file)
                else:
                    results = detector.process_all_layouts_hybrid(limit=limit, specific_file=specific_file)
                output_file = detector.save_results(results, output_name)
                summary = detector.generate_summary(results)

                dual_logger.print("\n" + "="*60)
                dual_logger.print("HYBRID PROCESSING SUMMARY")
                dual_logger.print("="*60)
                dual_logger.print(f"Total layouts processed: {summary['total_layouts_processed']}")
                dual_logger.print(f"Layouts with matches: {summary['layouts_with_matches']}")
                dual_logger.print(f"Layouts without matches: {summary['layouts_without_matches']}")
                dual_logger.print(f"Local analysis used: {summary['local_analysis_used']} ({summary['local_analysis_percentage']}%)")
                dual_logger.print(f"Split + inlier analysis used: {summary['split_analysis_used']} ({summary['split_analysis_percentage']}%)")
                dual_logger.print(f"Panel threshold: ≤{summary['panel_threshold']}")
                dual_logger.print(f"Inlier threshold: {summary['inlier_threshold']}")

                if summary['total_duplicates_removed'] > 0:
                    dual_logger.print(f"\nDEDUPLICATION RESULTS:")
                    dual_logger.print(f"Layouts with duplicates removed: {summary['layouts_with_deduplication']}")
                    dual_logger.print(f"Total duplicate masters removed: {summary['total_duplicates_removed']}")
                    dual_logger.print(f"Deduplication rate: {summary['deduplication_rate']}%")

                if summary['most_used_masters']:
                    dual_logger.print(f"\nTop {min(10, len(summary['most_used_masters']))} most frequently detected masters:")
                    for master_id, count in summary['most_used_masters']:
                        dual_logger.print(f"  {master_id}.jpg: {count} times")

                dual_logger.print(f"\nFull results saved to: {output_file}")
                dual_logger.print("="*60)

                # Print cost summary if tracking is enabled
                if args.enable_cost_tracking:
                    cost_calculator.print_cost_summary()

                # Generate cost report if requested
                if args.cost_report:
                    cost_report_file = cost_calculator.save_cost_report()
                    if cost_report_file:
                        dual_logger.print(f"Cost report saved to: {cost_report_file}")

                # Show cost estimate if requested
                if args.cost_estimate:
                    estimate = cost_calculator.estimate_monthly_cost(args.cost_estimate)
                    if 'error' not in estimate:
                        dual_logger.print(f"\nMONTHLY COST ESTIMATE")
                        dual_logger.print(f"Based on {estimate['based_on_layouts']} processed layouts:")
                        dual_logger.print(f"  Average cost per layout: ${estimate['average_cost_per_layout']:.4f}")
                        dual_logger.print(f"  Estimated monthly cost ({estimate['layouts_per_month']} layouts): ${estimate['estimated_monthly_cost']:.2f}")
                        dual_logger.print(f"  Estimated annual cost: ${estimate['estimated_annual_cost']:.2f}")

            except KeyboardInterrupt:
                dual_logger.print("\n\nProcessing interrupted by user.")
                dual_logger.print("Partial results may have been saved automatically.")
            except Exception as e:
                dual_logger.error(f"\nError during hybrid processing: {e}")
                log_exception(logger)
                return 1
            finally:
                detector.cleanup_temp_files()

        elif args.vector_mode and not args.hybrid:
            dual_logger.print("Using VECTOR EMBEDDING mode with Google Vertex AI")

            # Validate vector mode arguments
            if args.refinement_mode:
                dual_logger.warning("--refinement-mode is not supported in vector mode, ignoring...")
            if args.one_at_a_time:
                dual_logger.warning("--one-at-a-time is not applicable in vector mode, ignoring...")
            if args.panel_aware_refinement:
                dual_logger.error("--panel-aware-refinement is only supported with --openai --one-at-a-time mode")
                return 1
            if args.split_advanced:
                dual_logger.warning("--split-advanced is only supported in hybrid mode, ignoring...")
            if not args.no_greyscale or not args.no_contrast or args.contrast_factor != 1.5:
                dual_logger.warning("Image processing options (greyscale, contrast) are not used in vector mode...")

            # Initialize vector detector
            detector = VectorImageDetector(
                similarity_threshold=args.similarity_threshold,
                splitting_mode=args.splitting_mode,
                min_crop_size=args.min_crop_size,
                crop_padding=args.crop_padding,
                split_mode=args.split
            )

            # Add vector mode suffix to default output name
            if not args.output:
                default_output += "_vector"
                if args.splitting_mode != "none":
                    default_output += f"_{args.splitting_mode}"
                if args.similarity_threshold != 0.75:
                    default_output += f"_thresh{args.similarity_threshold}"
                if args.split:
                    default_output += "_split"

            output_name = args.output if args.output else default_output

            dual_logger.print(f"Results will be saved as: {output_name}.json")
            dual_logger.print(f"Similarity threshold: {args.similarity_threshold}")
            dual_logger.print(f"Splitting mode: {args.splitting_mode}")
            if args.splitting_mode != "none":
                dual_logger.print(f"Min crop size: {args.min_crop_size}px, Crop padding: {args.crop_padding}px")
            dual_logger.print("-" * 60)

            try:
                results = detector.process_all_layouts_vector(limit=limit, specific_file=specific_file)
                output_file = detector.save_results(results, output_name)
                summary = detector.generate_summary(results)

                dual_logger.print("\n" + "="*60)
                dual_logger.print("VECTOR PROCESSING SUMMARY")
                dual_logger.print("="*60)
                dual_logger.print(f"Total layouts processed: {summary['total_layouts_processed']}")
                dual_logger.print(f"Layouts with matches: {summary['layouts_with_matches']}")
                dual_logger.print(f"Layouts without matches: {summary['layouts_without_matches']}")
                dual_logger.print(f"Similarity threshold: {summary['similarity_threshold']}")
                dual_logger.print(f"Embedding dimensions: {summary['embedding_dimensions']}")

                if summary['most_used_masters']:
                    dual_logger.print(f"\nTop {min(10, len(summary['most_used_masters']))} most frequently detected masters:")
                    for master_id, count in summary['most_used_masters']:
                        dual_logger.print(f"  {master_id}.jpg: {count} times")

                dual_logger.print(f"\nFull results saved to: {output_file}")
                dual_logger.print("="*60)

            except KeyboardInterrupt:
                dual_logger.print("\n\nProcessing interrupted by user.")
                dual_logger.print("Partial results may have been saved automatically.")
            except Exception as e:
                dual_logger.error(f"\nError during vector processing: {e}")
                log_exception(logger)
                return 1

        elif args.openai:
            dual_logger.print("Using OPENAI LLM mode with o3 model")

            # Validate panel-aware refinement requirements
            if args.panel_aware_refinement:
                if not args.one_at_a_time:
                    dual_logger.error("--panel-aware-refinement requires --one-at-a-time mode")
                    return 1
                dual_logger.print("Panel-aware refinement ENABLED - will count panels and refine matches using OpenCV inlier analysis")
            if args.split_advanced:
                dual_logger.warning("--split-advanced is only supported in hybrid mode, ignoring...")

            # Initialize OpenAI detector with image processing settings
            detector = OpenAIImageDetector(
                enable_greyscale=not args.no_greyscale,
                enable_contrast_enhancement=not args.no_contrast,
                contrast_factor=args.contrast_factor,
                refinement_mode=args.refinement_mode,
                one_at_a_time_mode=args.one_at_a_time,
                max_concurrent_workers=args.concurrent_workers,
                panel_aware_refinement=args.panel_aware_refinement,
                split_mode=args.split
            )

            # Add mode suffix to default output name
            if not args.output:
                default_output += "_openai"
                if args.one_at_a_time:
                    default_output += "_one_at_a_time"
                if args.panel_aware_refinement:
                    default_output += "_panel_aware"
                if args.split:
                    default_output += "_split"

            output_name = args.output if args.output else default_output

            dual_logger.print(f"Results will be saved as: {output_name}.json")
            if args.one_at_a_time:
                dual_logger.print(f"Concurrent processes for one-at-a-time mode: {args.concurrent_workers}")
                if args.concurrent_workers and args.concurrent_workers > 10:
                    dual_logger.print("WARNING: High concurrency (>10) may cause API rate limits!")
                    dual_logger.print("Recommended range: 3-8 processes for stable performance.")
            dual_logger.print("-" * 60)

            try:
                results = detector.process_all_layouts(limit=limit, specific_file=specific_file)
                output_file = detector.save_results(results, output_name)
                summary = detector.generate_summary(results)

                dual_logger.print("\n" + "="*60)
                dual_logger.print("OPENAI PROCESSING SUMMARY")
                dual_logger.print("="*60)
                dual_logger.print(f"Total layouts processed: {summary['total_layouts_processed']}")
                dual_logger.print(f"Layouts with matches: {summary['layouts_with_matches']}")
                dual_logger.print(f"Layouts without matches: {summary['layouts_without_matches']}")
                dual_logger.print(f"Provider: {summary['provider']}")
                dual_logger.print(f"Model: {summary['model']}")

                if 'total_duplicates_removed' in summary and summary['total_duplicates_removed'] > 0:
                    dual_logger.print(f"\nDEDUPLICATION RESULTS:")
                    dual_logger.print(f"Layouts with duplicates removed: {summary['layouts_with_deduplication']}")
                    dual_logger.print(f"Total duplicate masters removed: {summary['total_duplicates_removed']}")
                    dual_logger.print(f"Deduplication rate: {summary['deduplication_rate']}%")

                if summary['most_used_masters']:
                    dual_logger.print(f"\nTop {min(10, len(summary['most_used_masters']))} most frequently detected masters:")
                    for master_id, count in summary['most_used_masters']:
                        dual_logger.print(f"  {master_id}.jpg: {count} times")

                dual_logger.print(f"\nFull results saved to: {output_file}")
                dual_logger.print("="*60)

            except KeyboardInterrupt:
                dual_logger.print("\n\nProcessing interrupted by user.")
                dual_logger.print("Partial results may have been saved automatically.")
            except Exception as e:
                dual_logger.error(f"\nError during OpenAI processing: {e}")
                log_exception(logger)
                return 1
            finally:
                detector.cleanup_temp_files()

        else:
            dual_logger.print("Using GEMINI LLM mode")

            # Validate panel-aware refinement requirements
            if args.panel_aware_refinement:
                dual_logger.error("--panel-aware-refinement is only supported with --openai mode")
                return 1
            if args.split_advanced:
                dual_logger.warning("--split-advanced is only supported in hybrid mode, ignoring...")

            # Initialize Gemini detector with image processing settings
            detector = ImageDetector(
                enable_greyscale=not args.no_greyscale,
                enable_contrast_enhancement=not args.no_contrast,
                contrast_factor=args.contrast_factor,
                refinement_mode=args.refinement_mode,
                one_at_a_time_mode=args.one_at_a_time,
                max_concurrent_workers=args.concurrent_workers,
                split_mode=args.split
            )

            # Add mode suffix to default output name
            if not args.output:
                if args.one_at_a_time:
                    default_output += "_one_at_a_time"
                if args.refinement_mode:
                    default_output += "_refined"
                if args.split:
                    default_output += "_split"

            output_name = args.output if args.output else default_output

            dual_logger.print(f"Results will be saved as: {output_name}.json")
            if args.one_at_a_time:
                dual_logger.print(f"Concurrent processes for one-at-a-time mode: {args.concurrent_workers}")
                if args.concurrent_workers and args.concurrent_workers > 10:
                    dual_logger.print("WARNING: High concurrency (>10) may cause API rate limits!")
                    dual_logger.print("Recommended range: 3-8 processes for stable performance.")
            dual_logger.print("-" * 60)

            try:
                results = detector.process_all_layouts(limit=limit, specific_file=specific_file)
                output_file = detector.save_results(results, output_name)
                summary = detector.generate_summary(results)

                dual_logger.print("\n" + "="*60)
                dual_logger.print("GEMINI PROCESSING SUMMARY")
                dual_logger.print("="*60)
                dual_logger.print(f"Total layouts processed: {summary['total_layouts_processed']}")
                dual_logger.print(f"Layouts with matches: {summary['layouts_with_matches']}")
                dual_logger.print(f"Layouts without matches: {summary['layouts_without_matches']}")

                if 'total_duplicates_removed' in summary and summary['total_duplicates_removed'] > 0:
                    dual_logger.print(f"\nDEDUPLICATION RESULTS:")
                    dual_logger.print(f"Layouts with duplicates removed: {summary['layouts_with_deduplication']}")
                    dual_logger.print(f"Total duplicate masters removed: {summary['total_duplicates_removed']}")
                    dual_logger.print(f"Deduplication rate: {summary['deduplication_rate']}%")

                if summary['most_used_masters']:
                    dual_logger.print(f"\nTop {min(10, len(summary['most_used_masters']))} most frequently detected masters:")
                    for master_id, count in summary['most_used_masters']:
                        dual_logger.print(f"  {master_id}.jpg: {count} times")

                dual_logger.print(f"\nFull results saved to: {output_file}")
                dual_logger.print("="*60)

            except KeyboardInterrupt:
                dual_logger.print("\n\nProcessing interrupted by user.")
                dual_logger.print("Partial results may have been saved automatically.")
            except Exception as e:
                dual_logger.error(f"\nError during Gemini processing: {e}")
                log_exception(logger)
                return 1
            finally:
                detector.cleanup_temp_files()

        return 0

    except Exception as e:
        dual_logger.error(f"Unexpected error: {e}")
        log_exception(logger)
        return 1


if __name__ == "__main__":
    # Script entry point: configure multiprocessing, run the CLI, and hand
    # main()'s integer return value to the OS as the process exit status.

    # Required for multiprocessing on macOS and Windows.
    # force=True replaces any start method that was already selected, so this
    # must run before any worker processes are created.
    multiprocessing.set_start_method('spawn', force=True)

    # Use sys.exit() rather than the bare exit() builtin: exit() is installed
    # by the `site` module for interactive sessions and is not guaranteed to
    # exist (e.g. `python -S` or frozen/embedded interpreters).
    sys.exit(main())