#!/usr/bin/env python3
"""
CLI Module - Command Line Interface for Master Image Detection
"""

import argparse
import multiprocessing
import sys

from logging_config import setup_dual_logging, log_system_info, log_exception, DualLogger
from gemini_detector import ImageDetector
from openai_detector import OpenAIImageDetector
from vector_detector import VectorImageDetector
from hybrid_detector import HybridImageDetector
from cost_calculator import cost_calculator

# Horizontal rules used when printing run configuration and summaries.
_HEAVY_RULE = "=" * 60
_LIGHT_RULE = "-" * 60

_EPILOG = """
Examples:
  %(prog)s --test  # Test with 1 layout (Gemini LLM)
  %(prog)s --test --openai  # Test with 1 layout (OpenAI o3)
  %(prog)s --file 6786505.jpg --openai --split  # Process specific file with OpenAI and splitting
  %(prog)s --limit 10  # Process first 10 layouts (Gemini LLM)
  %(prog)s --limit 10 --openai  # Process first 10 layouts (OpenAI o3)
  %(prog)s --all  # Process all layouts (Gemini LLM)
  %(prog)s --all --openai  # Process all layouts (OpenAI o3)
  %(prog)s --test --vector-mode  # Test with 1 layout (Vector embeddings)
  %(prog)s --limit 10 --vector-mode  # Process first 10 layouts (Vector embeddings)
  %(prog)s --all --vector-mode  # Process all layouts (Vector embeddings)
  %(prog)s --test --vector-mode --splitting-mode grid  # Test with grid-based image splitting
  %(prog)s --limit 5 --vector-mode --splitting-mode auto  # Auto-detect splitting for 5 layouts
  %(prog)s --all --vector-mode --similarity-threshold 0.8  # Process all with higher similarity threshold
  %(prog)s --all --output my_results  # Process all with custom output name
  %(prog)s --limit 50 --output batch_50  # Process 50 layouts with custom name
  %(prog)s --all --no-greyscale  # Process all without greyscale conversion (Gemini only)
  %(prog)s --all --no-contrast  # Process all without contrast enhancement (Gemini only)
  %(prog)s --all --contrast-factor 2.0  # Process all with higher contrast factor (Gemini only)
  %(prog)s --test --refinement-mode  # Test with CEN refinement (1 layout)
  %(prog)s --limit 5 --refinement-mode  # Process 5 layouts with CEN refinement
  %(prog)s --all --refinement-mode --output refined_results  # Full run with CEN refinement
  %(prog)s --test --one-at-a-time  # Test one-at-a-time mode (1 layout, 41 concurrent API calls)
  %(prog)s --test --openai --one-at-a-time  # Test OpenAI one-at-a-time mode
  %(prog)s --all --one-at-a-time --refinement-mode  # Full run with one-at-a-time and CEN refinement
  %(prog)s --test --one-at-a-time --concurrent-workers 8  # Test with 8 concurrent workers (faster)
  %(prog)s --all --openai --one-at-a-time --concurrent-workers 3  # OpenAI full run with 3 concurrent workers
  %(prog)s --test --openai --one-at-a-time --panel-aware-refinement  # Test with panel counting and OpenCV refinement
  %(prog)s --all --openai --one-at-a-time --panel-aware-refinement  # Full run with panel-aware match refinement
  %(prog)s --test --openai --one-at-a-time --refinement-mode --panel-aware-refinement  # Test with both CEN and panel-aware refinement
  %(prog)s --test --hybrid  # Test with hybrid mode (OpenAI panel counting + local analysis)
  %(prog)s --limit 10 --hybrid  # Process first 10 layouts with hybrid mode
  %(prog)s --all --hybrid  # Process all layouts with hybrid mode
  %(prog)s --test --hybrid --panel-threshold 3  # Test hybrid mode with custom panel threshold
  %(prog)s --all --hybrid --refinement-mode  # Full run with hybrid mode and CEN refinement
  %(prog)s --test --hybrid --enable-greyscale  # Test hybrid mode with greyscale override
  %(prog)s --limit 5 --hybrid --enable-contrast  # Test hybrid mode with contrast enhancement override
  %(prog)s --test --hybrid --concurrent-workers 8  # Test hybrid mode with 8 concurrent workers (both paths)
  %(prog)s --all --hybrid --concurrent-workers 3  # Full hybrid run with 3 concurrent workers (both paths)
  %(prog)s --test --hybrid --openai-workers 10 --local-workers 8  # Test with separate worker counts
  %(prog)s --all --hybrid --local-workers 12  # Full run with 12 local workers (OpenAI auto-detects)
  %(prog)s --test --hybrid --split-advanced  # Test with advanced edge detection splitting
  %(prog)s --limit 10 --hybrid --split-advanced --percentile 15 --min-gap 3  # Advanced splitting with custom parameters
  %(prog)s --test --hybrid --vector-mode  # Test hybrid mode with vector similarity instead of inlier analysis
  %(prog)s --limit 10 --hybrid --vector-mode --similarity-threshold 0.8  # Hybrid with higher similarity threshold
  %(prog)s --all --hybrid --vector-mode --split-simple  # Full run with vector similarity and simple splitting
  %(prog)s --all --hybrid --split-simple --fallback-one-at-a-time  # Hybrid with fallback to OpenAI one-at-a-time when needed
  %(prog)s --test --hybrid --parallel-layouts  # Test hybrid mode with parallel layout processing
  %(prog)s --limit 10 --hybrid --parallel-layouts --layout-workers 4  # Process 10 layouts with 4 parallel workers
  %(prog)s --all --hybrid --parallel-layouts --layout-workers 6 --max-concurrent-layouts 4  # Full run with controlled parallelism
  %(prog)s --test --hybrid --enable-cost-tracking  # Test with detailed cost tracking enabled
  %(prog)s --limit 10 --openai --enable-cost-tracking --cost-report  # OpenAI mode with cost tracking and report generation
  %(prog)s --all --hybrid --enable-cost-tracking --cost-estimate 300  # Full run with cost tracking and monthly estimate
"""


def parse_arguments():
    """Parse command line arguments.

    Exactly one of ``--test``, ``--limit``, ``--all`` or ``--file`` is
    required, and ``--openai`` / ``--hybrid`` are mutually exclusive.

    Returns:
        argparse.Namespace: the parsed options.

    Raises:
        SystemExit: raised by argparse on invalid usage, including a
            non-positive ``--limit``.
    """
    parser = argparse.ArgumentParser(
        description="Detect master images in layout images using LLM APIs (Gemini, OpenAI), Vector Embeddings, or Hybrid mode",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_EPILOG,
    )

    # Run scope: exactly one of these must be given.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--test', action='store_true',
                       help='Test mode: process only 1 layout image')
    group.add_argument('--limit', type=int, metavar='N',
                       help='Process first N layout images')
    group.add_argument('--all', action='store_true',
                       help='Process all layout images')
    group.add_argument('--file', type=str, metavar='FILENAME',
                       help='Process a specific layout file (e.g., --file 6786505.jpg)')

    parser.add_argument('--output', type=str, default=None, metavar='NAME',
                        help='Output filename (without .json extension). Default: auto-generated based on mode')

    # Image processing options
    parser.add_argument('--no-greyscale', action='store_true',
                        help='Disable greyscale conversion (enabled by default for Gemini/OpenAI, disabled for hybrid)')
    parser.add_argument('--no-contrast', action='store_true',
                        help='Disable contrast enhancement (enabled by default for Gemini/OpenAI, disabled for hybrid)')
    parser.add_argument('--contrast-factor', type=float, default=1.5, metavar='FACTOR',
                        help='Contrast enhancement factor (default: 1.5)')

    # Hybrid mode image processing overrides
    parser.add_argument('--enable-greyscale', action='store_true',
                        help='Enable greyscale conversion for hybrid mode (disabled by default)')
    parser.add_argument('--enable-contrast', action='store_true',
                        help='Enable contrast enhancement for hybrid mode (disabled by default)')

    # Operating mode options
    provider_group = parser.add_mutually_exclusive_group()
    provider_group.add_argument('--openai', action='store_true',
                                help='Use OpenAI o3 model instead of Gemini (requires OPENAI_API_KEY)')
    provider_group.add_argument('--hybrid', action='store_true',
                                help='Use hybrid mode: OpenAI panel counting + local analysis (≤2 panels) or full OpenAI (≥3 panels)')

    # Vector mode option (can be combined with hybrid mode)
    parser.add_argument('--vector-mode', action='store_true',
                        help='Use Google Vertex AI vector embeddings for similarity matching. Can be combined with --hybrid to replace inlier analysis with vector similarity.')
    parser.add_argument('--similarity-threshold', type=float, default=0.75, metavar='THRESHOLD',
                        help='Similarity threshold for vector mode (0.0-1.0, default: 0.75)')
    parser.add_argument('--no-truncation', action='store_true',
                        help='Disable truncation of match results in hybrid mode (keeps all matches instead of limiting to panel count)')
    parser.add_argument('--splitting-mode', type=str, default='none',
                        choices=['none', 'auto', 'grid'], metavar='MODE',
                        help='Image splitting mode for vector mode: none, auto, grid (default: none)')
    parser.add_argument('--min-crop-size', type=int, default=200, metavar='PIXELS',
                        help='Minimum crop size in pixels for splitting (default: 200)')
    parser.add_argument('--crop-padding', type=int, default=20, metavar='PIXELS',
                        help='Padding around detected crops in pixels (default: 20)')
    parser.add_argument('--refinement-mode', action='store_true',
                        help='Enable CEN refinement mode (after initial detection, refines CEN vs non-CEN matches)')
    parser.add_argument('--one-at-a-time', action='store_true',
                        help='Process masters one at a time using separate processes (makes 41 separate API calls per layout for exact matching)')
    parser.add_argument('--concurrent-workers', type=int, default=None, metavar='N',
                        help='Number of concurrent processes (auto-detects optimal values: OpenAI=total_masters, Local=CPU_cores if not specified)')
    parser.add_argument('--openai-workers', type=int, default=None, metavar='N',
                        help='Number of concurrent processes for OpenAI analysis (default: total number of master images)')
    parser.add_argument('--local-workers', type=int, default=None, metavar='N',
                        help='Number of concurrent processes for local inlier analysis (default: number of CPU cores)')
    parser.add_argument('--panel-aware-refinement', action='store_true',
                        help='Enable panel-aware refinement: count panels with OpenAI o3, then use OpenCV inlier analysis to select best matches (only works with --openai --one-at-a-time)')
    parser.add_argument('--split', action='store_true',
                        help='Enable panel splitting mode: split multi-panel layouts into individual images using multiple CV methods, then match each split to masters')
    parser.add_argument('--split-advanced', action='store_true',
                        help='Enable advanced panel splitting mode: use edge detection and gutter analysis for more accurate splitting')
    parser.add_argument('--split-simple', action='store_true',
                        help='Enable simple panel splitting mode: evenly split layout into panels based on OpenAI analysis count (hybrid mode only)')
    parser.add_argument('--percentile', type=float, default=10, metavar='THRESHOLD',
                        help='Percentile threshold (0-100) for detecting gutters in advanced splitting; lower = stricter (default: 10)')
    parser.add_argument('--min-gap', type=int, default=5, metavar='PIXELS',
                        help='Minimum consecutive low-energy columns needed to mark a gutter in advanced splitting (default: 5)')

    # Hybrid mode specific options
    parser.add_argument('--panel-threshold', type=int, default=2, metavar='N',
                        help='Panel threshold for hybrid mode: ≤N panels use local analysis, >N panels use OpenAI (default: 2)')
    parser.add_argument('--inlier-threshold', type=float, default=0.65, metavar='THRESHOLD',
                        help='Inlier confidence threshold for hybrid local analysis (default: 0.65)')
    parser.add_argument('--inlier-ratio-threshold', type=float, default=0.4, metavar='THRESHOLD',
                        help='Minimum inlier ratio for confident matches (default: 0.4)')
    parser.add_argument('--fallback-one-at-a-time', action='store_true',
                        help='Enable fallback to OpenAI one-at-a-time method when matched masters < detected panels. Uses multiprocessing with number of workers equal to number of masters.')

    # Parallel processing options
    parser.add_argument('--parallel-layouts', action='store_true',
                        help='Enable parallel layout processing with serial inlier analysis coordination')
    parser.add_argument('--layout-workers', type=int, default=None, metavar='N',
                        help='Number of concurrent layout workers for parallel processing (default: auto-detect based on CPU cores)')
    parser.add_argument('--max-concurrent-layouts', type=int, default=None, metavar='N',
                        help='Maximum layouts processing simultaneously (default: same as layout-workers)')

    # Cost tracking options
    parser.add_argument('--enable-cost-tracking', action='store_true',
                        help='Enable detailed cost tracking and reporting for OpenAI API usage')
    parser.add_argument('--cost-report', action='store_true',
                        help='Generate detailed cost report after processing')
    parser.add_argument('--cost-estimate', type=int, metavar='LAYOUTS',
                        help='Estimate monthly cost based on specified number of layouts per month (default: 300)')

    args = parser.parse_args()

    # A zero limit is falsy and used to fall through to "process ALL"; a
    # negative one was handed to the detector unchecked. Reject both here.
    if args.limit is not None and args.limit <= 0:
        parser.error("--limit must be a positive integer")

    return args


def _resolve_run_scope(args, dual_logger):
    """Translate the scope flags into ``(limit, specific_file, default_output)``.

    ``limit`` is ``None`` for ``--all``; ``specific_file`` is only set for
    ``--file`` (with ``.jpg`` appended when the name does not end in it).
    """
    if args.test:
        dual_logger.print("Running in TEST mode - processing 1 layout image...")
        return 1, None, "test_results"

    if args.limit is not None:
        dual_logger.print(f"Processing first {args.limit} layout images...")
        return args.limit, None, f"batch_{args.limit}_results"

    if args.file:
        specific_file = args.file
        if not specific_file.endswith('.jpg'):
            specific_file += '.jpg'
        dual_logger.print(f"Processing specific file: {specific_file}...")
        # [:-4] strips the ".jpg" extension for the output name.
        return 1, specific_file, f"file_{specific_file[:-4]}_results"

    dual_logger.print("Processing ALL layout images...")
    return None, None, "full_results"


def _print_summary_header(dual_logger, title, summary):
    """Print the summary banner and the layout totals shared by every mode."""
    dual_logger.print("\n" + _HEAVY_RULE)
    dual_logger.print(title)
    dual_logger.print(_HEAVY_RULE)
    dual_logger.print(f"Total layouts processed: {summary['total_layouts_processed']}")
    dual_logger.print(f"Layouts with matches: {summary['layouts_with_matches']}")
    dual_logger.print(f"Layouts without matches: {summary['layouts_without_matches']}")


def _print_deduplication(dual_logger, summary):
    """Print deduplication statistics when the summary reports removed duplicates."""
    if 'total_duplicates_removed' in summary and summary['total_duplicates_removed'] > 0:
        dual_logger.print("\nDEDUPLICATION RESULTS:")
        dual_logger.print(f"Layouts with duplicates removed: {summary['layouts_with_deduplication']}")
        dual_logger.print(f"Total duplicate masters removed: {summary['total_duplicates_removed']}")
        dual_logger.print(f"Deduplication rate: {summary['deduplication_rate']}%")


def _print_summary_footer(dual_logger, summary, output_file):
    """Print the most-used masters list, the results path and the closing rule."""
    most_used = summary['most_used_masters']
    if most_used:
        dual_logger.print(f"\nTop {min(10, len(most_used))} most frequently detected masters:")
        for master_id, count in most_used:
            dual_logger.print(f" {master_id}.jpg: {count} times")
    dual_logger.print(f"\nFull results saved to: {output_file}")
    dual_logger.print(_HEAVY_RULE)


def _report_costs(args, dual_logger):
    """Print the cost summary, optional report and estimate if tracking is on."""
    if not args.enable_cost_tracking:
        return

    cost_calculator.print_cost_summary()

    if args.cost_report:
        cost_report_file = cost_calculator.save_cost_report()
        if cost_report_file:
            dual_logger.print(f"Cost report saved to: {cost_report_file}")

    if args.cost_estimate:
        estimate = cost_calculator.estimate_monthly_cost(args.cost_estimate)
        if 'error' not in estimate:
            dual_logger.print("\nMONTHLY COST ESTIMATE")
            dual_logger.print(f"Based on {estimate['based_on_layouts']} processed layouts:")
            dual_logger.print(f" Average cost per layout: ${estimate['average_cost_per_layout']:.4f}")
            dual_logger.print(f" Estimated monthly cost ({estimate['layouts_per_month']} layouts): ${estimate['estimated_monthly_cost']:.2f}")
            dual_logger.print(f" Estimated annual cost: ${estimate['estimated_annual_cost']:.2f}")


def _print_interrupted(dual_logger):
    """Tell the user the run was stopped with Ctrl-C."""
    dual_logger.print("\n\nProcessing interrupted by user.")
    dual_logger.print("Partial results may have been saved automatically.")


def _run_hybrid(args, limit, specific_file, default_output, logger, dual_logger):
    """Run hybrid mode; return the process exit code (0 on success, 1 on error)."""
    analysis_method = "vector similarity" if args.vector_mode else "local analysis"
    dual_logger.print(f"Using HYBRID mode with OpenAI panel counting + {analysis_method}")

    # Validate hybrid mode arguments. (--hybrid with --openai is already
    # rejected by argparse's mutually exclusive group.)
    if args.panel_aware_refinement:
        dual_logger.warning("--panel-aware-refinement is not needed in hybrid mode (panel analysis is built-in), ignoring...")
    if args.one_at_a_time:
        dual_logger.warning("--one-at-a-time is handled automatically in hybrid mode, ignoring...")
    if args.split and args.split_advanced:
        dual_logger.error("Cannot use both --split and --split-advanced at the same time")
        return 1
    if args.split_simple and (args.split or args.split_advanced):
        dual_logger.error("--split-simple cannot be used with --split or --split-advanced")
        return 1

    # Image processing is off by default in hybrid mode; flags opt in.
    enable_greyscale = args.enable_greyscale
    if enable_greyscale:
        dual_logger.print("Greyscale processing enabled (override)")
    else:
        dual_logger.print("Greyscale processing disabled (default for hybrid mode)")

    enable_contrast_enhancement = args.enable_contrast
    if enable_contrast_enhancement:
        dual_logger.print("Contrast enhancement enabled (override)")
    else:
        dual_logger.print("Contrast enhancement disabled (default for hybrid mode)")

    # --concurrent-workers, when given, overrides both per-path worker counts.
    if args.concurrent_workers is not None:
        openai_workers = args.concurrent_workers
        local_workers = args.concurrent_workers
    else:
        openai_workers = args.openai_workers
        local_workers = args.local_workers

    detector = HybridImageDetector(
        panel_threshold=args.panel_threshold,
        inlier_threshold=args.inlier_threshold,
        inlier_ratio_threshold=args.inlier_ratio_threshold,
        enable_greyscale=enable_greyscale,
        enable_contrast_enhancement=enable_contrast_enhancement,
        contrast_factor=args.contrast_factor,
        refinement_mode=args.refinement_mode,
        openai_workers=openai_workers,
        local_workers=local_workers,
        split_mode=args.split,
        split_advanced=args.split_advanced,
        split_simple=args.split_simple,
        percentile=args.percentile,
        min_gap=args.min_gap,
        vector_mode=args.vector_mode,
        similarity_threshold=args.similarity_threshold,
        fallback_one_at_a_time=args.fallback_one_at_a_time,
        parallel_layouts=args.parallel_layouts,
        layout_workers=args.layout_workers,
        max_concurrent_layouts=args.max_concurrent_layouts,
        no_truncation=args.no_truncation,
    )

    # Auto-generated output name encodes the options that shaped the run.
    if not args.output:
        default_output += "_hybrid"
        if args.panel_threshold != 2:
            default_output += f"_threshold{args.panel_threshold}"
        if args.refinement_mode:
            default_output += "_refined"
        if args.split:
            default_output += "_split"
        if args.split_advanced:
            default_output += "_split_advanced"
        if args.split_simple:
            default_output += "_split_simple"
        if args.vector_mode:
            default_output += "_vector"
        if args.fallback_one_at_a_time:
            default_output += "_fallback"
        if args.parallel_layouts:
            default_output += "_parallel"
    output_name = args.output or default_output

    dual_logger.print(f"Results will be saved as: {output_name}.json")
    dual_logger.print(f"Panel threshold: ≤{args.panel_threshold} panels → {analysis_method}, ≥{args.panel_threshold + 1} panels → split + {analysis_method}")
    dual_logger.print(f"Inlier threshold: {args.inlier_threshold}")
    dual_logger.print(f"CEN refinement: {'enabled' if args.refinement_mode else 'disabled'}")
    dual_logger.print(f"Vector mode: {'enabled' if args.vector_mode else 'disabled'}")
    if args.vector_mode:
        dual_logger.print(f"Similarity threshold: {args.similarity_threshold}")
    dual_logger.print(f"Fallback one-at-a-time: {'enabled' if args.fallback_one_at_a_time else 'disabled'}")
    dual_logger.print(f"Parallel layouts: {'enabled' if args.parallel_layouts else 'disabled'}")
    if args.parallel_layouts:
        dual_logger.print(f"Layout workers: {detector.layout_workers}")
        dual_logger.print(f"Max concurrent layouts: {detector.max_concurrent_layouts}")
    dual_logger.print(f"OpenAI workers: {detector.openai_workers}")
    dual_logger.print(f"Local workers: {detector.local_workers}")
    dual_logger.print(_LIGHT_RULE)

    try:
        if args.parallel_layouts:
            results = detector.process_all_layouts_hybrid_parallel(limit=limit, specific_file=specific_file)
        else:
            results = detector.process_all_layouts_hybrid(limit=limit, specific_file=specific_file)

        output_file = detector.save_results(results, output_name)
        summary = detector.generate_summary(results)

        _print_summary_header(dual_logger, "HYBRID PROCESSING SUMMARY", summary)
        dual_logger.print(f"Local analysis used: {summary['local_analysis_used']} ({summary['local_analysis_percentage']}%)")
        dual_logger.print(f"Split + inlier analysis used: {summary['split_analysis_used']} ({summary['split_analysis_percentage']}%)")
        dual_logger.print(f"Panel threshold: ≤{summary['panel_threshold']}")
        dual_logger.print(f"Inlier threshold: {summary['inlier_threshold']}")
        _print_deduplication(dual_logger, summary)
        _print_summary_footer(dual_logger, summary, output_file)
        _report_costs(args, dual_logger)
    except KeyboardInterrupt:
        _print_interrupted(dual_logger)
    except Exception as e:
        dual_logger.error(f"\nError during hybrid processing: {e}")
        log_exception(logger)
        return 1
    finally:
        detector.cleanup_temp_files()

    return 0


def _run_vector(args, limit, specific_file, default_output, logger, dual_logger):
    """Run vector-embedding mode; return the process exit code."""
    dual_logger.print("Using VECTOR EMBEDDING mode with Google Vertex AI")

    # Validate vector mode arguments
    if args.refinement_mode:
        dual_logger.warning("--refinement-mode is not supported in vector mode, ignoring...")
    if args.one_at_a_time:
        dual_logger.warning("--one-at-a-time is not applicable in vector mode, ignoring...")
    if args.panel_aware_refinement:
        dual_logger.error("--panel-aware-refinement is only supported with --openai --one-at-a-time mode")
        return 1
    if args.split_advanced:
        dual_logger.warning("--split-advanced is only supported in hybrid mode, ignoring...")
    # Warn only when the user actually passed an image-processing option;
    # the previous negated test fired on every default invocation.
    if args.no_greyscale or args.no_contrast or args.contrast_factor != 1.5:
        dual_logger.warning("Image processing options (greyscale, contrast) are not used in vector mode...")

    detector = VectorImageDetector(
        similarity_threshold=args.similarity_threshold,
        splitting_mode=args.splitting_mode,
        min_crop_size=args.min_crop_size,
        crop_padding=args.crop_padding,
        split_mode=args.split,
    )

    if not args.output:
        default_output += "_vector"
        if args.splitting_mode != "none":
            default_output += f"_{args.splitting_mode}"
        if args.similarity_threshold != 0.75:
            default_output += f"_thresh{args.similarity_threshold}"
        if args.split:
            default_output += "_split"
    output_name = args.output or default_output

    dual_logger.print(f"Results will be saved as: {output_name}.json")
    dual_logger.print(f"Similarity threshold: {args.similarity_threshold}")
    dual_logger.print(f"Splitting mode: {args.splitting_mode}")
    if args.splitting_mode != "none":
        dual_logger.print(f"Min crop size: {args.min_crop_size}px, Crop padding: {args.crop_padding}px")
    dual_logger.print(_LIGHT_RULE)

    try:
        results = detector.process_all_layouts_vector(limit=limit, specific_file=specific_file)
        output_file = detector.save_results(results, output_name)
        summary = detector.generate_summary(results)

        _print_summary_header(dual_logger, "VECTOR PROCESSING SUMMARY", summary)
        dual_logger.print(f"Similarity threshold: {summary['similarity_threshold']}")
        dual_logger.print(f"Embedding dimensions: {summary['embedding_dimensions']}")
        _print_summary_footer(dual_logger, summary, output_file)
    except KeyboardInterrupt:
        _print_interrupted(dual_logger)
    except Exception as e:
        dual_logger.error(f"\nError during vector processing: {e}")
        log_exception(logger)
        return 1

    return 0


def _run_llm_detector(detector, args, limit, specific_file, output_name,
                      logger, dual_logger, *, provider_label, is_openai):
    """Drive an OpenAI or Gemini detector end to end; return the exit code.

    ``provider_label`` is used in the summary title and the error message.
    ``is_openai`` adds the provider/model lines and the cost report.
    """
    dual_logger.print(f"Results will be saved as: {output_name}.json")
    if args.one_at_a_time:
        dual_logger.print(f"Concurrent processes for one-at-a-time mode: {args.concurrent_workers}")
        if args.concurrent_workers and args.concurrent_workers > 10:
            dual_logger.print("WARNING: High concurrency (>10) may cause API rate limits!")
            dual_logger.print("Recommended range: 3-8 processes for stable performance.")
    dual_logger.print(_LIGHT_RULE)

    try:
        results = detector.process_all_layouts(limit=limit, specific_file=specific_file)
        output_file = detector.save_results(results, output_name)
        summary = detector.generate_summary(results)

        _print_summary_header(dual_logger, f"{provider_label.upper()} PROCESSING SUMMARY", summary)
        if is_openai:
            dual_logger.print(f"Provider: {summary['provider']}")
            dual_logger.print(f"Model: {summary['model']}")
        _print_deduplication(dual_logger, summary)
        _print_summary_footer(dual_logger, summary, output_file)
        if is_openai:
            # The epilog advertises cost tracking with --openai; report it
            # here as hybrid mode does.
            _report_costs(args, dual_logger)
    except KeyboardInterrupt:
        _print_interrupted(dual_logger)
    except Exception as e:
        dual_logger.error(f"\nError during {provider_label} processing: {e}")
        log_exception(logger)
        return 1
    finally:
        detector.cleanup_temp_files()

    return 0


def _run_openai(args, limit, specific_file, default_output, logger, dual_logger):
    """Run OpenAI LLM mode; return the process exit code."""
    dual_logger.print("Using OPENAI LLM mode with o3 model")

    # Validate panel-aware refinement requirements
    if args.panel_aware_refinement:
        if not args.one_at_a_time:
            dual_logger.error("--panel-aware-refinement requires --one-at-a-time mode")
            return 1
        dual_logger.print("Panel-aware refinement ENABLED - will count panels and refine matches using OpenCV inlier analysis")
    if args.split_advanced:
        dual_logger.warning("--split-advanced is only supported in hybrid mode, ignoring...")

    detector = OpenAIImageDetector(
        enable_greyscale=not args.no_greyscale,
        enable_contrast_enhancement=not args.no_contrast,
        contrast_factor=args.contrast_factor,
        refinement_mode=args.refinement_mode,
        one_at_a_time_mode=args.one_at_a_time,
        max_concurrent_workers=args.concurrent_workers,
        panel_aware_refinement=args.panel_aware_refinement,
        split_mode=args.split,
    )

    if not args.output:
        default_output += "_openai"
        if args.one_at_a_time:
            default_output += "_one_at_a_time"
        if args.panel_aware_refinement:
            default_output += "_panel_aware"
        if args.split:
            default_output += "_split"
    output_name = args.output or default_output

    return _run_llm_detector(
        detector, args, limit, specific_file, output_name, logger, dual_logger,
        provider_label="OpenAI", is_openai=True,
    )


def _run_gemini(args, limit, specific_file, default_output, logger, dual_logger):
    """Run Gemini LLM mode (the default); return the process exit code."""
    dual_logger.print("Using GEMINI LLM mode")

    # Validate panel-aware refinement requirements
    if args.panel_aware_refinement:
        dual_logger.error("--panel-aware-refinement is only supported with --openai mode")
        return 1
    if args.split_advanced:
        dual_logger.warning("--split-advanced is only supported in hybrid mode, ignoring...")

    detector = ImageDetector(
        enable_greyscale=not args.no_greyscale,
        enable_contrast_enhancement=not args.no_contrast,
        contrast_factor=args.contrast_factor,
        refinement_mode=args.refinement_mode,
        one_at_a_time_mode=args.one_at_a_time,
        max_concurrent_workers=args.concurrent_workers,
        split_mode=args.split,
    )

    if not args.output:
        if args.one_at_a_time:
            default_output += "_one_at_a_time"
        if args.refinement_mode:
            default_output += "_refined"
        if args.split:
            default_output += "_split"
    output_name = args.output or default_output

    return _run_llm_detector(
        detector, args, limit, specific_file, output_name, logger, dual_logger,
        provider_label="Gemini", is_openai=False,
    )


def main():
    """Main execution function.

    Sets up logging, parses the command line, and dispatches to the hybrid,
    vector, OpenAI or Gemini runner (checked in that order).

    Returns:
        int: 0 on success or user interrupt during processing, 1 on a
        validation or processing error.
    """
    # Initialize dual logging first
    logger = setup_dual_logging()
    dual_logger = DualLogger(logger)

    # Log system information
    log_system_info(logger)

    try:
        args = parse_arguments()

        # Initialize cost tracking if enabled
        if args.enable_cost_tracking:
            cost_calculator.enable_tracking = True
            dual_logger.print("Cost tracking enabled")

        limit, specific_file, default_output = _resolve_run_scope(args, dual_logger)

        # This check used to live inside the hybrid branch, where it could
        # never fire; enforce it before any mode is selected.
        if args.split_simple and not args.hybrid:
            dual_logger.error("--split-simple can only be used with --hybrid mode")
            return 1

        if args.hybrid:
            runner = _run_hybrid
        elif args.vector_mode:
            runner = _run_vector
        elif args.openai:
            runner = _run_openai
        else:
            runner = _run_gemini

        return runner(args, limit, specific_file, default_output, logger, dual_logger)

    except Exception as e:
        dual_logger.error(f"Unexpected error: {e}")
        log_exception(logger)
        return 1


if __name__ == "__main__":
    # Required for multiprocessing on macOS and Windows
    multiprocessing.set_start_method('spawn', force=True)
    # sys.exit rather than the site-provided exit() builtin, which is not
    # guaranteed to exist (e.g. under ``python -S``).
    sys.exit(main())