# master_adapt_detect/tune_14_panel_split.py

#!/usr/bin/env python3
"""
Parameter tuning script for 14-panel splitting
Iteratively adjusts CV method parameters to achieve exactly 14 panels for 6786505.jpg
"""

import sys
import os
import cv2
import numpy as np
from pathlib import Path
from panel_splitter import PanelSplitter
from typing import Dict, List, Tuple, Any
import json
from itertools import product
import time
import multiprocessing as mp
from functools import partial

def convert_numpy_types(obj):
    """Recursively convert NumPy values to native Python types for JSON.

    Args:
        obj: Any value. NumPy booleans, integers, floats and arrays are
            converted; dicts, lists and tuples are traversed recursively;
            everything else is returned unchanged.

    Returns:
        A structure of the same shape in which NumPy scalars have become
        ``bool``/``int``/``float`` and arrays have become nested lists.
        Tuples stay tuples. NumPy scalar dictionary keys are converted too,
        because ``json.dump`` rejects them as keys.
    """
    # np.bool_ is not a subclass of np.integer, so it needs its own branch;
    # without it json.dump raises "Object of type bool_ is not JSON serializable".
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {
            (convert_numpy_types(key) if isinstance(key, np.generic) else key):
                convert_numpy_types(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [convert_numpy_types(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(convert_numpy_types(item) for item in obj)
    return obj

def test_gradient_config_worker(args):
    """Evaluate one gradient-analysis configuration (process-pool worker).

    Args:
        args: Tuple ``(config, image_path, target_panel_count, width, height)``.
            ``config`` must provide ``'sigma_scales'`` (iterable of Gaussian
            sigmas), ``'prominence_factor'`` and ``'distance_factor'``.

    Returns:
        A dict with keys ``'method'``, ``'config'``, ``'panel_count'``,
        ``'bounds'`` and ``'success'``. Each bound is
        ``{'bounds': (x, y, w, h), 'confidence': float}``. If anything raises,
        the dict reports zero panels, ``success=False`` and carries the
        message under ``'error'``; the worker itself never raises.
    """
    config, image_path, target_panel_count, width, height = args

    try:
        # Imported inside the worker (the module does not import SciPy at the
        # top), but only once rather than once per sigma.
        from scipy.signal import find_peaks

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None; report it clearly
            # instead of letting cvtColor fail with an opaque assertion.
            raise ValueError(f"Could not read image: {image_path}")
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Minimum spacing between peaks. find_peaks requires distance >= 1,
        # so clamp for very small images or factors.
        distance = max(1, int(height / target_panel_count * config['distance_factor']))

        separators = []
        for sigma in config['sigma_scales']:
            # Smooth, then take the vertical gradient: horizontal separators
            # show up as rows with a strong mean |dI/dy|.
            smoothed = cv2.GaussianBlur(gray, (0, 0), sigma)
            grad_y = cv2.Sobel(smoothed, cv2.CV_64F, 0, 1, ksize=3)
            profile = np.mean(np.abs(grad_y), axis=1)

            prominence = np.std(profile) * config['prominence_factor']
            peaks, properties = find_peaks(profile, prominence=prominence, distance=distance)
            if len(peaks) == 0:
                continue

            prominences = properties['prominences']
            max_prominence = np.max(prominences)
            # Confidence is the peak prominence relative to the strongest
            # peak found at this sigma.
            for peak, peak_prominence in zip(peaks, prominences):
                separators.append({
                    'position': int(peak),
                    'confidence': float(peak_prominence / max_prominence),
                    'sigma': sigma
                })

        # Convert separator rows into stacked full-width panels.
        separators.sort(key=lambda sep: sep['position'])

        # A separator only starts a new panel when it is more than half an
        # average panel height below the previous cut.
        min_gap = height // (target_panel_count * 2)

        bounds = []
        prev_y = 0
        for sep in separators:
            if sep['position'] > prev_y + min_gap:
                bounds.append({
                    'bounds': (0, prev_y, width, sep['position'] - prev_y),
                    'confidence': sep['confidence']
                })
                prev_y = sep['position']

        # Add the final panel unless the remainder is too thin.
        if prev_y < height - min_gap:
            bounds.append({
                'bounds': (0, prev_y, width, height - prev_y),
                'confidence': 0.8
            })

        return {
            'method': 'gradient_analysis',
            'config': config,
            'panel_count': len(bounds),
            'bounds': bounds,
            'success': len(bounds) == target_panel_count
        }

    except Exception as e:
        # Deliberately broad: one bad configuration must not abort the whole
        # pool.map run, so the failure is reported in the result instead.
        return {
            'method': 'gradient_analysis',
            'config': config,
            'panel_count': 0,
            'bounds': [],
            'success': False,
            'error': str(e)
        }

def test_canny_config_worker(args):
    """Evaluate one Canny + Hough configuration (process-pool worker).

    Args:
        args: Tuple ``(config, image_path, target_panel_count, width, height)``.
            ``config`` must provide ``'threshold_set'`` (iterable of
            ``(low, high)`` Canny thresholds), ``'morphology_kernel'`` (kernel
            shape), ``'hough_threshold'``, ``'min_line_length'`` and
            ``'max_line_gap'``.

    Returns:
        A dict with keys ``'method'``, ``'config'``, ``'panel_count'``,
        ``'bounds'`` and ``'success'``. Each bound is
        ``{'bounds': (x, y, w, h), 'confidence': float}``. If anything raises,
        the dict reports zero panels, ``success=False`` and carries the
        message under ``'error'``; the worker itself never raises.
    """
    config, image_path, target_panel_count, width, height = args

    try:
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None; report it clearly
            # instead of letting cvtColor fail with an opaque assertion.
            raise ValueError(f"Could not read image: {image_path}")
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # The closing kernel does not depend on the thresholds: build it once.
        kernel = np.ones(config['morphology_kernel'], np.uint8)

        # One closed edge map per threshold pair, then a pixel-wise maximum
        # so an edge found at any sensitivity survives.
        edge_maps = [
            cv2.morphologyEx(cv2.Canny(gray, low, high), cv2.MORPH_CLOSE, kernel)
            for low, high in config['threshold_set']
        ]
        combined_edges = np.maximum.reduce(edge_maps)

        # Find line segments using the probabilistic Hough transform.
        lines = cv2.HoughLinesP(
            combined_edges,
            1,
            np.pi/180,
            threshold=config['hough_threshold'],
            minLineLength=config['min_line_length'],
            maxLineGap=config['max_line_gap']
        )

        # Keep only nearly horizontal segments (rise below 1/20 of the height).
        max_rise = height // 20
        horizontal_lines = []
        if lines is not None:
            for line in lines:
                # Plain ints keep the result free of NumPy scalars.
                x1, y1, x2, y2 = (int(value) for value in line[0])
                if abs(y2 - y1) < max_rise:
                    length = abs(x2 - x1)
                    horizontal_lines.append({
                        'y_position': (y1 + y2) // 2,
                        'length': length,
                        'confidence': min(1.0, length / width)
                    })

        # Sort by y position and convert into stacked full-width panels.
        horizontal_lines.sort(key=lambda item: item['y_position'])

        # A line only starts a new panel when it is more than half an average
        # panel height below the previous cut.
        min_gap = height // (target_panel_count * 2)

        bounds = []
        prev_y = 0
        for line in horizontal_lines:
            y_pos = line['y_position']
            if y_pos > prev_y + min_gap:
                bounds.append({
                    'bounds': (0, prev_y, width, y_pos - prev_y),
                    'confidence': line['confidence']
                })
                prev_y = y_pos

        # Add the final panel unless the remainder is too thin.
        if prev_y < height - min_gap:
            bounds.append({
                'bounds': (0, prev_y, width, height - prev_y),
                'confidence': 0.8
            })

        return {
            'method': 'canny_detection',
            'config': config,
            'panel_count': len(bounds),
            'bounds': bounds,
            'success': len(bounds) == target_panel_count
        }

    except Exception as e:
        # Deliberately broad: one bad configuration must not abort the whole
        # pool.map run, so the failure is reported in the result instead.
        return {
            'method': 'canny_detection',
            'config': config,
            'panel_count': 0,
            'bounds': [],
            'success': False,
            'error': str(e)
        }

def test_template_config_worker(args):
    """Evaluate one template-matching configuration (process-pool worker).

    Args:
        args: Tuple ``(config, image_path, target_panel_count, width, height)``.
            ``config`` must provide ``'merge_distance'`` and a
            ``'template_config'`` dict with ``'white_line_height'``,
            ``'black_line_height'``, ``'gutter_height'``,
            ``'template_width_fraction'`` and ``'match_threshold'``.

    Returns:
        A dict with keys ``'method'``, ``'config'``, ``'panel_count'``,
        ``'bounds'`` and ``'success'``. Each bound is
        ``{'bounds': (x, y, w, h), 'confidence': float}``. If anything raises,
        the dict reports zero panels, ``success=False`` and carries the
        message under ``'error'``; the worker itself never raises.
    """
    config, image_path, target_panel_count, width, height = args

    try:
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None; report it clearly
            # instead of letting cvtColor fail with an opaque assertion.
            raise ValueError(f"Could not read image: {image_path}")
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        tc = config['template_config']

        template_width = width // tc['template_width_fraction']

        # White horizontal line template
        white_template = np.ones((tc['white_line_height'], template_width), dtype=np.uint8) * 255

        # Black horizontal line template
        black_template = np.zeros((tc['black_line_height'], template_width), dtype=np.uint8)

        # Gutter template (white with black edges)
        gutter_template = np.ones((tc['gutter_height'], template_width), dtype=np.uint8) * 255
        gutter_template[0, :] = 0
        gutter_template[-1, :] = 0

        templates = [
            ('white_line', white_template),
            ('black_line', black_template),
            ('gutter', gutter_template),
        ]

        # NOTE(review): the white and black templates are constant images;
        # TM_CCOEFF_NORMED is mean-normalised, so their scores may be
        # degenerate -- confirm against OpenCV's matchTemplate documentation.
        all_matches = []
        for template_name, template in templates:
            result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)
            half_height = template.shape[0] // 2

            # Only the row of a hit matters below, and the merge step keeps
            # the highest score among hits on the same row. Reducing each row
            # to its best qualifying score up front therefore yields the same
            # merged list (for any non-negative merge distance) without a
            # Python-level loop over every (y, x) hit.
            hits = result >= tc['match_threshold']
            row_best = np.where(hits, result, -np.inf).max(axis=1)
            for y in np.flatnonzero(hits.any(axis=1)):
                all_matches.append({
                    'y_position': int(y) + half_height,
                    'confidence': float(row_best[y]),
                    'template': template_name
                })

        # Sort and merge nearby matches, keeping the more confident one.
        all_matches.sort(key=lambda item: item['y_position'])

        merged_matches = []
        for match in all_matches:
            if not merged_matches or match['y_position'] - merged_matches[-1]['y_position'] > config['merge_distance']:
                merged_matches.append(match)
            elif match['confidence'] > merged_matches[-1]['confidence']:
                merged_matches[-1] = match

        # A match only starts a new panel when it is more than half an
        # average panel height below the previous cut.
        min_gap = height // (target_panel_count * 2)

        bounds = []
        prev_y = 0
        for match in merged_matches:
            y_pos = match['y_position']
            if y_pos > prev_y + min_gap:
                bounds.append({
                    'bounds': (0, prev_y, width, y_pos - prev_y),
                    'confidence': match['confidence']
                })
                prev_y = y_pos

        # Add the final panel unless the remainder is too thin.
        if prev_y < height - min_gap:
            bounds.append({
                'bounds': (0, prev_y, width, height - prev_y),
                'confidence': 0.8
            })

        return {
            'method': 'template_matching',
            'config': config,
            'panel_count': len(bounds),
            'bounds': bounds,
            'success': len(bounds) == target_panel_count
        }

    except Exception as e:
        # Deliberately broad: one bad configuration must not abort the whole
        # pool.map run, so the failure is reported in the result instead.
        return {
            'method': 'template_matching',
            'config': config,
            'panel_count': 0,
            'bounds': [],
            'success': False,
            'error': str(e)
        }

class ParameterTuner:
    """Grid-search detection parameters until an image splits into N panels.

    Each ``test_*_params`` method builds a list of configuration dicts, runs
    the matching module-level worker over them in a process pool, prints a
    short report and returns the raw worker results.
    """

    def __init__(self, target_image_path: str, target_panel_count: int = 14):
        """Load the target image and record its dimensions.

        Args:
            target_image_path: Path of the image to split.
            target_panel_count: Number of panels a configuration must produce
                to count as an exact match.

        Raises:
            ValueError: If ``target_panel_count`` is below 1 or the file
                cannot be decoded as an image.
            FileNotFoundError: If ``target_image_path`` is not a file.
        """
        if target_panel_count < 1:
            raise ValueError(f"target_panel_count must be >= 1, got {target_panel_count}")
        if not Path(target_image_path).is_file():
            raise FileNotFoundError(f"Target image not found: {target_image_path}")

        self.target_image_path = target_image_path
        self.target_panel_count = target_panel_count
        self.image = cv2.imread(target_image_path)
        if self.image is None:
            # cv2.imread returns None instead of raising on undecodable files.
            raise ValueError(f"Could not decode image: {target_image_path}")
        self.height, self.width = self.image.shape[:2]

        # Results storage
        self.results = []
        # Worker results whose panel count matched the target exactly.
        self.best_configs = []

        # Multiprocessing setup
        self.num_workers = mp.cpu_count()

        print(f"🎯 Target: {target_panel_count} panels for {Path(target_image_path).name}")
        print(f"📐 Image dimensions: {self.width}x{self.height}")
        print(f"🚀 Using {self.num_workers} parallel workers")

    def _run_method(self, worker, configs, label):
        """Run ``worker`` over ``configs`` in a process pool and report.

        Args:
            worker: Module-level worker function taking one argument tuple.
            configs: List of configuration dicts to evaluate.
            label: Title-case method name used in the printed report.

        Returns:
            The list of worker result dicts, in the order of ``configs``.
        """
        print(f"Testing {len(configs)} {label.lower()} configurations...")

        # Workers reload the image from disk, so only the path is shipped.
        args_list = [
            (config, self.target_image_path, self.target_panel_count, self.width, self.height)
            for config in configs
        ]

        # No point starting more processes than there are configurations.
        processes = max(1, min(self.num_workers, len(args_list)))
        with mp.Pool(processes=processes) as pool:
            method_results = pool.map(worker, args_list)

        # Workers swallow exceptions and return them under 'error'; surface
        # them so a systematic failure is not mistaken for "0 panels found".
        failures = [r for r in method_results if 'error' in r]
        if failures:
            print(f"⚠️  {len(failures)} configuration(s) failed; first error: {failures[0]['error']}")

        # Check for exact matches
        for result in method_results:
            if result['success']:
                print(f"✅ EXACT MATCH: Panels: {result['panel_count']}")
                self.best_configs.append(result)

        # Find best results
        closest = sorted(
            method_results,
            key=lambda r: abs(r['panel_count'] - self.target_panel_count),
        )[:5]
        print(f"\n🏆 Top 5 {label} Results:")
        for rank, result in enumerate(closest, 1):
            print(f"  {rank}. Panels: {result['panel_count']}")

        return method_results

    def test_gradient_analysis_params(self):
        """Test Enhanced Gradient Analysis with different parameters using multiprocessing.

        Returns:
            One result dict per configuration tested.
        """
        print("\n🔬 TUNING GRADIENT ANALYSIS PARAMETERS")
        print("=" * 50)

        # Parameter ranges to test
        sigma_ranges = [
            [3, 7, 15],      # Fine scale
            [5, 10, 20],     # Current default
            [7, 15, 25],     # Coarse scale
            [5, 12, 18],     # Medium scale
            [4, 8, 16],      # Balanced scale
        ]

        prominence_factors = [0.3, 0.4, 0.5, 0.6, 0.7]
        distance_factors = [0.8, 1.0, 1.2, 1.5, 2.0]

        # Cartesian product, in the same order as nested loops would produce.
        configs = [
            {
                'method': 'gradient_analysis',
                'sigma_scales': sigma_set,
                'prominence_factor': prom_factor,
                'distance_factor': dist_factor
            }
            for sigma_set, prom_factor, dist_factor in product(
                sigma_ranges, prominence_factors, distance_factors
            )
        ]

        return self._run_method(test_gradient_config_worker, configs, "Gradient Analysis")

    def test_canny_params(self):
        """Test Advanced Canny Edge Detection with different parameters using multiprocessing.

        Returns:
            One result dict per configuration tested.
        """
        print("\n🔬 TUNING CANNY EDGE DETECTION PARAMETERS")
        print("=" * 50)

        # Parameter ranges
        threshold_sets = [
            [(30, 100), (80, 160), (120, 200)],  # Low sensitivity
            [(50, 150), (100, 200), (150, 250)], # Current default
            [(70, 180), (120, 220), (170, 280)], # High sensitivity
            [(40, 120), (90, 180), (140, 240)],  # Balanced
        ]

        morphology_kernels = [
            (1, 1), (3, 1), (5, 1), (7, 1), (9, 1)  # Vertical kernels for horizontal lines
        ]

        hough_thresholds = [
            self.width // 8,  # Low threshold
            self.width // 6,  # Medium-low
            self.width // 4,  # Current default
            self.width // 3,  # High threshold
        ]

        min_line_lengths = [
            self.width // 5,  # Short lines
            self.width // 4,  # Medium-short
            self.width // 3,  # Current default
            self.width // 2,  # Long lines
        ]

        max_line_gaps = [
            self.width // 20,  # Small gaps
            self.width // 15,  # Medium gaps
            self.width // 10,  # Current default
            self.width // 8,   # Large gaps
        ]

        # Cartesian product, in the same order as nested loops would produce.
        configs = [
            {
                'method': 'canny_detection',
                'threshold_set': thresh_set,
                'morphology_kernel': kernel,
                'hough_threshold': hough_thresh,
                'min_line_length': min_len,
                'max_line_gap': max_gap
            }
            for thresh_set, kernel, hough_thresh, min_len, max_gap in product(
                threshold_sets, morphology_kernels, hough_thresholds,
                min_line_lengths, max_line_gaps
            )
        ]

        return self._run_method(test_canny_config_worker, configs, "Canny Detection")

    def test_template_matching_params(self):
        """Test Template Matching with different parameters using multiprocessing.

        Returns:
            One result dict per configuration tested.
        """
        print("\n🔬 TUNING TEMPLATE MATCHING PARAMETERS")
        print("=" * 50)

        # Template configurations
        template_configs = [
            {
                'white_line_height': 3,
                'black_line_height': 3,
                'gutter_height': 6,
                'template_width_fraction': 4,
                'match_threshold': 0.4
            },
            {
                'white_line_height': 5,
                'black_line_height': 5,
                'gutter_height': 10,
                'template_width_fraction': 4,
                'match_threshold': 0.5
            },
            {
                'white_line_height': 7,
                'black_line_height': 7,
                'gutter_height': 14,
                'template_width_fraction': 4,
                'match_threshold': 0.6
            },
            {
                'white_line_height': 4,
                'black_line_height': 4,
                'gutter_height': 8,
                'template_width_fraction': 3,
                'match_threshold': 0.45
            },
            {
                'white_line_height': 6,
                'black_line_height': 6,
                'gutter_height': 12,
                'template_width_fraction': 5,
                'match_threshold': 0.55
            }
        ]

        merge_distances = [
            self.height // (self.target_panel_count * 3),
            self.height // (self.target_panel_count * 2),
            # height // (count * 1.5) in pure integer arithmetic, so all three
            # distances are ints (the float form gave e.g. 95.0, not 95).
            (self.height * 2) // (self.target_panel_count * 3),
        ]

        # Cartesian product, in the same order as nested loops would produce.
        configs = [
            {
                'method': 'template_matching',
                'template_config': template_config,
                'merge_distance': merge_dist
            }
            for template_config, merge_dist in product(template_configs, merge_distances)
        ]

        return self._run_method(test_template_config_worker, configs, "Template Matching")

    def test_consensus_params(self):
        """Placeholder for consensus tuning; prints a notice and returns ``[]``."""
        print("\n🔬 TUNING CONSENSUS SYSTEM PARAMETERS")
        print("=" * 50)

        # For now, return empty results to speed up testing
        # Focus on the main methods that are already working
        print("Skipping consensus tuning - focusing on main methods")
        return []

    def run_full_tuning(self):
        """Run every tuning method, print a summary and save it as JSON.

        Writes ``tuning_results_14_panel.json`` in the current working
        directory.

        Returns:
            Tuple ``(exact_matches, close_matches)``: all results that hit the
            target panel count, and the ten non-matching results whose panel
            count is closest to the target.
        """
        print("🚀 STARTING COMPREHENSIVE PARAMETER TUNING")
        print("=" * 60)

        start_time = time.time()

        # Test all methods
        gradient_results = self.test_gradient_analysis_params()
        canny_results = self.test_canny_params()
        template_results = self.test_template_matching_params()
        consensus_results = self.test_consensus_params()

        # Combine all results
        all_results = gradient_results + canny_results + template_results + consensus_results

        # Find the absolute best configurations
        exact_matches = [r for r in all_results if r['success']]
        close_matches = sorted(
            (r for r in all_results if not r['success']),
            key=lambda r: abs(r['panel_count'] - self.target_panel_count),
        )[:10]

        # Generate summary
        elapsed_time = time.time() - start_time

        print("\n" + "=" * 60)
        print(f"🏁 TUNING COMPLETE - Time: {elapsed_time:.1f}s")
        print("=" * 60)

        print(f"\n🎯 EXACT MATCHES ({len(exact_matches)} found):")
        for i, match in enumerate(exact_matches, 1):
            print(f"  {i}. Method: {match['method']}")
            print(f"     Config: {match['config']}")
            print(f"     Panel Count: {match['panel_count']}")
            print()

        print("\n📊 CLOSE MATCHES (Top 10):")
        for i, match in enumerate(close_matches, 1):
            print(f"  {i}. Method: {match['method']}, Panels: {match['panel_count']}")
            print(f"     Config: {match['config']}")
            print()

        # Save results to file
        results_file = "tuning_results_14_panel.json"
        output_data = {
            'target_panel_count': self.target_panel_count,
            'image_path': self.target_image_path,
            'image_dimensions': {'width': self.width, 'height': self.height},
            'exact_matches': exact_matches,
            'close_matches': close_matches,
            'all_results': all_results,
            'tuning_time': elapsed_time,
            'total_configurations_tested': len(all_results)
        }

        # Convert NumPy types to native Python types for JSON serialization
        output_data = convert_numpy_types(output_data)

        with open(results_file, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=2)

        print(f"💾 Results saved to: {results_file}")

        return exact_matches, close_matches

def main(target_image: str = "layouts/6786505.jpg", target_panels: int = 14) -> int:
    """Run the tuning workflow and print the recommended settings.

    Args:
        target_image: Path of the image to tune against. Defaults to the
            layout this script was written for.
        target_panels: Panel count a configuration must produce to count as
            an exact match.

    Returns:
        Process exit code: ``1`` if the target image is missing, else ``0``.
    """
    print("🎯 14-PANEL SPLITTING PARAMETER TUNING")
    print("=" * 60)

    # is_file() also rejects a directory that happens to carry this name.
    if not Path(target_image).is_file():
        print(f"❌ ERROR: Target image {target_image} not found!")
        return 1

    # Initialize tuner
    tuner = ParameterTuner(target_image, target_panels)

    # Run tuning
    exact_matches, close_matches = tuner.run_full_tuning()

    if exact_matches:
        print(f"\n🎉 SUCCESS: Found {len(exact_matches)} configurations that produce exactly {target_panels} panels!")
        print("\n🔧 RECOMMENDED SETTINGS:")
        print("Update your PanelSplitter class with these optimal parameters:")

        for i, match in enumerate(exact_matches[:3], 1):  # Show top 3
            print(f"\n  Option {i} - {match['method']}:")
            print(f"    {match['config']}")
    else:
        print("\n⚠️  No exact matches found. Best alternatives:")
        for i, match in enumerate(close_matches[:3], 1):
            print(f"  {i}. {match['method']}: {match['panel_count']} panels")

    return 0

if __name__ == "__main__":
    # Pin the start method to 'spawn' before any pool is created; force=True
    # overrides a start method that may already have been selected.
    mp.set_start_method('spawn', force=True)
    exit_status = main()
    sys.exit(exit_status)