# master_adapt_detect/panel_splitter.py

#!/usr/bin/env python3
"""
Panel Splitter Module - Multi-method panel splitting for comic/manga layouts
"""

import os
import cv2
import numpy as np
from typing import List, Dict, Tuple, Optional
import json
from pathlib import Path
from scipy import ndimage
from scipy.signal import find_peaks
from sklearn.cluster import KMeans
from skimage.feature import local_binary_pattern
import matplotlib.pyplot as plt


class PanelSplitter:
    """
    Multi-method panel splitting class that uses various computer vision techniques
    to split multi-panel layouts into individual images, then matches each split
    to master images using inlier analysis.
    """

    def __init__(self, debug=False):
        """
        Set up the splitter and, in debug mode, its output directory.

        Args:
            debug (bool): When truthy, debug visualizations are written to
                the ``debug_splitting`` directory, which is created here if
                it does not exist yet.
        """
        self.debug = debug
        self.debug_dir = "debug_splitting"

        # Nothing to prepare unless debug output was requested.
        if not self.debug:
            return

        # Leave an already existing path untouched.
        if os.path.exists(self.debug_dir):
            return

        os.makedirs(self.debug_dir)

    def split_layout_and_match(self, layout_path: str, master_images: List[str],
                              detector_instance=None) -> Dict:
        """
        Split a layout into panels and match every panel against the masters.

        Args:
            layout_path (str): Path to the layout image
            master_images (List[str]): Master image identifiers/paths; passed
                through unchanged to whichever matcher is used
            detector_instance: Optional detector object. If it exposes
                ``count_panels_in_layout`` that method supplies the target
                panel count; if it exposes ``match_split_to_masters`` that
                method performs the matching, otherwise
                ``_match_split_basic`` is used.

        Returns:
            Dict: Always contains ``layout_path``, ``detected_masters``,
            ``panel_count``, ``panel_confidence``, ``split_mode`` and
            ``splits_generated``. When at least one split was produced it
            also contains ``split_results`` and ``all_matches``.
        """
        # Local import: temp files are only needed by this method.
        import tempfile

        # Step 1: Get panel count from the detector (if it supports it)
        target_panel_count = 1
        panel_confidence = "unknown"

        if hasattr(detector_instance, 'count_panels_in_layout'):
            print(f"Getting panel count for {os.path.basename(layout_path)}...")
            # Tolerate a detector that returns None instead of a dict.
            panel_result = detector_instance.count_panels_in_layout(layout_path) or {}
            target_panel_count = panel_result.get('panel_count', 1)
            panel_confidence = panel_result.get('confidence', 'unknown')
            print(f"OpenAI detected {target_panel_count} panels (confidence: {panel_confidence})")

        # A count below 1 (or a non-numeric one) would make every splitter
        # divide by zero, so fall back to a single panel.
        try:
            target_panel_count = int(target_panel_count)
        except (TypeError, ValueError):
            target_panel_count = 0
        if target_panel_count < 1:
            print("Invalid panel count reported, defaulting to 1")
            target_panel_count = 1

        # Step 2: Split the layout into individual panels
        print(f"Splitting layout with target count: {target_panel_count}")
        splits = self.split_panels(layout_path, target_panel_count)

        if not splits:
            print("No splits detected, returning empty results")
            return {
                'layout_path': layout_path,
                'detected_masters': [],
                'panel_count': target_panel_count,
                'panel_confidence': panel_confidence,
                'split_mode': 'enabled',
                'splits_generated': 0
            }

        print(f"Generated {len(splits)} splits")

        # Step 3: Match each split to master images
        all_matches = []
        split_results = []
        use_detector_matcher = hasattr(detector_instance, 'match_split_to_masters')

        for i, split_info in enumerate(splits):
            print(f"Processing split {i+1}/{len(splits)}")

            # The matchers take a file path, so the crop is written to a
            # uniquely named temp file (safe for concurrent runs, portable
            # across platforms).
            fd, temp_split_path = tempfile.mkstemp(prefix=f"split_{i}_", suffix=".jpg")
            os.close(fd)

            try:
                if not cv2.imwrite(temp_split_path, split_info['image']):
                    print(f"Warning: could not write split {i+1} to {temp_split_path}")
                    split_matches = []
                elif use_detector_matcher:
                    split_matches = detector_instance.match_split_to_masters(
                        temp_split_path, master_images
                    ) or []
                else:
                    # Use basic inlier analysis if the detector has no matcher
                    split_matches = self._match_split_basic(temp_split_path, master_images)
            finally:
                # Always clean up, even when matching raised.
                if os.path.exists(temp_split_path):
                    os.remove(temp_split_path)

            # Add split metadata to matches
            for match in split_matches:
                match['split_index'] = i
                match['split_bounds'] = split_info['bounds']
                match['split_confidence'] = split_info['confidence']
                all_matches.append(match)

            split_results.append({
                'split_index': i,
                'bounds': split_info['bounds'],
                'confidence': split_info['confidence'],
                'matches': split_matches
            })

        # Step 4: Aggregate results
        result = {
            'layout_path': layout_path,
            'detected_masters': [match['master_id'] for match in all_matches],
            'panel_count': target_panel_count,
            'panel_confidence': panel_confidence,
            'split_mode': 'enabled',
            'splits_generated': len(splits),
            'split_results': split_results,
            'all_matches': all_matches
        }

        # Keep a single (best) match per master
        return self._deduplicate_matches(result)

    def split_panels(self, image_path: str, target_panel_count: int) -> List[Dict]:
        """
        Split a layout image into individual panels.

        Only the optimized Canny detector is used. If it raises or returns
        nothing, the image is divided into equal horizontal strips instead.

        Args:
            image_path (str): Path to the layout image
            target_panel_count (int): Target number of panels to split into;
                values below 1 (or non-numeric values) are treated as 1

        Returns:
            List[Dict]: One dict per panel with keys ``image`` (the crop),
            ``bounds`` as ``(x, y, w, h)``, ``confidence`` and
            ``method_votes``. Empty if the image cannot be loaded.
        """
        # Load image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Error: Could not load image {image_path}")
            return []

        # The detectors divide by the target count, so it must be >= 1.
        try:
            target_panel_count = max(1, int(target_panel_count))
        except (TypeError, ValueError):
            target_panel_count = 1

        height, width = image.shape[:2]
        print(f"Image dimensions: {width}x{height}")

        # Use only optimized Canny detection method
        print("Using optimized Canny detection for panel splitting")

        try:
            method_results = self._optimized_canny_detection(image, target_panel_count)
        except Exception as e:
            # Deliberate best-effort: any detector failure falls back.
            print(f"Optimized Canny detection failed: {e}")
            return self._fallback_simple_division(image, target_panel_count)

        if not method_results:
            print("Optimized Canny detection failed, falling back to simple division")
            return self._fallback_simple_division(image, target_panel_count)

        # Create split images (single method, so no consensus step)
        splits = []
        for split_bounds in method_results:
            # Detector output may carry NumPy scalars; normalise to built-in
            # ints so the metadata stays plain Python (e.g. JSON-friendly).
            x, y, w, h = (int(v) for v in split_bounds['bounds'])

            # Skip extremely small splits (reduced threshold for 14-panel detection)
            if w < 20 or h < 20:
                continue

            splits.append({
                'image': image[y:y+h, x:x+w],
                'bounds': (x, y, w, h),
                'confidence': float(split_bounds['confidence']),
                'method_votes': split_bounds.get('method_votes', [])
            })

        if self.debug:
            self._save_debug_visualization(image_path, image, splits)

        return splits

    def _enhanced_gradient_analysis(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """
        Detect horizontal panel separators from peaks in the vertical gradient.

        The grayscale image is blurred at several sigmas; for each, the mean
        absolute vertical Sobel response per row is searched for prominent
        peaks. Peaks from all scales are merged, sorted by row, and turned
        into full-width panel bounds.

        Args:
            image (np.ndarray): BGR image (converted with ``COLOR_BGR2GRAY``)
            target_count (int): Expected number of panels; values below 1 are
                treated as 1

        Returns:
            List[Dict]: Dicts with ``bounds`` as ``(x, y, w, h)``,
            ``confidence`` and ``method``.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = gray.shape

        target_count = max(1, int(target_count))
        min_panel_height = height // (target_count * 2)
        # find_peaks rejects distance < 1, which happens on short images or
        # large panel counts.
        min_peak_distance = max(1, min_panel_height)

        # Multi-scale gradient analysis
        separators = []
        for sigma in (5, 10, 20):
            smoothed = cv2.GaussianBlur(gray, (0, 0), sigma)

            # Vertical gradient responds to horizontal separators
            grad_y = cv2.Sobel(smoothed, cv2.CV_64F, 0, 1, ksize=3)

            # One value per image row
            profile = np.mean(np.abs(grad_y), axis=1)

            prominence = np.std(profile) * 0.5
            peaks, properties = find_peaks(
                profile, prominence=prominence, distance=min_peak_distance
            )
            if len(peaks) == 0:
                continue

            prominences = properties['prominences']
            max_prominence = float(np.max(prominences))

            # Confidence is the peak prominence relative to the strongest
            # peak found at this scale.
            for peak, peak_prominence in zip(peaks, prominences):
                if max_prominence > 0:
                    confidence = float(peak_prominence) / max_prominence
                else:
                    confidence = 0.0
                separators.append({
                    'position': int(peak),
                    'confidence': confidence,
                    'method': 'gradient_analysis',
                    'scale': sigma
                })

        separators.sort(key=lambda s: s['position'])

        # Create bounds from separators
        bounds = []
        prev_y = 0

        for sep in separators:
            # Enforce a minimum panel height between consecutive cuts
            if sep['position'] > prev_y + min_panel_height:
                bounds.append({
                    'bounds': (0, prev_y, width, sep['position'] - prev_y),
                    'confidence': sep['confidence'],
                    'method': 'gradient_analysis'
                })
                prev_y = sep['position']

        # Add final panel
        if prev_y < height - min_panel_height:
            bounds.append({
                'bounds': (0, prev_y, width, height - prev_y),
                'confidence': 0.8,
                'method': 'gradient_analysis'
            })

        return bounds

    def _optimized_canny_detection(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """
        Find horizontal panel separators with Canny edges and a Hough transform.

        Edge maps from three Canny threshold pairs are closed with a small
        kernel, combined by element-wise maximum, and searched for long line
        segments. Nearly horizontal segments become cut positions, which are
        converted into full-width panel bounds.

        NOTE(review): the Hough parameters below are absolute pixel values
        (per the original author's note they come from a tuning run that
        produced 14 panels). On images much smaller than the tuning image
        they will likely find no lines, in which case this method returns a
        single full-height panel rather than an empty list - confirm that
        this is intended.

        Args:
            image (np.ndarray): BGR image (converted with ``COLOR_BGR2GRAY``)
            target_count (int): Expected number of panels; values below 1 are
                treated as 1

        Returns:
            List[Dict]: Dicts with ``bounds`` as ``(x, y, w, h)``,
            ``confidence`` and ``method``; all numbers are built-in Python
            types.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = gray.shape

        target_count = max(1, int(target_count))
        min_panel_height = height // (target_count * 2)

        # Configuration from the tuning results
        threshold_set = [(50, 150), (100, 200), (150, 250)]
        morphology_kernel = (3, 1)
        hough_threshold = 1324
        min_line_length = 3530
        max_line_gap = 1059

        # The kernel is identical for every threshold pair, so build it once.
        kernel = np.ones(morphology_kernel, np.uint8)

        # Multi-threshold Canny detection, each map closed morphologically
        all_edges = [
            cv2.morphologyEx(cv2.Canny(gray, low, high), cv2.MORPH_CLOSE, kernel)
            for low, high in threshold_set
        ]

        # Combine edge maps
        combined_edges = np.maximum.reduce(all_edges)

        # Find line segments using the probabilistic Hough transform
        lines = cv2.HoughLinesP(
            combined_edges,
            1,
            np.pi/180,
            threshold=hough_threshold,
            minLineLength=min_line_length,
            maxLineGap=max_line_gap
        )

        # Keep only nearly horizontal segments
        horizontal_lines = []
        if lines is not None:
            max_slope_px = height // 20
            for line in lines:
                # Convert NumPy scalars to ints so results stay plain Python
                x1, y1, x2, y2 = (int(v) for v in line[0])
                if abs(y2 - y1) < max_slope_px:
                    length = abs(x2 - x1)
                    horizontal_lines.append({
                        'y_position': (y1 + y2) // 2,
                        'length': length,
                        # Fraction of the image width the segment spans
                        'confidence': min(1.0, length / width)
                    })

        # Sort by y position and create bounds
        horizontal_lines.sort(key=lambda entry: entry['y_position'])

        bounds = []
        prev_y = 0

        for line in horizontal_lines:
            y_pos = line['y_position']
            # Enforce a minimum panel height between consecutive cuts
            if y_pos > prev_y + min_panel_height:
                bounds.append({
                    'bounds': (0, prev_y, width, y_pos - prev_y),
                    'confidence': line['confidence'],
                    'method': 'optimized_canny_detection'
                })
                prev_y = y_pos

        # Add final panel
        if prev_y < height - min_panel_height:
            bounds.append({
                'bounds': (0, prev_y, width, height - prev_y),
                'confidence': 0.8,
                'method': 'optimized_canny_detection'
            })

        return bounds

    def _template_matching_method(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """
        Locate horizontal separators by matching synthetic separator templates.

        Three templates, each a quarter of the image width, are matched with
        ``TM_CCOEFF_NORMED``: a white band, a black band, and a white band
        with black top/bottom rows. Every location scoring at least 0.5 is a
        candidate; nearby candidates are merged (highest score wins) and the
        survivors become cut positions for full-width panels.

        NOTE(review): the white and black templates are constant images; how
        a normalized correlation behaves for zero-variance templates should
        be confirmed against the OpenCV documentation.

        Args:
            image (np.ndarray): BGR image (converted with ``COLOR_BGR2GRAY``)
            target_count (int): Expected number of panels

        Returns:
            List[Dict]: Dicts with ``bounds`` as ``(x, y, w, h)``,
            ``confidence`` and ``method``.
        """
        grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = grayscale.shape
        template_width = width // 4

        # Build the three separator templates
        white_line = np.full((5, template_width), 255, dtype=np.uint8)
        black_line = np.zeros((5, template_width), dtype=np.uint8)
        gutter = np.full((10, template_width), 255, dtype=np.uint8)
        gutter[0, :] = 0
        gutter[-1, :] = 0

        separator_templates = [
            ('white_line', white_line),
            ('black_line', black_line),
            ('gutter', gutter),
        ]

        # Collect every location whose score reaches the threshold
        candidates = []
        for name, template in separator_templates:
            scores = cv2.matchTemplate(grayscale, template, cv2.TM_CCOEFF_NORMED)
            half_height = template.shape[0] // 2
            rows, cols = np.where(scores >= 0.5)
            candidates.extend(
                {
                    # Report the vertical centre of the matched band
                    'y_position': row + half_height,
                    'confidence': scores[row, col],
                    'template': name
                }
                for row, col in zip(rows, cols)
            )

        candidates.sort(key=lambda entry: entry['y_position'])

        # Minimum distance between two cuts
        min_gap = height // (target_count * 2)

        # Collapse candidates closer than min_gap, keeping the best score
        merged = []
        for candidate in candidates:
            if merged and candidate['y_position'] - merged[-1]['y_position'] <= min_gap:
                if candidate['confidence'] > merged[-1]['confidence']:
                    merged[-1] = candidate
            else:
                merged.append(candidate)

        # Turn cut positions into full-width panels
        panels = []
        top = 0
        for separator in merged:
            cut = separator['y_position']
            if cut <= top + min_gap:
                continue
            panels.append({
                'bounds': (0, top, width, cut - top),
                'confidence': separator['confidence'],
                'method': 'template_matching'
            })
            top = cut

        # Remaining area below the last cut
        if top < height - min_gap:
            panels.append({
                'bounds': (0, top, width, height - top),
                'confidence': 0.8,
                'method': 'template_matching'
            })

        return panels

    def _contour_analysis_method(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """
        Detect panels as large, well-filled external contours.

        External contours of a Canny edge map are kept when their polygon
        approximation has at least four vertices, their bounding box is wider
        than a quarter of the image and taller than half an average panel,
        and the contour fills more than 70% of that box. Candidates are then
        taken top to bottom, dropping any that overlaps one already kept.

        Args:
            image (np.ndarray): BGR image (converted with ``COLOR_BGR2GRAY``)
            target_count (int): Expected number of panels

        Returns:
            List[Dict]: Dicts with ``bounds`` as ``(x, y, w, h)``,
            ``confidence`` (the fill ratio, capped at 1.0) and ``method``.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = gray.shape

        edge_map = cv2.Canny(gray, 50, 150)
        contours, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        candidates = []
        for contour in contours:
            # Simplify the outline; tolerance is 2% of the perimeter
            perimeter = cv2.arcLength(contour, True)
            polygon = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
            if len(polygon) < 4:
                continue

            x, y, w, h = cv2.boundingRect(contour)

            # Size filter
            if not (w > width // 4 and h > height // (target_count * 2)):
                continue

            # How much of the bounding box the contour actually covers
            fill_ratio = cv2.contourArea(contour) / (w * h)
            if fill_ratio > 0.7:
                candidates.append({
                    'bounds': (x, y, w, h),
                    'confidence': min(1.0, fill_ratio),
                    'method': 'contour_analysis'
                })

        # Process from top to bottom
        candidates.sort(key=lambda entry: entry['bounds'][1])

        # First come, first served: drop anything overlapping a kept box
        kept = []
        for candidate in candidates:
            clashes = any(
                self._rectangles_overlap(candidate['bounds'], other['bounds'])
                for other in kept
            )
            if not clashes:
                kept.append(candidate)

        return kept

    def _texture_analysis_method(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """
        Detect horizontal separators as low-texture strips.

        A uniform Local Binary Pattern image is computed, then scanned with
        half-overlapping horizontal strips. Strips whose LBP variance is
        below the 25th percentile are separator candidates; nearby candidates
        are merged (highest confidence wins) and converted to panel bounds.

        Args:
            image (np.ndarray): BGR image (converted with ``COLOR_BGR2GRAY``)
            target_count (int): Expected number of panels; values below 1 are
                treated as 1

        Returns:
            List[Dict]: Dicts with ``bounds`` as ``(x, y, w, h)``,
            ``confidence`` and ``method``.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = gray.shape

        target_count = max(1, int(target_count))
        min_gap = height // (target_count * 2)

        # Local Binary Pattern analysis
        radius = 3
        n_points = 8 * radius
        lbp = local_binary_pattern(gray, n_points, radius, method='uniform')

        # Strips overlap by half. The height is clamped to 2 so the step is
        # never 0 (range() would raise on short images / many panels).
        strip_height = max(2, height // (target_count * 4))
        step = strip_height // 2
        strip_tops = list(range(0, height - strip_height, step))

        texture_profile = np.array(
            [np.var(lbp[top:top + strip_height, :]) for top in strip_tops]
        )

        # Find low-texture regions (potential separators)
        separators = []
        if texture_profile.size:
            threshold = np.percentile(texture_profile, 25)  # Bottom 25%
            max_variance = np.max(texture_profile)

            for top, variance in zip(strip_tops, texture_profile):
                if variance < threshold:
                    # variance < threshold <= max_variance, so max_variance
                    # is strictly positive here.
                    separators.append({
                        # Actual top row of the strip that was measured
                        'y_position': top,
                        'confidence': 1.0 - float(variance / max_variance)
                    })

        # Merge nearby separators, keeping the more confident one
        merged_separators = []
        for sep in separators:
            if not merged_separators or sep['y_position'] - merged_separators[-1]['y_position'] > min_gap:
                merged_separators.append(sep)
            elif sep['confidence'] > merged_separators[-1]['confidence']:
                merged_separators[-1] = sep

        # Create bounds
        bounds = []
        prev_y = 0

        for sep in merged_separators:
            y_pos = sep['y_position']
            if y_pos > prev_y + min_gap:
                bounds.append({
                    'bounds': (0, prev_y, width, y_pos - prev_y),
                    'confidence': sep['confidence'],
                    'method': 'texture_analysis'
                })
                prev_y = y_pos

        # Add final panel
        if prev_y < height - min_gap:
            bounds.append({
                'bounds': (0, prev_y, width, height - prev_y),
                'confidence': 0.8,
                'method': 'texture_analysis'
            })

        return bounds

    def _clustering_method(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """
        Detect panel boundaries by K-means clustering of pixels.

        The grayscale image is downsampled to a quarter of its size and every
        pixel is described by its normalized (row, column, intensity). The
        pixels are clustered into ``target_count + 1`` groups; a row whose
        set of cluster labels shares too little with the row above or below
        is treated as a boundary.

        Args:
            image (np.ndarray): BGR image (converted with ``COLOR_BGR2GRAY``)
            target_count (int): Expected number of panels

        Returns:
            List[Dict]: Dicts with ``bounds`` as ``(x, y, w, h)``,
            ``confidence`` and ``method``.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = gray.shape

        # Work on a reduced copy to keep clustering fast
        scale_factor = 0.25
        small_height = int(height * scale_factor)
        small_width = int(width * scale_factor)
        small_gray = cv2.resize(gray, (small_width, small_height))

        # Per-pixel features: row, column, intensity
        row_idx, col_idx = np.indices((small_height, small_width))
        features = np.column_stack([
            row_idx.ravel(),
            col_idx.ravel(),
            small_gray.ravel()
        ]).astype(np.float32)

        # Bring every feature into the [0, 1] range
        features[:, 0] /= small_height
        features[:, 1] /= small_width
        features[:, 2] /= 255.0

        # One extra cluster leaves room for the separators themselves
        kmeans = KMeans(n_clusters=target_count + 1, random_state=42)
        label_image = kmeans.fit_predict(features).reshape(small_height, small_width)

        # Set of labels present in each row
        row_labels = [set(row) for row in label_image]

        # A row is a boundary when fewer than 70% of its labels also occur
        # in the row above, or in the row below.
        boundaries = []
        for y in range(1, small_height - 1):
            current = row_labels[y]
            required_overlap = len(current) * 0.7
            differs = any(
                len(current.intersection(neighbour)) < required_overlap
                for neighbour in (row_labels[y - 1], row_labels[y + 1])
            )
            if differs:
                boundaries.append({
                    # Map back to full-resolution coordinates
                    'y_position': int(y / scale_factor),
                    'confidence': 0.7
                })

        min_gap = height // (target_count * 2)

        # Turn boundaries into full-width panels
        panels = []
        top = 0
        for boundary in boundaries:
            cut = boundary['y_position']
            if cut <= top + min_gap:
                continue
            panels.append({
                'bounds': (0, top, width, cut - top),
                'confidence': boundary['confidence'],
                'method': 'clustering'
            })
            top = cut

        # Remaining area below the last cut
        if top < height - min_gap:
            panels.append({
                'bounds': (0, top, width, height - top),
                'confidence': 0.8,
                'method': 'clustering'
            })

        return panels

    def _apply_consensus(self, method_results: List[List[Dict]], image: np.ndarray,
                        target_count: int) -> List[Dict]:
        """Apply consensus voting to combine results from multiple methods"""
        height, width = image.shape[:2]

        # Collect all proposed boundaries
        all_boundaries = []

        for method_result in method_results:
            for panel in method_result:
                bounds = panel['bounds']
                # Add both top and bottom boundaries
                all_boundaries.append({
                    'y_position': bounds[1],  # Top boundary
                    'confidence': panel['confidence'],
                    'method': panel['method'],
                    'type': 'top'
                })
                all_boundaries.append({
                    'y_position': bounds[1] + bounds[3],  # Bottom boundary
                    'confidence': panel['confidence'],
                    'method': panel['method'],
                    'type': 'bottom'
                })

        # Sort boundaries by position
        all_boundaries.sort(key=lambda x: x['y_position'])

        # Cluster nearby boundaries
        clustered_boundaries = []
        cluster_threshold = height // (target_count * 3)

        for boundary in all_boundaries:
            # Skip image edges
            if boundary['y_position'] < cluster_threshold or boundary['y_position'] > height - cluster_threshold:
                continue

            # Find existing cluster or create new one
            added_to_cluster = False
            for cluster in clustered_boundaries:
                if abs(boundary['y_position'] - cluster['y_position']) < cluster_threshold:
                    # Add to existing cluster
                    cluster['boundaries'].append(boundary)
                    # Update cluster position (weighted average)
                    total_weight = sum(b['confidence'] for b in cluster['boundaries'])
                    cluster['y_position'] = sum(b['y_position'] * b['confidence']
                                              for b in cluster['boundaries']) / total_weight
                    cluster['confidence'] = total_weight / len(cluster['boundaries'])
                    added_to_cluster = True
                    break

            if not added_to_cluster:
                clustered_boundaries.append({
                    'y_position': boundary['y_position'],
                    'confidence': boundary['confidence'],
                    'boundaries': [boundary]
                })

        # Sort clustered boundaries and select best ones
        clustered_boundaries.sort(key=lambda x: x['y_position'])

        # Filter boundaries based on confidence and target count
        min_confidence = 0.3
        good_boundaries = [b for b in clustered_boundaries if b['confidence'] >= min_confidence]

        # Limit to reasonable number of boundaries
        if len(good_boundaries) > target_count - 1:
            good_boundaries.sort(key=lambda x: x['confidence'], reverse=True)
            good_boundaries = good_boundaries[:target_count - 1]
            good_boundaries.sort(key=lambda x: x['y_position'])

        # Create final panel bounds
        final_bounds = []
        prev_y = 0

        for boundary in good_boundaries:
            y_pos = int(boundary['y_position'])
            if y_pos > prev_y + height // (target_count * 2):
                method_votes = [b['method'] for b in boundary['boundaries']]
                final_bounds.append({
                    'bounds': (0, prev_y, width, y_pos - prev_y),
                    'confidence': boundary['confidence'],
                    'method_votes': method_votes
                })
                prev_y = y_pos

        # Add final panel
        if prev_y < height - height // (target_count * 2):
            final_bounds.append({
                'bounds': (0, prev_y, width, height - prev_y),
                'confidence': 0.8,
                'method_votes': ['consensus']
            })

        return final_bounds

    def _fallback_simple_division(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """Fallback method: simple equal division"""
        height, width = image.shape[:2]
        panel_height = height // target_count

        splits = []
        for i in range(target_count):
            y = i * panel_height
            h = panel_height if i < target_count - 1 else height - y

            splits.append({
                'image': image[y:y+h, :],
                'bounds': (0, y, width, h),
                'confidence': 0.5,
                'method_votes': ['simple_division']
            })

        return splits

    def _match_split_basic(self, split_path: str, master_images: List[str]) -> List[Dict]:
        """
        Basic matching using OpenCV features (fallback).

        AKAZE descriptors of the split are matched against each master with
        a brute-force Hamming matcher, filtered by Lowe's ratio test (0.7),
        and verified with a RANSAC homography. A master is reported as
        ``'high'`` (>= 15 inliers and ratio >= 0.6) or ``'medium'``
        (>= 8 inliers and ratio >= 0.4); weaker matches are dropped.

        Args:
            split_path (str): Path to the split image on disk
            master_images (List[str]): Master identifiers. Each one is
                resolved to ``master_images/<id>.jpg`` relative to the
                current working directory; missing files are skipped.

        Returns:
            List[Dict]: Dicts with ``master_id``, ``confidence``, ``inliers``
            and ``match_details``. Any error is printed and whatever was
            collected so far is returned.
        """
        matches = []

        try:
            # Load the split image
            split_img = cv2.imread(split_path, cv2.IMREAD_GRAYSCALE)
            if split_img is None:
                return matches

            # Initialize feature detector and matcher
            akaze = cv2.AKAZE_create()
            bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)

            # Detect keypoints and descriptors for split image
            kp_split, des_split = akaze.detectAndCompute(split_img, None)
            if des_split is None:
                return matches

            # Masters are looked up in the master_images directory
            master_images_path = Path("master_images")

            for master_id in master_images:
                master_path = master_images_path / f"{master_id}.jpg"
                if not master_path.exists():
                    continue

                master_img = cv2.imread(str(master_path), cv2.IMREAD_GRAYSCALE)
                if master_img is None:
                    continue

                kp_master, des_master = akaze.detectAndCompute(master_img, None)
                if des_master is None:
                    continue

                # Two nearest neighbours per descriptor for the ratio test
                matches_raw = bf.knnMatch(des_split, des_master, k=2)

                # Lowe's ratio test; pairs with fewer than 2 neighbours are
                # ignored.
                good_matches = [
                    pair[0] for pair in matches_raw
                    if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance
                ]

                # Too few correspondences for a reliable homography
                if len(good_matches) < 10:
                    continue

                src_pts = np.float32([kp_split[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
                dst_pts = np.float32([kp_master[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)

                try:
                    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
                except Exception as e:
                    # Best effort per master: report and move on, but do not
                    # swallow KeyboardInterrupt/SystemExit.
                    print(f"Homography estimation failed for master {master_id}: {e}")
                    continue

                if M is None:
                    continue

                inliers = int(np.sum(mask))
                inlier_ratio = inliers / len(good_matches)

                # Basic confidence scoring; anything weaker is discarded
                if inliers >= 15 and inlier_ratio >= 0.6:
                    confidence = 'high'
                elif inliers >= 8 and inlier_ratio >= 0.4:
                    confidence = 'medium'
                else:
                    continue

                matches.append({
                    'master_id': master_id,
                    'confidence': confidence,
                    'inliers': inliers,
                    'match_details': {
                        'inliers': inliers,
                        'good_matches': len(good_matches),
                        'inlier_ratio': round(inlier_ratio, 3)
                    }
                })

        except Exception as e:
            print(f"Error in basic matching: {e}")

        return matches

    def _deduplicate_matches(self, result: Dict) -> Dict:
        """
        Remove duplicate matches, keeping the best one per master.

        Confidence may be a label (``'low'`` < ``'medium'`` < ``'high'``) or
        a number. Labels are ranked by meaning rather than compared as
        strings (alphabetically ``'medium'`` would beat ``'high'``). Ties are
        broken by the ``inliers`` count when present. Mixing labels and
        numbers for the same master does not raise, but the resulting order
        between the two kinds is not meaningful.

        Args:
            result (Dict): Result dict containing ``all_matches``; each match
                needs a ``master_id``

        Returns:
            Dict: The same dict, with ``all_matches`` and
            ``detected_masters`` rewritten in place (masters keep their
            first-seen order). Returned unchanged if there are no matches.
        """
        if not result['all_matches']:
            return result

        label_rank = {'low': 1, 'medium': 2, 'high': 3}

        def as_number(value):
            # Unparseable or missing values rank lowest instead of raising
            try:
                return float(value)
            except (TypeError, ValueError):
                return 0.0

        def rank(match):
            confidence = match.get('confidence', 0)
            if isinstance(confidence, str):
                label = confidence.strip().lower()
                level = label_rank[label] if label in label_rank else as_number(label)
            else:
                level = as_number(confidence)
            return (level, as_number(match.get('inliers', 0)))

        # Group matches by master_id (dicts preserve first-seen order)
        master_groups = {}
        for match in result['all_matches']:
            master_groups.setdefault(match['master_id'], []).append(match)

        # Keep only the best match for each master
        deduplicated_matches = [max(group, key=rank) for group in master_groups.values()]

        result['all_matches'] = deduplicated_matches
        result['detected_masters'] = [match['master_id'] for match in deduplicated_matches]

        return result

    def _rectangles_overlap(self, rect1: Tuple[int, int, int, int],
                           rect2: Tuple[int, int, int, int]) -> bool:
        """
        Tell whether two ``(x, y, w, h)`` rectangles overlap.

        Rectangles that merely touch (one's far edge equals the other's near
        edge) are reported as overlapping.
        """
        left1, top1, width1, height1 = rect1
        left2, top2, width2, height2 = rect2

        # Separated along the horizontal axis
        if left1 + width1 < left2 or left2 + width2 < left1:
            return False

        # Separated along the vertical axis
        if top1 + height1 < top2 or top2 + height2 < top1:
            return False

        return True

    def _save_debug_visualization(self, image_path: str, image: np.ndarray,
                                 splits: List[Dict]) -> None:
        """
        Write debug images for a splitting result (no-op unless debug is on).

        Produces ``<name>_splits.jpg`` (a copy of the layout with every panel
        outlined and labelled) followed by one ``<name>_split_<k>.jpg`` per
        panel, all inside ``self.debug_dir``.

        Args:
            image_path (str): Path of the layout; its base name without
                extension prefixes the output files
            image (np.ndarray): The layout image
            splits (List[Dict]): Panels with ``bounds``, ``confidence`` and
                ``image``
        """
        if not self.debug:
            return

        stem, _ = os.path.splitext(os.path.basename(image_path))
        green = (0, 255, 0)

        # Draw on a copy so the caller's image stays untouched
        canvas = image.copy()
        for number, split in enumerate(splits, start=1):
            x, y, w, h = split['bounds']
            cv2.rectangle(canvas, (x, y), (x + w, y + h), green, 2)

            caption = f"Panel {number} ({split['confidence']:.2f})"
            cv2.putText(canvas, caption, (x + 5, y + 20),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, green, 2)

        # Overview image first
        cv2.imwrite(os.path.join(self.debug_dir, f"{stem}_splits.jpg"), canvas)

        # Then each panel crop
        for number, split in enumerate(splits, start=1):
            crop_path = os.path.join(self.debug_dir, f"{stem}_split_{number}.jpg")
            cv2.imwrite(crop_path, split['image'])