#!/usr/bin/env python3
"""
Panel Splitter Module - Multi-method panel splitting for comic/manga layouts
"""

import json
import os
import tempfile
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import cv2
import matplotlib.pyplot as plt
import numpy as np
from scipy import ndimage
from scipy.signal import find_peaks
from skimage.feature import local_binary_pattern
from sklearn.cluster import KMeans


class PanelSplitter:
    """
    Multi-method panel splitting class that uses various computer vision
    techniques to split multi-panel layouts into individual images, then
    matches each split to master images using inlier analysis.

    Only the optimized Canny method is wired into ``split_panels``; the other
    ``_*_method`` detectors and ``_apply_consensus`` are kept as alternatives.
    All detectors assume vertically stacked, full-width panels.
    """

    # Ordering of the textual confidence labels emitted by _match_split_basic.
    # Needed because plain string comparison ranks 'medium' above 'high'.
    _CONFIDENCE_RANK = {'low': 0, 'medium': 1, 'high': 2}

    def __init__(self, debug=False):
        """
        Initialize the PanelSplitter

        Args:
            debug (bool): Enable debug mode for visualization
        """
        self.debug = debug
        self.debug_dir = "debug_splitting"
        if self.debug:
            # exist_ok avoids the check-then-create race of os.path.exists().
            os.makedirs(self.debug_dir, exist_ok=True)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def split_layout_and_match(self, layout_path: str, master_images: List[str],
                               detector_instance=None) -> Dict:
        """
        Main method to split a layout and match splits to master images

        Args:
            layout_path (str): Path to the layout image
            master_images (List[str]): List of master image paths (or master
                ids resolvable under ``master_images/<id>.jpg``)
            detector_instance: Optional detector. If it exposes
                ``count_panels_in_layout`` it supplies the target panel count;
                if it exposes ``match_split_to_masters`` it performs matching.

        Returns:
            Dict: Detection results with matches from all splits
        """
        # Step 1: Get panel count from OpenAI (if detector supports it)
        target_panel_count = 1
        panel_confidence = "unknown"

        if hasattr(detector_instance, 'count_panels_in_layout'):
            print(f"Getting panel count for {os.path.basename(layout_path)}...")
            panel_result = detector_instance.count_panels_in_layout(layout_path) or {}
            # A count of 0/None would cause divisions by zero further down.
            target_panel_count = self._coerce_panel_count(panel_result.get('panel_count', 1))
            panel_confidence = panel_result.get('confidence', 'unknown')
            print(f"OpenAI detected {target_panel_count} panels (confidence: {panel_confidence})")

        # Step 2: Split the layout into individual panels
        print(f"Splitting layout with target count: {target_panel_count}")
        splits = self.split_panels(layout_path, target_panel_count)

        if not splits:
            print("No splits detected, returning empty results")
            return {
                'layout_path': layout_path,
                'detected_masters': [],
                'panel_count': target_panel_count,
                'panel_confidence': panel_confidence,
                'split_mode': 'enabled',
                'splits_generated': 0,
                # Same keys as the success path so callers need no special case.
                'split_results': [],
                'all_matches': [],
            }

        print(f"Generated {len(splits)} splits")

        # Step 3: Match each split to master images
        all_matches = []
        split_results = []
        use_detector = hasattr(detector_instance, 'match_split_to_masters')

        # A private temporary directory is portable, cannot collide with a
        # concurrent run, and is removed even if matching raises.
        with tempfile.TemporaryDirectory(prefix="panel_splits_") as temp_dir:
            for i, split_info in enumerate(splits):
                print(f"Processing split {i+1}/{len(splits)}")

                temp_split_path = os.path.join(temp_dir, f"split_{i}.jpg")
                if not cv2.imwrite(temp_split_path, split_info['image']):
                    print(f"Warning: could not write temporary image for split {i+1}")
                    split_matches = []
                elif use_detector:
                    split_matches = detector_instance.match_split_to_masters(
                        temp_split_path, master_images
                    ) or []
                else:
                    # Use basic inlier analysis if method doesn't exist
                    split_matches = self._match_split_basic(temp_split_path, master_images)

                # Add split metadata to matches
                for match in split_matches:
                    match['split_index'] = i
                    match['split_bounds'] = split_info['bounds']
                    match['split_confidence'] = split_info['confidence']
                    all_matches.append(match)

                split_results.append({
                    'split_index': i,
                    'bounds': split_info['bounds'],
                    'confidence': split_info['confidence'],
                    'matches': split_matches
                })

        # Step 4: Aggregate results
        result = {
            'layout_path': layout_path,
            'detected_masters': [match['master_id'] for match in all_matches],
            'panel_count': target_panel_count,
            'panel_confidence': panel_confidence,
            'split_mode': 'enabled',
            'splits_generated': len(splits),
            'split_results': split_results,
            'all_matches': all_matches
        }

        # Remove duplicates while preserving highest confidence matches
        return self._deduplicate_matches(result)

    def split_panels(self, image_path: str, target_panel_count: int) -> List[Dict]:
        """
        Split a layout image into individual panels

        Args:
            image_path (str): Path to the layout image
            target_panel_count (int): Target number of panels to split into

        Returns:
            List[Dict]: One dict per split with keys ``image``, ``bounds``
            (x, y, w, h), ``confidence`` and ``method_votes``. Empty if the
            image cannot be loaded.
        """
        image = cv2.imread(image_path)
        if image is None:
            print(f"Error: Could not load image {image_path}")
            return []

        target_panel_count = self._coerce_panel_count(target_panel_count)

        height, width = image.shape[:2]
        print(f"Image dimensions: {width}x{height}")

        # Use only optimized Canny detection method
        print("Using optimized Canny detection for panel splitting")
        try:
            method_results = self._optimized_canny_detection(image, target_panel_count)
        except Exception as e:  # best-effort boundary: any failure -> fallback
            print(f"Optimized Canny detection failed: {e}")
            method_results = None

        # NOTE(review): when no separator line is found the Canny method still
        # returns one whole-image panel, so this fallback only triggers on an
        # exception or an empty result. Confirm that is the intended behavior.
        if not method_results:
            if method_results is not None:
                print("Optimized Canny detection failed, falling back to simple division")
            splits = self._fallback_simple_division(image, target_panel_count)
        else:
            splits = []
            for split_bounds in method_results:
                x, y, w, h = split_bounds['bounds']
                # Skip extremely small splits (reduced threshold for 14-panel detection)
                if w < 20 or h < 20:
                    continue
                splits.append({
                    'image': image[y:y+h, x:x+w],
                    'bounds': (x, y, w, h),
                    'confidence': split_bounds['confidence'],
                    'method_votes': split_bounds.get('method_votes', [])
                })

        if self.debug:
            self._save_debug_visualization(image_path, image, splits)

        return splits

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _coerce_panel_count(value) -> int:
        """Return ``value`` as an int >= 1, defaulting to 1 for bad input."""
        try:
            count = int(value)
        except (TypeError, ValueError):
            return 1
        return max(1, count)

    @staticmethod
    def _panels_from_separators(separators, width: int, height: int,
                                target_count: int, method: str) -> List[Dict]:
        """
        Turn horizontal separator rows into stacked full-width panel bounds.

        Args:
            separators: Iterable of ``(y_position, confidence)`` sorted by y.
            width, height: Image size in pixels.
            target_count: Expected number of panels; sets the minimum panel
                height to ``height // (target_count * 2)``.
            method: Label stored under the ``method`` key of every panel.

        Returns:
            List[Dict]: Panels with ``bounds`` (x, y, w, h), ``confidence``
            and ``method``. A separator closer than the minimum panel height
            to the previous cut is ignored; the trailing panel gets a fixed
            confidence of 0.8.
        """
        min_panel_height = height // (max(1, target_count) * 2)
        panels = []
        prev_y = 0
        for y_pos, confidence in separators:
            # Plain Python scalars keep the result JSON-serializable.
            y_pos = int(y_pos)
            if y_pos > prev_y + min_panel_height:
                panels.append({
                    'bounds': (0, prev_y, width, y_pos - prev_y),
                    'confidence': float(confidence),
                    'method': method
                })
                prev_y = y_pos

        # Add final panel
        if prev_y < height - min_panel_height:
            panels.append({
                'bounds': (0, prev_y, width, height - prev_y),
                'confidence': 0.8,
                'method': method
            })
        return panels

    @staticmethod
    def _merge_nearby(candidates: List[Dict], min_gap: int) -> List[Dict]:
        """
        Collapse y-sorted candidates lying within ``min_gap`` rows of the
        previously kept one, keeping the higher-confidence candidate.
        """
        merged = []
        for candidate in candidates:
            if not merged or candidate['y_position'] - merged[-1]['y_position'] > min_gap:
                merged.append(candidate)
            elif candidate['confidence'] > merged[-1]['confidence']:
                merged[-1] = candidate
        return merged

    # ------------------------------------------------------------------
    # Detection methods
    # ------------------------------------------------------------------

    def _enhanced_gradient_analysis(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """Enhanced gradient peak analysis for panel detection"""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = gray.shape
        target_count = max(1, target_count)

        # find_peaks rejects distance < 1, which happens on short images.
        min_distance = max(1, height // target_count // 2)

        # Multi-scale gradient analysis
        separators = []
        for sigma in (5, 10, 20):
            smoothed = cv2.GaussianBlur(gray, (0, 0), sigma)

            # Vertical gradient responds to horizontal separators
            grad_y = cv2.Sobel(smoothed, cv2.CV_64F, 0, 1, ksize=3)

            # Project to get one value per image row
            profile = np.mean(np.abs(grad_y), axis=1)

            prominence = np.std(profile) * 0.5
            peaks, properties = find_peaks(profile, prominence=prominence,
                                           distance=min_distance)
            if peaks.size == 0:
                continue

            prominences = properties['prominences']
            max_prominence = float(np.max(prominences))
            # Confidence is the prominence relative to the strongest peak
            # found at this scale.
            for peak, peak_prominence in zip(peaks, prominences):
                confidence = peak_prominence / max_prominence if max_prominence > 0 else 0.0
                separators.append({
                    'position': int(peak),
                    'confidence': float(confidence),
                    'method': 'gradient_analysis',
                    'scale': sigma
                })

        separators.sort(key=lambda sep: sep['position'])

        return self._panels_from_separators(
            ((sep['position'], sep['confidence']) for sep in separators),
            width, height, target_count, 'gradient_analysis'
        )

    def _optimized_canny_detection(self, image: np.ndarray, target_count: int, *,
                                   hough_threshold: int = 1324,
                                   min_line_length: int = 3530,
                                   max_line_gap: int = 1059) -> List[Dict]:
        """
        Optimized Canny edge detection for panel separators with tuned parameters

        Args:
            image: BGR image.
            target_count: Expected number of panels.
            hough_threshold, min_line_length, max_line_gap: Passed to
                ``cv2.HoughLinesP``. The defaults are the exact configuration
                from tuning results that produced 14 panels. They are absolute
                pixel values, so an image narrower than ``min_line_length``
                cannot yield any separator with the defaults.

        Returns:
            List[Dict]: Stacked full-width panels. With no detected line this
            is a single panel covering the whole image.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = gray.shape

        threshold_set = [(50, 150), (100, 200), (150, 250)]
        kernel = np.ones((3, 1), np.uint8)

        # Multi-threshold Canny detection, each map closed morphologically
        all_edges = []
        for low, high in threshold_set:
            edges = cv2.Canny(gray, low, high)
            all_edges.append(cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel))

        # Combine edge maps
        combined_edges = np.maximum.reduce(all_edges)

        # Find line segments using the probabilistic Hough transform
        lines = cv2.HoughLinesP(
            combined_edges,
            1,
            np.pi/180,
            threshold=hough_threshold,
            minLineLength=min_line_length,
            maxLineGap=max_line_gap
        )

        # Filter for horizontal lines
        horizontal_lines = []
        if lines is not None:
            for line in lines:
                x1, y1, x2, y2 = (int(v) for v in line[0])
                if abs(y2 - y1) < height // 20:  # Nearly horizontal
                    length = abs(x2 - x1)
                    horizontal_lines.append({
                        'y_position': (y1 + y2) // 2,
                        'length': length,
                        'confidence': min(1.0, length / width)
                    })

        horizontal_lines.sort(key=lambda line: line['y_position'])

        return self._panels_from_separators(
            ((line['y_position'], line['confidence']) for line in horizontal_lines),
            width, height, target_count, 'optimized_canny_detection'
        )

    def _template_matching_method(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """Template matching for common panel separators"""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = gray.shape
        target_count = max(1, target_count)

        template_width = max(1, width // 4)

        # NOTE(review): the white and black templates are constant-valued;
        # TM_CCOEFF_NORMED subtracts the template mean, so their response may
        # be degenerate. Verify against the OpenCV version in use.
        white_template = np.ones((5, template_width), dtype=np.uint8) * 255
        black_template = np.zeros((5, template_width), dtype=np.uint8)
        # Gutter template (white with black edges)
        gutter_template = np.ones((10, template_width), dtype=np.uint8) * 255
        gutter_template[0, :] = 0
        gutter_template[-1, :] = 0

        templates = [
            ('white_line', white_template),
            ('black_line', black_template),
            ('gutter', gutter_template),
        ]

        all_matches = []
        for template_name, template in templates:
            # matchTemplate requires the template to fit inside the image.
            if template.shape[0] > height or template.shape[1] > width:
                continue

            result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)

            # Only the best score per row matters for the merge below, so
            # reduce per row instead of creating one entry per matching pixel.
            hits = result >= 0.5
            rows = np.flatnonzero(hits.any(axis=1))
            if rows.size == 0:
                continue
            row_best = np.where(hits, result, -np.inf).max(axis=1)

            half_height = template.shape[0] // 2
            for y in rows:
                all_matches.append({
                    'y_position': int(y) + half_height,
                    'confidence': float(row_best[y]),
                    'template': template_name
                })

        all_matches.sort(key=lambda match: match['y_position'])

        merged_matches = self._merge_nearby(all_matches, height // (target_count * 2))

        return self._panels_from_separators(
            ((match['y_position'], match['confidence']) for match in merged_matches),
            width, height, target_count, 'template_matching'
        )

    def _contour_analysis_method(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """Contour-based panel detection"""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = gray.shape
        target_count = max(1, target_count)

        edges = cv2.Canny(gray, 50, 150)
        # [-2] selects the contour list on both OpenCV 3 (3 return values)
        # and OpenCV 4 (2 return values).
        contours = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        min_width = width // 4
        min_height = height // (target_count * 2)

        # Filter for rectangular contours
        panel_candidates = []
        for contour in contours:
            epsilon = 0.02 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)

            # Roughly rectangular means at least 4 corners
            if len(approx) < 4:
                continue

            x, y, w, h = (int(v) for v in cv2.boundingRect(contour))
            if w <= min_width or h <= min_height:
                continue

            # Share of the bounding box actually covered by the contour
            fill_ratio = float(cv2.contourArea(contour)) / (w * h)
            if fill_ratio > 0.7:
                panel_candidates.append({
                    'bounds': (x, y, w, h),
                    'confidence': min(1.0, fill_ratio),
                    'method': 'contour_analysis'
                })

        # Sort by y position
        panel_candidates.sort(key=lambda candidate: candidate['bounds'][1])

        # Keep a candidate only if it overlaps none of those already kept
        filtered_candidates = []
        for candidate in panel_candidates:
            if not any(self._rectangles_overlap(candidate['bounds'], kept['bounds'])
                       for kept in filtered_candidates):
                filtered_candidates.append(candidate)

        return filtered_candidates

    def _texture_analysis_method(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """Texture-based panel separation"""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = gray.shape
        target_count = max(1, target_count)

        # Local Binary Pattern analysis
        radius = 3
        n_points = 8 * radius
        lbp = local_binary_pattern(gray, n_points, radius, method='uniform')

        # Half-overlapping horizontal strips; both sizes are clamped to 1 so
        # short images do not produce a zero range() step.
        strip_height = max(1, height // (target_count * 4))
        step = max(1, strip_height // 2)
        strip_starts = list(range(0, height - strip_height, step))

        texture_profile = np.array(
            [np.var(lbp[y:y + strip_height, :]) for y in strip_starts]
        )

        separators = []
        if texture_profile.size:
            # Low-texture strips (bottom 25%) are potential separators
            threshold = np.percentile(texture_profile, 25)
            max_variance = float(np.max(texture_profile))
            for y_position, variance in zip(strip_starts, texture_profile):
                if variance < threshold:
                    confidence = 1.0 - (variance / max_variance) if max_variance > 0 else 1.0
                    separators.append({
                        # The strip's real start row; the previous
                        # i * strip_height // 2 drifted for odd strip heights.
                        'y_position': y_position,
                        'confidence': float(confidence)
                    })

        merged_separators = self._merge_nearby(separators, height // (target_count * 2))

        return self._panels_from_separators(
            ((sep['y_position'], sep['confidence']) for sep in merged_separators),
            width, height, target_count, 'texture_analysis'
        )

    def _clustering_method(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """Clustering-based panel detection"""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        height, width = gray.shape
        target_count = max(1, target_count)

        # Downsample for faster processing (never below one pixel)
        scale_factor = 0.25
        small_height = max(1, int(height * scale_factor))
        small_width = max(1, int(width * scale_factor))
        small_gray = cv2.resize(gray, (small_width, small_height))

        # Feature vector per pixel: normalized row, column and intensity
        y_coords, x_coords = np.meshgrid(range(small_height), range(small_width), indexing='ij')
        features = np.column_stack([
            y_coords.flatten(),
            x_coords.flatten(),
            small_gray.flatten()
        ]).astype(np.float32)
        features[:, 0] /= small_height
        features[:, 1] /= small_width
        features[:, 2] /= 255.0

        # Apply K-means clustering; +1 cluster for potential separators.
        # n_init is pinned so results do not depend on the sklearn default.
        n_clusters = target_count + 1
        kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
        labels = kmeans.fit_predict(features)

        label_image = labels.reshape(small_height, small_width)

        # A row is a boundary when fewer than 70% of its clusters also
        # appear in the row above or in the row below.
        boundaries = []
        for y in range(1, small_height - 1):
            current_clusters = set(label_image[y, :])
            above_clusters = set(label_image[y-1, :])
            below_clusters = set(label_image[y+1, :])

            required = len(current_clusters) * 0.7
            if (len(current_clusters & above_clusters) < required
                    or len(current_clusters & below_clusters) < required):
                boundaries.append({
                    'y_position': int(y / scale_factor),
                    'confidence': 0.7
                })

        return self._panels_from_separators(
            ((b['y_position'], b['confidence']) for b in boundaries),
            width, height, target_count, 'clustering'
        )

    def _apply_consensus(self, method_results: List[List[Dict]], image: np.ndarray,
                         target_count: int) -> List[Dict]:
        """
        Apply consensus voting to combine results from multiple methods

        Args:
            method_results: One list of panel dicts per detection method.
            image: The layout image (only its size is used).
            target_count: Expected number of panels.

        Returns:
            List[Dict]: Panels with ``bounds``, ``confidence`` and
            ``method_votes`` (methods that proposed the closing boundary).
        """
        height, width = image.shape[:2]
        target_count = max(1, target_count)

        # Collect the top and bottom edge of every proposed panel
        all_boundaries = []
        for method_result in method_results:
            for panel in method_result:
                _, top, _, panel_height = panel['bounds']
                # Fallback-division panels carry no 'method' key.
                method = panel.get('method', 'unknown')
                for y_position, kind in ((top, 'top'), (top + panel_height, 'bottom')):
                    all_boundaries.append({
                        'y_position': y_position,
                        'confidence': panel['confidence'],
                        'method': method,
                        'type': kind
                    })

        all_boundaries.sort(key=lambda b: b['y_position'])

        # Cluster nearby boundaries
        clustered_boundaries = []
        cluster_threshold = height // (target_count * 3)

        for boundary in all_boundaries:
            y_position = boundary['y_position']
            # Skip image edges
            if y_position < cluster_threshold or y_position > height - cluster_threshold:
                continue

            for cluster in clustered_boundaries:
                if abs(y_position - cluster['y_position']) < cluster_threshold:
                    members = cluster['boundaries']
                    members.append(boundary)
                    total_weight = sum(b['confidence'] for b in members)
                    if total_weight > 0:
                        # Confidence-weighted average position
                        cluster['y_position'] = sum(
                            b['y_position'] * b['confidence'] for b in members
                        ) / total_weight
                    else:
                        # All-zero confidences: fall back to the plain mean
                        cluster['y_position'] = sum(
                            b['y_position'] for b in members
                        ) / len(members)
                    cluster['confidence'] = total_weight / len(members)
                    break
            else:
                clustered_boundaries.append({
                    'y_position': y_position,
                    'confidence': boundary['confidence'],
                    'boundaries': [boundary]
                })

        # Filter boundaries based on confidence and target count
        min_confidence = 0.3
        good_boundaries = [b for b in clustered_boundaries if b['confidence'] >= min_confidence]

        # N panels need at most N - 1 interior boundaries; keep the strongest
        max_boundaries = target_count - 1
        if len(good_boundaries) > max_boundaries:
            good_boundaries.sort(key=lambda b: b['confidence'], reverse=True)
            good_boundaries = good_boundaries[:max_boundaries]

        good_boundaries.sort(key=lambda b: b['y_position'])

        # Create final panel bounds
        min_panel_height = height // (target_count * 2)
        final_bounds = []
        prev_y = 0
        for boundary in good_boundaries:
            y_pos = int(boundary['y_position'])
            if y_pos > prev_y + min_panel_height:
                final_bounds.append({
                    'bounds': (0, prev_y, width, y_pos - prev_y),
                    'confidence': boundary['confidence'],
                    'method_votes': [b['method'] for b in boundary['boundaries']]
                })
                prev_y = y_pos

        # Add final panel
        if prev_y < height - min_panel_height:
            final_bounds.append({
                'bounds': (0, prev_y, width, height - prev_y),
                'confidence': 0.8,
                'method_votes': ['consensus']
            })

        return final_bounds

    def _fallback_simple_division(self, image: np.ndarray, target_count: int) -> List[Dict]:
        """
        Fallback method: simple equal division

        The count is clamped to ``1..height`` so a non-positive count cannot
        divide by zero and an oversized count cannot yield zero-height panels.
        The last panel absorbs the rounding remainder.
        """
        height, width = image.shape[:2]
        count = max(1, min(self._coerce_panel_count(target_count), height))
        panel_height = height // count

        splits = []
        for i in range(count):
            y = i * panel_height
            h = panel_height if i < count - 1 else height - y
            splits.append({
                'image': image[y:y+h, :],
                'bounds': (0, y, width, h),
                'confidence': 0.5,
                'method_votes': ['simple_division']
            })

        return splits

    # ------------------------------------------------------------------
    # Matching
    # ------------------------------------------------------------------

    @staticmethod
    def _resolve_master_path(master) -> Optional[Path]:
        """
        Locate the image for a master entry.

        Tries ``master_images/<entry>.jpg`` first (the id convention), then
        the entry itself as a file path. Returns None if neither exists.
        """
        by_id = Path("master_images") / f"{master}.jpg"
        if by_id.exists():
            return by_id
        direct = Path(str(master))
        return direct if direct.is_file() else None

    @staticmethod
    def _score_against_master(akaze, matcher, kp_split, des_split,
                              master_path: Path) -> Optional[Dict]:
        """
        Match split descriptors against one master image.

        Returns the match details for a medium/high confidence match, or None
        when the master is unreadable, has no features, or matches poorly.
        """
        master_img = cv2.imread(str(master_path), cv2.IMREAD_GRAYSCALE)
        if master_img is None:
            return None

        kp_master, des_master = akaze.detectAndCompute(master_img, None)
        if des_master is None:
            return None

        # Lowe's ratio test on the two nearest neighbours
        good_matches = []
        for pair in matcher.knnMatch(des_split, des_master, k=2):
            if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance:
                good_matches.append(pair[0])

        # Too few correspondences to attempt a homography
        if len(good_matches) < 10:
            return None

        src_pts = np.float32([kp_split[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp_master[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)

        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        if M is None:
            return None

        inliers = int(np.sum(mask))
        inlier_ratio = inliers / len(good_matches)

        # Basic confidence scoring
        if inliers >= 15 and inlier_ratio >= 0.6:
            confidence = 'high'
        elif inliers >= 8 and inlier_ratio >= 0.4:
            confidence = 'medium'
        else:
            # Only medium and high confidence matches are reported
            return None

        return {
            'confidence': confidence,
            'inliers': inliers,
            'match_details': {
                'inliers': inliers,
                'good_matches': len(good_matches),
                'inlier_ratio': round(inlier_ratio, 3)
            }
        }

    def _match_split_basic(self, split_path: str, master_images: List[str]) -> List[Dict]:
        """
        Basic matching using OpenCV features (fallback)

        Args:
            split_path: Path to the split image on disk.
            master_images: Master ids (``master_images/<id>.jpg``) or paths.

        Returns:
            List[Dict]: One entry per medium/high confidence master, with
            ``master_id`` (the entry as given), ``confidence``, ``inliers``
            and ``match_details``. Best-effort: errors are printed and the
            affected master is skipped.
        """
        matches = []

        split_img = cv2.imread(split_path, cv2.IMREAD_GRAYSCALE)
        if split_img is None:
            return matches

        try:
            akaze = cv2.AKAZE_create()
            matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
            kp_split, des_split = akaze.detectAndCompute(split_img, None)
        except cv2.error as e:
            print(f"Error in basic matching: {e}")
            return matches

        if des_split is None:
            return matches

        for master_id in master_images:
            master_path = self._resolve_master_path(master_id)
            if master_path is None:
                continue

            try:
                scored = self._score_against_master(
                    akaze, matcher, kp_split, des_split, master_path
                )
            except Exception as e:  # best-effort: one bad master must not stop the rest
                print(f"Error in basic matching: {e}")
                continue

            if scored is not None:
                matches.append({'master_id': master_id, **scored})

        return matches

    @classmethod
    def _match_rank(cls, match: Dict) -> Tuple[float, float]:
        """
        Sort key for a match: ``(confidence level, inlier count)``.

        Textual confidences are ranked low < medium < high (unknown labels
        rank below all of them); numeric confidences are used as-is.
        """
        confidence = match.get('confidence', 0)
        if isinstance(confidence, str):
            level = float(cls._CONFIDENCE_RANK.get(confidence.strip().lower(), -1))
        else:
            try:
                level = float(confidence)
            except (TypeError, ValueError):
                level = -1.0

        try:
            inliers = float(match.get('inliers', 0))
        except (TypeError, ValueError):
            inliers = 0.0

        return (level, inliers)

    def _deduplicate_matches(self, result: Dict) -> Dict:
        """
        Remove duplicate matches, keeping the highest confidence one per master

        Ties on confidence are broken by inlier count, then by first
        occurrence. Updates ``all_matches`` and ``detected_masters`` in
        ``result`` and returns it.
        """
        all_matches = result.get('all_matches')
        if not all_matches:
            return result

        # dict preserves insertion order, so masters stay in first-seen order
        best_by_master = {}
        for match in all_matches:
            master_id = match['master_id']
            current = best_by_master.get(master_id)
            if current is None or self._match_rank(match) > self._match_rank(current):
                best_by_master[master_id] = match

        deduplicated_matches = list(best_by_master.values())
        result['all_matches'] = deduplicated_matches
        result['detected_masters'] = [match['master_id'] for match in deduplicated_matches]

        return result

    def _rectangles_overlap(self, rect1: Tuple[int, int, int, int],
                            rect2: Tuple[int, int, int, int]) -> bool:
        """
        Check if two (x, y, w, h) rectangles overlap

        Rectangles that merely share an edge or a corner count as overlapping.
        """
        x1, y1, w1, h1 = rect1
        x2, y2, w2, h2 = rect2

        return not (x1 + w1 < x2 or x2 + w2 < x1 or y1 + h1 < y2 or y2 + h2 < y1)

    # ------------------------------------------------------------------
    # Debug output
    # ------------------------------------------------------------------

    def _save_debug_visualization(self, image_path: str, image: np.ndarray,
                                  splits: List[Dict]) -> None:
        """
        Save debug visualization of the splitting results

        Writes ``<name>_splits.jpg`` (layout with panel rectangles and labels)
        and one ``<name>_split_<i>.jpg`` per split into ``self.debug_dir``.
        Does nothing unless debug mode is on.
        """
        if not self.debug:
            return

        # debug may have been switched on after construction
        os.makedirs(self.debug_dir, exist_ok=True)

        base_name = os.path.splitext(os.path.basename(image_path))[0]

        # Create visualization with boundaries
        vis_image = image.copy()
        for i, split in enumerate(splits):
            x, y, w, h = split['bounds']
            cv2.rectangle(vis_image, (x, y), (x + w, y + h), (0, 255, 0), 2)

            label = f"Panel {i+1} ({split['confidence']:.2f})"
            cv2.putText(vis_image, label, (x + 5, y + 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        vis_path = os.path.join(self.debug_dir, f"{base_name}_splits.jpg")
        cv2.imwrite(vis_path, vis_image)

        # Save individual splits
        for i, split in enumerate(splits):
            split_path = os.path.join(self.debug_dir, f"{base_name}_split_{i+1}.jpg")
            cv2.imwrite(split_path, split['image'])