857 lines
No EOL
34 KiB
Python
857 lines
No EOL
34 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Panel Splitter Module - Multi-method panel splitting for comic/manga layouts
|
|
"""
|
|
|
|
import os
|
|
import cv2
|
|
import numpy as np
|
|
from typing import List, Dict, Tuple, Optional
|
|
import json
|
|
from pathlib import Path
|
|
from scipy import ndimage
|
|
from scipy.signal import find_peaks
|
|
from sklearn.cluster import KMeans
|
|
from skimage.feature import local_binary_pattern
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
class PanelSplitter:
|
|
"""
|
|
Multi-method panel splitting class that uses various computer vision techniques
|
|
to split multi-panel layouts into individual images, then matches each split
|
|
to master images using inlier analysis.
|
|
"""
|
|
|
|
def __init__(self, debug=False):
|
|
"""
|
|
Initialize the PanelSplitter
|
|
|
|
Args:
|
|
debug (bool): Enable debug mode for visualization
|
|
"""
|
|
self.debug = debug
|
|
self.debug_dir = "debug_splitting"
|
|
if self.debug and not os.path.exists(self.debug_dir):
|
|
os.makedirs(self.debug_dir)
|
|
|
|
def split_layout_and_match(self, layout_path: str, master_images: List[str],
|
|
detector_instance=None) -> Dict:
|
|
"""
|
|
Main method to split a layout and match splits to master images
|
|
|
|
Args:
|
|
layout_path (str): Path to the layout image
|
|
master_images (List[str]): List of master image paths
|
|
detector_instance: The detector instance to use for matching
|
|
|
|
Returns:
|
|
Dict: Detection results with matches from all splits
|
|
"""
|
|
# Step 1: Get panel count from OpenAI (if detector supports it)
|
|
target_panel_count = 1
|
|
panel_confidence = "unknown"
|
|
|
|
if hasattr(detector_instance, 'count_panels_in_layout'):
|
|
print(f"Getting panel count for {os.path.basename(layout_path)}...")
|
|
panel_result = detector_instance.count_panels_in_layout(layout_path)
|
|
target_panel_count = panel_result.get('panel_count', 1)
|
|
panel_confidence = panel_result.get('confidence', 'unknown')
|
|
print(f"OpenAI detected {target_panel_count} panels (confidence: {panel_confidence})")
|
|
|
|
# Step 2: Split the layout into individual panels
|
|
print(f"Splitting layout with target count: {target_panel_count}")
|
|
splits = self.split_panels(layout_path, target_panel_count)
|
|
|
|
if not splits:
|
|
print("No splits detected, returning empty results")
|
|
return {
|
|
'layout_path': layout_path,
|
|
'detected_masters': [],
|
|
'panel_count': target_panel_count,
|
|
'panel_confidence': panel_confidence,
|
|
'split_mode': 'enabled',
|
|
'splits_generated': 0
|
|
}
|
|
|
|
print(f"Generated {len(splits)} splits")
|
|
|
|
# Step 3: Match each split to master images
|
|
all_matches = []
|
|
split_results = []
|
|
|
|
for i, split_info in enumerate(splits):
|
|
print(f"Processing split {i+1}/{len(splits)}")
|
|
|
|
# Save split image temporarily for matching
|
|
split_image = split_info['image']
|
|
temp_split_path = f"/tmp/split_{i}.jpg"
|
|
cv2.imwrite(temp_split_path, split_image)
|
|
|
|
# Match this split to master images using existing inlier analysis
|
|
if hasattr(detector_instance, 'match_split_to_masters'):
|
|
split_matches = detector_instance.match_split_to_masters(
|
|
temp_split_path, master_images
|
|
)
|
|
else:
|
|
# Use basic inlier analysis if method doesn't exist
|
|
split_matches = self._match_split_basic(temp_split_path, master_images)
|
|
|
|
# Add split metadata to matches
|
|
for match in split_matches:
|
|
match['split_index'] = i
|
|
match['split_bounds'] = split_info['bounds']
|
|
match['split_confidence'] = split_info['confidence']
|
|
all_matches.append(match)
|
|
|
|
split_results.append({
|
|
'split_index': i,
|
|
'bounds': split_info['bounds'],
|
|
'confidence': split_info['confidence'],
|
|
'matches': split_matches
|
|
})
|
|
|
|
# Clean up temporary file
|
|
if os.path.exists(temp_split_path):
|
|
os.remove(temp_split_path)
|
|
|
|
# Step 4: Aggregate results
|
|
result = {
|
|
'layout_path': layout_path,
|
|
'detected_masters': [match['master_id'] for match in all_matches],
|
|
'panel_count': target_panel_count,
|
|
'panel_confidence': panel_confidence,
|
|
'split_mode': 'enabled',
|
|
'splits_generated': len(splits),
|
|
'split_results': split_results,
|
|
'all_matches': all_matches
|
|
}
|
|
|
|
# Remove duplicates while preserving highest confidence matches
|
|
result = self._deduplicate_matches(result)
|
|
|
|
return result
|
|
|
|
def split_panels(self, image_path: str, target_panel_count: int) -> List[Dict]:
|
|
"""
|
|
Split a layout image into individual panels using multiple methods
|
|
|
|
Args:
|
|
image_path (str): Path to the layout image
|
|
target_panel_count (int): Target number of panels to split into
|
|
|
|
Returns:
|
|
List[Dict]: List of split information with image data and metadata
|
|
"""
|
|
# Load image
|
|
image = cv2.imread(image_path)
|
|
if image is None:
|
|
print(f"Error: Could not load image {image_path}")
|
|
return []
|
|
|
|
height, width = image.shape[:2]
|
|
print(f"Image dimensions: {width}x{height}")
|
|
|
|
# Use only optimized Canny detection method
|
|
print("Using optimized Canny detection for panel splitting")
|
|
|
|
try:
|
|
method_results = self._optimized_canny_detection(image, target_panel_count)
|
|
if not method_results:
|
|
print("Optimized Canny detection failed, falling back to simple division")
|
|
return self._fallback_simple_division(image, target_panel_count)
|
|
except Exception as e:
|
|
print(f"Optimized Canny detection failed: {e}")
|
|
return self._fallback_simple_division(image, target_panel_count)
|
|
|
|
# Use results directly (no consensus needed for single method)
|
|
consensus_splits = method_results
|
|
|
|
# Create split images
|
|
splits = []
|
|
for i, split_bounds in enumerate(consensus_splits):
|
|
x, y, w, h = split_bounds['bounds']
|
|
split_image = image[y:y+h, x:x+w]
|
|
|
|
# Skip extremely small splits (reduced threshold for 14-panel detection)
|
|
if w < 20 or h < 20:
|
|
continue
|
|
|
|
splits.append({
|
|
'image': split_image,
|
|
'bounds': (x, y, w, h),
|
|
'confidence': split_bounds['confidence'],
|
|
'method_votes': split_bounds.get('method_votes', [])
|
|
})
|
|
|
|
if self.debug:
|
|
self._save_debug_visualization(image_path, image, splits)
|
|
|
|
return splits
|
|
|
|
def _enhanced_gradient_analysis(self, image: np.ndarray, target_count: int) -> List[Dict]:
|
|
"""Enhanced gradient peak analysis for panel detection"""
|
|
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
|
height, width = gray.shape
|
|
|
|
# Multi-scale gradient analysis
|
|
separators = []
|
|
scales = [5, 10, 20]
|
|
|
|
for sigma in scales:
|
|
# Smooth the image
|
|
smoothed = cv2.GaussianBlur(gray, (0, 0), sigma)
|
|
|
|
# Calculate vertical gradient (for horizontal separators)
|
|
grad_y = cv2.Sobel(smoothed, cv2.CV_64F, 0, 1, ksize=3)
|
|
|
|
# Project to get horizontal profile
|
|
profile = np.mean(np.abs(grad_y), axis=1)
|
|
|
|
# Find peaks
|
|
prominence = np.std(profile) * 0.5
|
|
peaks, properties = find_peaks(profile, prominence=prominence, distance=height//target_count//2)
|
|
|
|
# Add to separators with confidence based on prominence
|
|
for peak in peaks:
|
|
confidence = properties['prominences'][list(peaks).index(peak)] / np.max(properties['prominences'])
|
|
separators.append({
|
|
'position': peak,
|
|
'confidence': confidence,
|
|
'method': 'gradient_analysis',
|
|
'scale': sigma
|
|
})
|
|
|
|
# Convert separator positions to bounding boxes
|
|
separators.sort(key=lambda x: x['position'])
|
|
|
|
# Create bounds from separators
|
|
bounds = []
|
|
prev_y = 0
|
|
|
|
for sep in separators:
|
|
if sep['position'] > prev_y + height // (target_count * 2): # Minimum panel height
|
|
bounds.append({
|
|
'bounds': (0, prev_y, width, sep['position'] - prev_y),
|
|
'confidence': sep['confidence'],
|
|
'method': 'gradient_analysis'
|
|
})
|
|
prev_y = sep['position']
|
|
|
|
# Add final panel
|
|
if prev_y < height - height // (target_count * 2):
|
|
bounds.append({
|
|
'bounds': (0, prev_y, width, height - prev_y),
|
|
'confidence': 0.8,
|
|
'method': 'gradient_analysis'
|
|
})
|
|
|
|
return bounds
|
|
|
|
def _optimized_canny_detection(self, image: np.ndarray, target_count: int) -> List[Dict]:
|
|
"""Optimized Canny edge detection for panel separators with tuned parameters"""
|
|
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
|
height, width = gray.shape
|
|
|
|
# Exact configuration from tuning results that produced 14 panels
|
|
threshold_set = [(50, 150), (100, 200), (150, 250)]
|
|
morphology_kernel = (3, 1)
|
|
hough_threshold = 1324
|
|
min_line_length = 3530
|
|
max_line_gap = 1059
|
|
|
|
# Multi-threshold Canny detection
|
|
all_edges = []
|
|
for low, high in threshold_set:
|
|
edges = cv2.Canny(gray, low, high)
|
|
|
|
# Morphological operations
|
|
kernel = np.ones(morphology_kernel, np.uint8)
|
|
edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)
|
|
all_edges.append(edges)
|
|
|
|
# Combine edge maps
|
|
combined_edges = np.maximum.reduce(all_edges)
|
|
|
|
# Find horizontal lines using Hough transform
|
|
lines = cv2.HoughLinesP(
|
|
combined_edges,
|
|
1,
|
|
np.pi/180,
|
|
threshold=hough_threshold,
|
|
minLineLength=min_line_length,
|
|
maxLineGap=max_line_gap
|
|
)
|
|
|
|
# Filter for horizontal lines
|
|
horizontal_lines = []
|
|
if lines is not None:
|
|
for line in lines:
|
|
x1, y1, x2, y2 = line[0]
|
|
if abs(y2 - y1) < height // 20: # Nearly horizontal
|
|
horizontal_lines.append({
|
|
'y_position': (y1 + y2) // 2,
|
|
'length': abs(x2 - x1),
|
|
'confidence': min(1.0, abs(x2 - x1) / width)
|
|
})
|
|
|
|
# Sort by y position and create bounds
|
|
horizontal_lines.sort(key=lambda x: x['y_position'])
|
|
|
|
bounds = []
|
|
prev_y = 0
|
|
|
|
for line in horizontal_lines:
|
|
y_pos = line['y_position']
|
|
# Use the same threshold that worked in debug script
|
|
if y_pos > prev_y + height // (target_count * 2):
|
|
bounds.append({
|
|
'bounds': (0, prev_y, width, y_pos - prev_y),
|
|
'confidence': line['confidence'],
|
|
'method': 'optimized_canny_detection'
|
|
})
|
|
prev_y = y_pos
|
|
|
|
# Add final panel
|
|
if prev_y < height - height // (target_count * 2):
|
|
bounds.append({
|
|
'bounds': (0, prev_y, width, height - prev_y),
|
|
'confidence': 0.8,
|
|
'method': 'optimized_canny_detection'
|
|
})
|
|
|
|
return bounds
|
|
|
|
def _template_matching_method(self, image: np.ndarray, target_count: int) -> List[Dict]:
|
|
"""Template matching for common panel separators"""
|
|
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
|
height, width = gray.shape
|
|
|
|
# Create separator templates
|
|
templates = []
|
|
|
|
# White horizontal line template
|
|
white_template = np.ones((5, width//4), dtype=np.uint8) * 255
|
|
templates.append(('white_line', white_template))
|
|
|
|
# Black horizontal line template
|
|
black_template = np.zeros((5, width//4), dtype=np.uint8)
|
|
templates.append(('black_line', black_template))
|
|
|
|
# Gutter template (white with black edges)
|
|
gutter_template = np.ones((10, width//4), dtype=np.uint8) * 255
|
|
gutter_template[0, :] = 0
|
|
gutter_template[-1, :] = 0
|
|
templates.append(('gutter', gutter_template))
|
|
|
|
# Find matches for each template
|
|
all_matches = []
|
|
|
|
for template_name, template in templates:
|
|
result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)
|
|
|
|
# Find good matches
|
|
locations = np.where(result >= 0.5)
|
|
|
|
for y, x in zip(locations[0], locations[1]):
|
|
confidence = result[y, x]
|
|
all_matches.append({
|
|
'y_position': y + template.shape[0] // 2,
|
|
'confidence': confidence,
|
|
'template': template_name
|
|
})
|
|
|
|
# Sort by y position and remove duplicates
|
|
all_matches.sort(key=lambda x: x['y_position'])
|
|
|
|
# Merge nearby matches
|
|
merged_matches = []
|
|
for match in all_matches:
|
|
if not merged_matches or match['y_position'] - merged_matches[-1]['y_position'] > height // (target_count * 2):
|
|
merged_matches.append(match)
|
|
else:
|
|
# Keep the one with higher confidence
|
|
if match['confidence'] > merged_matches[-1]['confidence']:
|
|
merged_matches[-1] = match
|
|
|
|
# Create bounds
|
|
bounds = []
|
|
prev_y = 0
|
|
|
|
for match in merged_matches:
|
|
y_pos = match['y_position']
|
|
if y_pos > prev_y + height // (target_count * 2):
|
|
bounds.append({
|
|
'bounds': (0, prev_y, width, y_pos - prev_y),
|
|
'confidence': match['confidence'],
|
|
'method': 'template_matching'
|
|
})
|
|
prev_y = y_pos
|
|
|
|
# Add final panel
|
|
if prev_y < height - height // (target_count * 2):
|
|
bounds.append({
|
|
'bounds': (0, prev_y, width, height - prev_y),
|
|
'confidence': 0.8,
|
|
'method': 'template_matching'
|
|
})
|
|
|
|
return bounds
|
|
|
|
def _contour_analysis_method(self, image: np.ndarray, target_count: int) -> List[Dict]:
|
|
"""Contour-based panel detection"""
|
|
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
|
height, width = gray.shape
|
|
|
|
# Find contours
|
|
edges = cv2.Canny(gray, 50, 150)
|
|
contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
|
|
|
# Filter for rectangular contours
|
|
panel_candidates = []
|
|
|
|
for contour in contours:
|
|
# Approximate contour to polygon
|
|
epsilon = 0.02 * cv2.arcLength(contour, True)
|
|
approx = cv2.approxPolyDP(contour, epsilon, True)
|
|
|
|
# Check if it's roughly rectangular (4 corners)
|
|
if len(approx) >= 4:
|
|
x, y, w, h = cv2.boundingRect(contour)
|
|
|
|
# Filter by size and aspect ratio
|
|
if w > width // 4 and h > height // (target_count * 2):
|
|
area = cv2.contourArea(contour)
|
|
rect_area = w * h
|
|
|
|
# Check if it's mostly rectangular
|
|
if area / rect_area > 0.7:
|
|
panel_candidates.append({
|
|
'bounds': (x, y, w, h),
|
|
'confidence': min(1.0, area / rect_area),
|
|
'method': 'contour_analysis'
|
|
})
|
|
|
|
# Sort by y position
|
|
panel_candidates.sort(key=lambda x: x['bounds'][1])
|
|
|
|
# Remove overlapping candidates
|
|
filtered_candidates = []
|
|
for candidate in panel_candidates:
|
|
overlap = False
|
|
for existing in filtered_candidates:
|
|
if self._rectangles_overlap(candidate['bounds'], existing['bounds']):
|
|
overlap = True
|
|
break
|
|
if not overlap:
|
|
filtered_candidates.append(candidate)
|
|
|
|
return filtered_candidates
|
|
|
|
def _texture_analysis_method(self, image: np.ndarray, target_count: int) -> List[Dict]:
|
|
"""Texture-based panel separation"""
|
|
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
|
height, width = gray.shape
|
|
|
|
# Local Binary Pattern analysis
|
|
radius = 3
|
|
n_points = 8 * radius
|
|
lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
|
|
|
|
# Create texture profile by analyzing horizontal strips
|
|
strip_height = height // (target_count * 4)
|
|
texture_profile = []
|
|
|
|
for y in range(0, height - strip_height, strip_height // 2):
|
|
strip = lbp[y:y + strip_height, :]
|
|
texture_variance = np.var(strip)
|
|
texture_profile.append(texture_variance)
|
|
|
|
# Find low-texture regions (potential separators)
|
|
texture_profile = np.array(texture_profile)
|
|
threshold = np.percentile(texture_profile, 25) # Bottom 25%
|
|
|
|
separators = []
|
|
for i, variance in enumerate(texture_profile):
|
|
if variance < threshold:
|
|
y_position = i * strip_height // 2
|
|
confidence = 1.0 - (variance / np.max(texture_profile))
|
|
separators.append({
|
|
'y_position': y_position,
|
|
'confidence': confidence
|
|
})
|
|
|
|
# Merge nearby separators
|
|
merged_separators = []
|
|
for sep in separators:
|
|
if not merged_separators or sep['y_position'] - merged_separators[-1]['y_position'] > height // (target_count * 2):
|
|
merged_separators.append(sep)
|
|
else:
|
|
# Keep the one with higher confidence
|
|
if sep['confidence'] > merged_separators[-1]['confidence']:
|
|
merged_separators[-1] = sep
|
|
|
|
# Create bounds
|
|
bounds = []
|
|
prev_y = 0
|
|
|
|
for sep in merged_separators:
|
|
y_pos = sep['y_position']
|
|
if y_pos > prev_y + height // (target_count * 2):
|
|
bounds.append({
|
|
'bounds': (0, prev_y, width, y_pos - prev_y),
|
|
'confidence': sep['confidence'],
|
|
'method': 'texture_analysis'
|
|
})
|
|
prev_y = y_pos
|
|
|
|
# Add final panel
|
|
if prev_y < height - height // (target_count * 2):
|
|
bounds.append({
|
|
'bounds': (0, prev_y, width, height - prev_y),
|
|
'confidence': 0.8,
|
|
'method': 'texture_analysis'
|
|
})
|
|
|
|
return bounds
|
|
|
|
def _clustering_method(self, image: np.ndarray, target_count: int) -> List[Dict]:
|
|
"""Clustering-based panel detection"""
|
|
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
|
height, width = gray.shape
|
|
|
|
# Downsample for faster processing
|
|
scale_factor = 0.25
|
|
small_height = int(height * scale_factor)
|
|
small_width = int(width * scale_factor)
|
|
small_gray = cv2.resize(gray, (small_width, small_height))
|
|
|
|
# Create feature vectors for each pixel (position + intensity)
|
|
y_coords, x_coords = np.meshgrid(range(small_height), range(small_width), indexing='ij')
|
|
|
|
features = np.column_stack([
|
|
y_coords.flatten(),
|
|
x_coords.flatten(),
|
|
small_gray.flatten()
|
|
])
|
|
|
|
# Normalize features
|
|
features = features.astype(np.float32)
|
|
features[:, 0] /= small_height
|
|
features[:, 1] /= small_width
|
|
features[:, 2] /= 255.0
|
|
|
|
# Apply K-means clustering
|
|
n_clusters = target_count + 1 # +1 for potential separators
|
|
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
|
|
labels = kmeans.fit_predict(features)
|
|
|
|
# Reshape labels back to image shape
|
|
label_image = labels.reshape(small_height, small_width)
|
|
|
|
# Find horizontal boundaries between clusters
|
|
boundaries = []
|
|
for y in range(1, small_height - 1):
|
|
# Check if this row represents a cluster boundary
|
|
current_clusters = set(label_image[y, :])
|
|
above_clusters = set(label_image[y-1, :])
|
|
below_clusters = set(label_image[y+1, :])
|
|
|
|
# If clusters change significantly, it might be a boundary
|
|
if len(current_clusters.intersection(above_clusters)) < len(current_clusters) * 0.7 or \
|
|
len(current_clusters.intersection(below_clusters)) < len(current_clusters) * 0.7:
|
|
boundaries.append({
|
|
'y_position': int(y / scale_factor),
|
|
'confidence': 0.7
|
|
})
|
|
|
|
# Create bounds from boundaries
|
|
bounds = []
|
|
prev_y = 0
|
|
|
|
for boundary in boundaries:
|
|
y_pos = boundary['y_position']
|
|
if y_pos > prev_y + height // (target_count * 2):
|
|
bounds.append({
|
|
'bounds': (0, prev_y, width, y_pos - prev_y),
|
|
'confidence': boundary['confidence'],
|
|
'method': 'clustering'
|
|
})
|
|
prev_y = y_pos
|
|
|
|
# Add final panel
|
|
if prev_y < height - height // (target_count * 2):
|
|
bounds.append({
|
|
'bounds': (0, prev_y, width, height - prev_y),
|
|
'confidence': 0.8,
|
|
'method': 'clustering'
|
|
})
|
|
|
|
return bounds
|
|
|
|
def _apply_consensus(self, method_results: List[List[Dict]], image: np.ndarray,
|
|
target_count: int) -> List[Dict]:
|
|
"""Apply consensus voting to combine results from multiple methods"""
|
|
height, width = image.shape[:2]
|
|
|
|
# Collect all proposed boundaries
|
|
all_boundaries = []
|
|
|
|
for method_result in method_results:
|
|
for panel in method_result:
|
|
bounds = panel['bounds']
|
|
# Add both top and bottom boundaries
|
|
all_boundaries.append({
|
|
'y_position': bounds[1], # Top boundary
|
|
'confidence': panel['confidence'],
|
|
'method': panel['method'],
|
|
'type': 'top'
|
|
})
|
|
all_boundaries.append({
|
|
'y_position': bounds[1] + bounds[3], # Bottom boundary
|
|
'confidence': panel['confidence'],
|
|
'method': panel['method'],
|
|
'type': 'bottom'
|
|
})
|
|
|
|
# Sort boundaries by position
|
|
all_boundaries.sort(key=lambda x: x['y_position'])
|
|
|
|
# Cluster nearby boundaries
|
|
clustered_boundaries = []
|
|
cluster_threshold = height // (target_count * 3)
|
|
|
|
for boundary in all_boundaries:
|
|
# Skip image edges
|
|
if boundary['y_position'] < cluster_threshold or boundary['y_position'] > height - cluster_threshold:
|
|
continue
|
|
|
|
# Find existing cluster or create new one
|
|
added_to_cluster = False
|
|
for cluster in clustered_boundaries:
|
|
if abs(boundary['y_position'] - cluster['y_position']) < cluster_threshold:
|
|
# Add to existing cluster
|
|
cluster['boundaries'].append(boundary)
|
|
# Update cluster position (weighted average)
|
|
total_weight = sum(b['confidence'] for b in cluster['boundaries'])
|
|
cluster['y_position'] = sum(b['y_position'] * b['confidence']
|
|
for b in cluster['boundaries']) / total_weight
|
|
cluster['confidence'] = total_weight / len(cluster['boundaries'])
|
|
added_to_cluster = True
|
|
break
|
|
|
|
if not added_to_cluster:
|
|
clustered_boundaries.append({
|
|
'y_position': boundary['y_position'],
|
|
'confidence': boundary['confidence'],
|
|
'boundaries': [boundary]
|
|
})
|
|
|
|
# Sort clustered boundaries and select best ones
|
|
clustered_boundaries.sort(key=lambda x: x['y_position'])
|
|
|
|
# Filter boundaries based on confidence and target count
|
|
min_confidence = 0.3
|
|
good_boundaries = [b for b in clustered_boundaries if b['confidence'] >= min_confidence]
|
|
|
|
# Limit to reasonable number of boundaries
|
|
if len(good_boundaries) > target_count - 1:
|
|
good_boundaries.sort(key=lambda x: x['confidence'], reverse=True)
|
|
good_boundaries = good_boundaries[:target_count - 1]
|
|
good_boundaries.sort(key=lambda x: x['y_position'])
|
|
|
|
# Create final panel bounds
|
|
final_bounds = []
|
|
prev_y = 0
|
|
|
|
for boundary in good_boundaries:
|
|
y_pos = int(boundary['y_position'])
|
|
if y_pos > prev_y + height // (target_count * 2):
|
|
method_votes = [b['method'] for b in boundary['boundaries']]
|
|
final_bounds.append({
|
|
'bounds': (0, prev_y, width, y_pos - prev_y),
|
|
'confidence': boundary['confidence'],
|
|
'method_votes': method_votes
|
|
})
|
|
prev_y = y_pos
|
|
|
|
# Add final panel
|
|
if prev_y < height - height // (target_count * 2):
|
|
final_bounds.append({
|
|
'bounds': (0, prev_y, width, height - prev_y),
|
|
'confidence': 0.8,
|
|
'method_votes': ['consensus']
|
|
})
|
|
|
|
return final_bounds
|
|
|
|
def _fallback_simple_division(self, image: np.ndarray, target_count: int) -> List[Dict]:
|
|
"""Fallback method: simple equal division"""
|
|
height, width = image.shape[:2]
|
|
panel_height = height // target_count
|
|
|
|
splits = []
|
|
for i in range(target_count):
|
|
y = i * panel_height
|
|
h = panel_height if i < target_count - 1 else height - y
|
|
|
|
splits.append({
|
|
'image': image[y:y+h, :],
|
|
'bounds': (0, y, width, h),
|
|
'confidence': 0.5,
|
|
'method_votes': ['simple_division']
|
|
})
|
|
|
|
return splits
|
|
|
|
def _match_split_basic(self, split_path: str, master_images: List[str]) -> List[Dict]:
|
|
"""Basic matching using OpenCV features (fallback)"""
|
|
matches = []
|
|
|
|
try:
|
|
# Load the split image
|
|
split_img = cv2.imread(split_path, cv2.IMREAD_GRAYSCALE)
|
|
if split_img is None:
|
|
return matches
|
|
|
|
# Initialize feature detector
|
|
akaze = cv2.AKAZE_create()
|
|
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
|
|
|
|
# Detect keypoints and descriptors for split image
|
|
kp_split, des_split = akaze.detectAndCompute(split_img, None)
|
|
|
|
if des_split is None:
|
|
return matches
|
|
|
|
# Load master images from the master_images directory
|
|
master_images_path = Path("master_images")
|
|
|
|
for master_id in master_images:
|
|
master_path = master_images_path / f"{master_id}.jpg"
|
|
if not master_path.exists():
|
|
continue
|
|
|
|
# Load master image
|
|
master_img = cv2.imread(str(master_path), cv2.IMREAD_GRAYSCALE)
|
|
if master_img is None:
|
|
continue
|
|
|
|
# Detect keypoints and descriptors for master image
|
|
kp_master, des_master = akaze.detectAndCompute(master_img, None)
|
|
|
|
if des_master is None:
|
|
continue
|
|
|
|
# Match features
|
|
matches_raw = bf.knnMatch(des_split, des_master, k=2)
|
|
|
|
# Apply Lowe's ratio test
|
|
good_matches = []
|
|
for match_pair in matches_raw:
|
|
if len(match_pair) == 2:
|
|
m, n = match_pair
|
|
if m.distance < 0.7 * n.distance:
|
|
good_matches.append(m)
|
|
|
|
# If we have enough good matches, try to find homography
|
|
if len(good_matches) >= 10:
|
|
src_pts = np.float32([kp_split[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
|
|
dst_pts = np.float32([kp_master[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
|
|
|
|
try:
|
|
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
|
|
if M is not None:
|
|
inliers = int(np.sum(mask))
|
|
inlier_ratio = inliers / len(good_matches)
|
|
|
|
# Basic confidence scoring
|
|
if inliers >= 15 and inlier_ratio >= 0.6:
|
|
confidence = 'high'
|
|
elif inliers >= 8 and inlier_ratio >= 0.4:
|
|
confidence = 'medium'
|
|
else:
|
|
confidence = 'low'
|
|
|
|
# Only include medium and high confidence matches
|
|
if confidence in ['medium', 'high']:
|
|
matches.append({
|
|
'master_id': master_id,
|
|
'confidence': confidence,
|
|
'inliers': inliers,
|
|
'match_details': {
|
|
'inliers': inliers,
|
|
'good_matches': len(good_matches),
|
|
'inlier_ratio': round(inlier_ratio, 3)
|
|
}
|
|
})
|
|
except:
|
|
continue
|
|
|
|
except Exception as e:
|
|
print(f"Error in basic matching: {e}")
|
|
|
|
return matches
|
|
|
|
def _deduplicate_matches(self, result: Dict) -> Dict:
|
|
"""Remove duplicate matches, keeping highest confidence ones"""
|
|
if not result['all_matches']:
|
|
return result
|
|
|
|
# Group matches by master_id
|
|
master_groups = {}
|
|
for match in result['all_matches']:
|
|
master_id = match['master_id']
|
|
if master_id not in master_groups:
|
|
master_groups[master_id] = []
|
|
master_groups[master_id].append(match)
|
|
|
|
# Keep only the highest confidence match for each master
|
|
deduplicated_matches = []
|
|
for master_id, matches in master_groups.items():
|
|
best_match = max(matches, key=lambda x: x.get('confidence', 0))
|
|
deduplicated_matches.append(best_match)
|
|
|
|
result['all_matches'] = deduplicated_matches
|
|
result['detected_masters'] = [match['master_id'] for match in deduplicated_matches]
|
|
|
|
return result
|
|
|
|
def _rectangles_overlap(self, rect1: Tuple[int, int, int, int],
|
|
rect2: Tuple[int, int, int, int]) -> bool:
|
|
"""Check if two rectangles overlap"""
|
|
x1, y1, w1, h1 = rect1
|
|
x2, y2, w2, h2 = rect2
|
|
|
|
return not (x1 + w1 < x2 or x2 + w2 < x1 or y1 + h1 < y2 or y2 + h2 < y1)
|
|
|
|
def _save_debug_visualization(self, image_path: str, image: np.ndarray,
|
|
splits: List[Dict]) -> None:
|
|
"""Save debug visualization of the splitting results"""
|
|
if not self.debug:
|
|
return
|
|
|
|
base_name = os.path.splitext(os.path.basename(image_path))[0]
|
|
|
|
# Create visualization with boundaries
|
|
vis_image = image.copy()
|
|
|
|
for i, split in enumerate(splits):
|
|
x, y, w, h = split['bounds']
|
|
|
|
# Draw rectangle
|
|
cv2.rectangle(vis_image, (x, y), (x + w, y + h), (0, 255, 0), 2)
|
|
|
|
# Add label
|
|
label = f"Panel {i+1} ({split['confidence']:.2f})"
|
|
cv2.putText(vis_image, label, (x + 5, y + 20),
|
|
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
|
|
|
|
# Save visualization
|
|
vis_path = os.path.join(self.debug_dir, f"{base_name}_splits.jpg")
|
|
cv2.imwrite(vis_path, vis_image)
|
|
|
|
# Save individual splits
|
|
for i, split in enumerate(splits):
|
|
split_path = os.path.join(self.debug_dir, f"{base_name}_split_{i+1}.jpg")
|
|
cv2.imwrite(split_path, split['image']) |