264 lines
No EOL
9.9 KiB
Python
264 lines
No EOL
9.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Improved horizontal splitting algorithm for fashion layout panels
|
|
"""
|
|
import cv2
|
|
import numpy as np
|
|
from pathlib import Path
|
|
import os
|
|
from scipy.ndimage import gaussian_filter1d
|
|
from scipy.signal import find_peaks
|
|
|
|
def _panel_limits(width):
    """Return ``(min_panel_width, max_panels)`` for an image ``width`` pixels wide."""
    if width < 2000:
        # Small images: likely 1-2 panels, each at least 25% of the width
        min_panel_width, max_panels = width // 4, 3
    elif width < 5000:
        # Medium images: likely 2-4 panels, each at least ~16% of the width
        min_panel_width, max_panels = width // 6, 6
    else:
        # Large images: multi-panel layouts, each at least ~8% of the width
        min_panel_width, max_panels = width // 12, 15
    # find_peaks() rejects distance < 1, so never report a zero minimum width.
    return max(1, min_panel_width), max_panels


def _select_separators(candidates, edge_signal, valley_signal, width,
                       min_panel_width, max_panels):
    """Filter candidate x positions down to a sorted list of final separators."""
    # Drop candidates that sit within 5% of the left/right image border.
    boundary_margin = width * 0.05
    inside = sorted(int(x) for x in candidates
                    if boundary_margin < x < width - boundary_margin)

    # Walk left to right and keep a candidate only when it is at least one
    # minimum panel width away from the previously kept one.
    kept = []
    for x in inside:
        if not kept or x - kept[-1] >= min_panel_width:
            kept.append(x)

    # N separators yield N + 1 panels, so keep at most max_panels - 1 of them,
    # preferring the ones with the highest combined edge + valley response.
    if len(kept) >= max_panels:
        ranked = sorted(kept,
                        key=lambda x: edge_signal[x] + valley_signal[x],
                        reverse=True)
        kept = sorted(ranked[:max_panels - 1])
    return kept


def _save_debug_plot(img, image_path, edge_signal, valley_signal, edge_peaks,
                     valley_peaks, edge_threshold, valley_threshold, separators):
    """Write the four-row analysis figure to ``debug_improved/`` and return its path."""
    debug_dir = Path("debug_improved")
    debug_dir.mkdir(exist_ok=True)

    # Imported lazily so matplotlib is only required when debug output is requested.
    import matplotlib.pyplot as plt

    fig, axes = plt.subplots(4, 1, figsize=(15, 12))

    # Original image with the final separators drawn on top
    axes[0].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    axes[0].set_title("Original Image")
    for sep in separators:
        axes[0].axvline(x=sep, color='red', linewidth=2)

    # Edge projection with its threshold and detected peaks
    axes[1].plot(edge_signal)
    axes[1].set_title("Edge Projection (Smoothed)")
    axes[1].axhline(y=edge_threshold, color='red', linestyle='--', alpha=0.7)
    for sep in edge_peaks:
        axes[1].axvline(x=sep, color='red', alpha=0.7)

    # Inverted intensity histogram with its threshold and detected peaks
    axes[2].plot(valley_signal)
    axes[2].set_title("Inverted Histogram (Smoothed)")
    axes[2].axhline(y=valley_threshold, color='red', linestyle='--', alpha=0.7)
    for sep in valley_peaks:
        axes[2].axvline(x=sep, color='blue', alpha=0.7)

    # Both signals together with the final separators
    axes[3].plot(edge_signal, label='Edges', alpha=0.7)
    axes[3].plot(valley_signal, label='Histogram', alpha=0.7)
    axes[3].set_title("Combined Analysis with Final Separators")
    for idx, sep in enumerate(separators):
        # Label only the first line; labels starting with "_" are skipped by
        # the legend, which avoids one duplicate entry per separator.
        axes[3].axvline(x=sep, color='red', linewidth=2,
                        label='Final Separator' if idx == 0 else '_nolegend_')
    axes[3].legend()

    plt.tight_layout()
    debug_file = debug_dir / f"{Path(image_path).stem}_analysis.png"
    plt.savefig(debug_file, dpi=150, bbox_inches='tight')
    plt.close(fig)
    return debug_file


def improved_horizontal_splitting(image_path: str, debug=False):
    """
    Improved algorithm for horizontal panel detection.

    Focuses on major structural separators, not text/content details.
    Two column-wise signals are computed (a projection of vertically closed
    Canny edges, and an inverted intensity projection); peaks from both are
    merged, filtered and used as vertical cut positions.

    Args:
        image_path: Path of the image file to analyse.
        debug: When true, save an analysis figure under ``debug_improved/``.

    Returns:
        A list of dicts, one per panel, with keys ``'bbox'`` (``(x1, y1, x2,
        y2)``), ``'width'``, ``'height'`` and ``'crop_id'``. Images that are
        not wider than 1.2x their height yield a single full-image entry with
        ``crop_id`` ``"single"``.

    Raises:
        ValueError: If the image cannot be read.
    """
    print(f"\nTesting improved algorithm on: {Path(image_path).name}")

    # Load image; cv2.imread signals failure by returning None, not by raising.
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not read image: {image_path}")
    height, width = img.shape[:2]
    print(f"Image dimensions: {width}x{height}")

    # Only process wide images
    if width <= height * 1.2:
        print("Not a wide layout, treating as single panel")
        return [{
            'bbox': (0, 0, width, height),
            'width': width,
            'height': height,
            'crop_id': "single"
        }]

    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Method 1: Structural edge detection
    # Focus on strong vertical edges that span most of the height
    edges = cv2.Canny(gray, 30, 100)

    # Tall vertical kernel to join edge fragments into full-height separators
    # (kernel height clamped to 1 so very short images do not produce a 0-size kernel)
    vertical_kernel = cv2.getStructuringElement(
        cv2.MORPH_RECT, (1, max(1, height // 3)))
    vertical_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, vertical_kernel)

    # Column sums are accumulated as float64: gaussian_filter1d returns the
    # dtype of its input, so integer sums would be smoothed with truncation.
    edge_projection = np.sum(vertical_edges, axis=0, dtype=np.float64)

    # Method 2: Intensity histogram analysis
    # Look for consistent dark/light vertical bands
    horizontal_hist = np.sum(gray, axis=0, dtype=np.float64)

    # Smooth both signals
    smoothed_edges = gaussian_filter1d(edge_projection, sigma=15)
    smoothed_hist = gaussian_filter1d(horizontal_hist, sigma=15)

    # Invert histogram so dark valleys (potential separators) become peaks
    inverted_hist = np.max(smoothed_hist) - smoothed_hist

    # Adaptive parameters based on image size
    min_panel_width, max_panels = _panel_limits(width)
    print(f"Min panel width: {min_panel_width}px, Max panels: {max_panels}")

    # Find separator candidates using both methods
    edge_threshold = np.max(smoothed_edges) * 0.4  # Strong edges only
    hist_threshold = np.max(inverted_hist) * 0.3   # Significant valleys only

    # Edge-based separators
    edge_peaks, _ = find_peaks(smoothed_edges,
                               distance=min_panel_width,
                               height=edge_threshold,
                               prominence=np.max(smoothed_edges) * 0.2)

    # Histogram-based separators
    hist_peaks, _ = find_peaks(inverted_hist,
                               distance=min_panel_width,
                               height=hist_threshold,
                               prominence=np.max(inverted_hist) * 0.15)

    print(f"Edge peaks: {len(edge_peaks)}, Histogram peaks: {len(hist_peaks)}")

    # Combine and validate separators. tolist() yields plain Python ints so
    # the printed list and the returned bboxes do not carry NumPy scalars.
    all_separators = set(edge_peaks.tolist()) | set(hist_peaks.tolist())
    final_separators = _select_separators(
        all_separators, smoothed_edges, inverted_hist,
        width, min_panel_width, max_panels)

    print(f"Final separators: {final_separators}")

    # Create crops
    x_boundaries = [0] + final_separators + [width]
    crops = []
    for i in range(len(x_boundaries) - 1):
        x1, x2 = x_boundaries[i], x_boundaries[i + 1]

        # Ensure minimum panel width.
        # NOTE(review): a strip narrower than min_panel_width (only possible
        # at the left/right image edge) is dropped rather than merged into its
        # neighbour, so the crops may not cover the full width and crop_id
        # numbering may skip an index - confirm this is intended.
        if x2 - x1 >= min_panel_width:
            crops.append({
                'bbox': (x1, 0, x2, height),
                'width': x2 - x1,
                'height': height,
                'crop_id': f"panel_{i}"
            })

    print(f"Generated {len(crops)} panels")

    # Debug visualization
    if debug:
        debug_file = _save_debug_plot(
            img, image_path, smoothed_edges, inverted_hist,
            edge_peaks, hist_peaks, edge_threshold, hist_threshold,
            final_separators)
        print(f"Debug visualization saved: {debug_file}")

    return crops
|
|
|
|
def test_improved_algorithm(test_cases=None):
    """Test the improved algorithm on various layouts.

    For every test image that exists on disk, run
    ``improved_horizontal_splitting`` with debug output enabled, write each
    detected panel to ``improved_crops/`` as a JPEG, and compare the number
    of detected panels with the expected count. A result counts as accurate
    when the detected count is within one panel of the expected count.
    Missing files are reported and skipped.

    Args:
        test_cases: Optional list of dicts with keys ``"path"``,
            ``"expected"`` (panel count) and ``"type"`` (label used in the
            report). Defaults to the built-in list of layout images.
    """
    if test_cases is None:
        layouts_dir = "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts"
        default_cases = [
            # Single panels
            ("6785934.jpg", 1, "Single"),
            ("6813573.jpg", 1, "Single"),
            # Double panels
            ("6785852.jpg", 2, "Double"),
            # 4-panel layouts
            ("6799150.jpg", 4, "4-Panel"),
            ("6813643.jpg", 4, "4-Panel"),
            # Multi-panel layouts
            ("6791144.jpg", 8, "Multi-Panel"),
            ("6786505.jpg", 10, "Multi-Panel"),
        ]
        test_cases = [
            {"path": f"{layouts_dir}/{file_name}", "expected": expected, "type": kind}
            for file_name, expected, kind in default_cases
        ]

    print("TESTING IMPROVED HORIZONTAL SPLITTING ALGORITHM")
    print("="*60)

    results = []
    crops_dir = Path("improved_crops")
    crops_dir.mkdir(exist_ok=True)

    for test_case in test_cases:
        if not os.path.exists(test_case["path"]):
            print(f"⚠️ File not found: {test_case['path']}")
            continue

        crops = improved_horizontal_splitting(test_case["path"], debug=True)

        # Save crop previews
        img = cv2.imread(test_case["path"])
        base_name = Path(test_case["path"]).stem

        for i, crop in enumerate(crops):
            x1, y1, x2, y2 = crop['bbox']
            cropped = img[y1:y2, x1:x2]
            crop_filename = f"{base_name}_improved_crop{i+1:02d}.jpg"
            cv2.imwrite(str(crops_dir / crop_filename), cropped)

        # Analyze result: a deviation of one panel is still counted as accurate
        detected = len(crops)
        expected = test_case["expected"]
        accurate = abs(detected - expected) <= 1

        status = "✅" if accurate else "❌"
        print(f"{status} {base_name}: {detected}/{expected} panels ({test_case['type']})")

        results.append({
            "file": base_name,
            "type": test_case["type"],
            "expected": expected,
            "detected": detected,
            "accurate": accurate
        })

    # Summary
    print(f"\n{'='*60}")
    print("IMPROVED ALGORITHM SUMMARY")
    print(f"{'='*60}")

    accurate_count = sum(1 for r in results if r["accurate"])
    total_count = len(results)

    # Every image may have been skipped as missing; avoid dividing by zero
    # when computing the percentage.
    if total_count:
        print(f"Accurate results: {accurate_count}/{total_count} ({accurate_count/total_count*100:.1f}%)")
    else:
        print("Accurate results: 0/0 (no test images were found)")
    print(f"Crop previews saved to: {crops_dir}/")
    print("Debug visualizations saved to: debug_improved/")
|
|
|
|
# Script entry point: run the test harness when this file is executed
# directly (not when it is imported as a module).
if __name__ == "__main__":
    test_improved_algorithm()