# master_adapt_detect/test_horizontal_splitting.py

#!/usr/bin/env python3
import cv2
import numpy as np
from pathlib import Path
import os

def _panels_from_separators(separators, width, height, min_width=50):
    """Turn separator x-positions into ``(x1, 0, x2, height)`` panel boxes.

    The image edges (``0`` and ``width``) are always included as boundaries,
    duplicate positions are dropped, and any span that is not wider than
    ``min_width`` pixels is discarded.
    """
    # int() normalises NumPy integer scalars so printed width lists show
    # plain numbers instead of NumPy scalar reprs.
    boundaries = sorted({0, width, *(int(x) for x in separators)})
    return [
        (x1, 0, x2, height)
        for x1, x2 in zip(boundaries, boundaries[1:])
        if x2 - x1 > min_width
    ]


def analyze_horizontal_panels(image_path: str, debug=True):
    """Print panel-splitting diagnostics for an image using three strategies.

    The strategies are: equal division into 8/10/12 columns, Canny-edge based
    vertical separator detection at several thresholds, and valley detection
    in the per-column intensity sum at several minimum peak distances.

    Args:
        image_path: Path of the image file to analyze.
        debug: When true, write the edge maps and histogram plots into a
            ``debug_splitting`` directory under the current working directory.

    Returns:
        A list of ``(num_panels, panels)`` tuples for the equal-division
        method only, where ``panels`` is a list of ``(x1, 0, x2, height)``
        boxes. The results of the other two methods are printed, not returned.

    Raises:
        FileNotFoundError: If ``image_path`` does not point to a file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    print(f"Analyzing {Path(image_path).name}")

    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface as an AttributeError on ``img.shape`` below.
    if not Path(image_path).is_file():
        raise FileNotFoundError(f"Image not found: {image_path}")
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not decode image: {image_path}")

    height, width = img.shape[:2]
    print(f"Image dimensions: {width}x{height}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Method 1: Simple equal division
    print("\n=== Method 1: Equal Division ===")
    equal_panels = []
    for num_panels in [8, 10, 12]:
        panel_width = width // num_panels
        # The last panel absorbs the remainder so the panels cover the full width.
        panels = [
            (
                i * panel_width,
                0,
                (i + 1) * panel_width if i < num_panels - 1 else width,
                height,
            )
            for i in range(num_panels)
        ]
        equal_panels.append((num_panels, panels))
        print(f"  {num_panels} panels: widths = {[x2-x1 for x1, y1, x2, y2 in panels]}")

    # Method 2: Edge-based detection
    print("\n=== Method 2: Edge Detection ===")
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Close gaps along the vertical axis with a 1-pixel-wide kernel a quarter
    # of the image tall; clamp to 1 so very short images still get a valid size.
    vertical_kernel = cv2.getStructuringElement(
        cv2.MORPH_RECT, (1, max(1, height // 4))
    )
    vertical_lines = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, vertical_kernel)

    # Per-column sum of the closed edge map.
    vertical_projection = np.sum(vertical_lines, axis=0)
    projection_max = np.max(vertical_projection)

    for threshold_factor in [0.3, 0.4, 0.5, 0.6, 0.7]:
        threshold = projection_max * threshold_factor
        separators = []
        for x in range(width):
            if vertical_projection[x] > threshold:
                # Keep a column only if it is more than 30 px past the last kept one.
                if not separators or x - separators[-1] > 30:
                    separators.append(x)

        panels = _panels_from_separators(separators, width, height)
        print(f"  Threshold {threshold_factor}: {len(panels)} panels, widths = {[x2-x1 for x1, y1, x2, y2 in panels]}")

    # Method 3: Adaptive histogram analysis
    print("\n=== Method 3: Histogram Analysis ===")

    # Per-column sum of grayscale intensities.
    horizontal_hist = np.sum(gray, axis=0)

    from scipy.signal import find_peaks

    # Invert so that valleys of the original histogram become peaks.
    inverted_hist = np.max(horizontal_hist) - horizontal_hist
    min_peak_height = np.max(inverted_hist) * 0.1

    for min_distance in [50, 80, 100, 120]:
        peaks, _ = find_peaks(inverted_hist, distance=min_distance, height=min_peak_height)
        panels = _panels_from_separators(peaks, width, height)
        print(f"  Min distance {min_distance}: {len(panels)} panels, widths = {[x2-x1 for x1, y1, x2, y2 in panels]}")

    if debug:
        debug_dir = Path("debug_splitting")
        debug_dir.mkdir(exist_ok=True)
        stem = Path(image_path).stem

        # Save edge detection results
        cv2.imwrite(str(debug_dir / f"{stem}_edges.jpg"), edges)
        cv2.imwrite(str(debug_dir / f"{stem}_vertical_lines.jpg"), vertical_lines)

        # Save histogram visualization
        import matplotlib.pyplot as plt
        plt.figure(figsize=(15, 5))
        plt.plot(horizontal_hist)
        plt.title("Horizontal Histogram")
        plt.savefig(debug_dir / f"{stem}_histogram.png")
        plt.close()

        # Save inverted histogram with the peaks found at distance=100
        plt.figure(figsize=(15, 5))
        plt.plot(inverted_hist)
        peaks, _ = find_peaks(inverted_hist, distance=100, height=min_peak_height)
        plt.scatter(peaks, inverted_hist[peaks], color='red', zorder=5)
        plt.title("Inverted Histogram with Detected Peaks")
        plt.savefig(debug_dir / f"{stem}_peaks.png")
        plt.close()

        print(f"\nDebug images saved to {debug_dir}/")

    return equal_panels

def test_best_method(image_path: str, expected_panels: int):
    """Split an image into panels via smoothed histogram valleys and save crops.

    The per-column grayscale sum is inverted and Gaussian-smoothed, its peaks
    are taken as panel separators, and each resulting panel is written as a
    JPEG into a ``test_crops`` directory under the current working directory.

    Args:
        image_path: Path of the image file to split.
        expected_panels: Rough number of panels expected; used to derive the
            minimum distance between separators. Must be at least 1.

    Returns:
        A list of ``(x1, 0, x2, height)`` boxes with plain ``int`` coordinates,
        one per panel wider than 50 px.

    Raises:
        ValueError: If ``expected_panels`` is less than 1, or the file exists
            but OpenCV cannot decode it.
        FileNotFoundError: If ``image_path`` does not point to a file.
    """
    if expected_panels < 1:
        raise ValueError(f"expected_panels must be >= 1, got {expected_panels}")

    print(f"\n=== Testing Best Method for {expected_panels} expected panels ===")

    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface as an AttributeError on ``img.shape`` below.
    if not Path(image_path).is_file():
        raise FileNotFoundError(f"Image not found: {image_path}")
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not decode image: {image_path}")

    height, width = img.shape[:2]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Per-column intensity sum, inverted so that dark columns become peaks.
    horizontal_hist = np.sum(gray, axis=0)
    inverted_hist = np.max(horizontal_hist) - horizontal_hist

    # Smooth the inverted histogram to reduce noise
    from scipy.ndimage import gaussian_filter1d
    smoothed_hist = gaussian_filter1d(inverted_hist, sigma=10)

    from scipy.signal import find_peaks
    # Adaptive separator spacing. find_peaks rejects distance < 1, so clamp
    # for images that are narrow relative to expected_panels.
    min_distance = max(1, int(width // (expected_panels * 1.5)))
    smoothed_max = np.max(smoothed_hist)
    peaks, _ = find_peaks(
        smoothed_hist,
        distance=min_distance,
        height=smoothed_max * 0.15,
        prominence=smoothed_max * 0.1,
    )

    print(f"Found {len(peaks)} separator peaks with min_distance={min_distance}")

    # Image edges plus detected separators, de-duplicated and ordered; NumPy
    # integer scalars are converted so callers receive plain ints.
    x_boundaries = sorted({0, width, *(int(p) for p in peaks)})

    panels = [
        (x1, 0, x2, height)
        for x1, x2 in zip(x_boundaries, x_boundaries[1:])
        if x2 - x1 > 50  # Minimum reasonable panel width
    ]

    print(f"Generated {len(panels)} panels:")
    for i, (x1, y1, x2, y2) in enumerate(panels):
        print(f"  Panel {i+1}: x={x1}-{x2} (width={x2-x1})")

    # Save crops for visual verification
    crops_dir = Path("test_crops")
    crops_dir.mkdir(exist_ok=True)
    stem = Path(image_path).stem

    for i, (x1, y1, x2, y2) in enumerate(panels):
        crop_path = crops_dir / f"{stem}_panel_{i+1:02d}.jpg"
        cv2.imwrite(str(crop_path), img[y1:y2, x1:x2])

    print(f"\nCrops saved to {crops_dir}/ - Check these to verify panel accuracy!")

    return panels


# This is a manual experiment, not a pytest test: without this flag pytest
# would collect it by name and fail looking for fixtures called
# ``image_path`` and ``expected_panels``.
test_best_method.__test__ = False

if __name__ == "__main__":
    import sys

    # Usage: script.py [IMAGE_PATH] [EXPECTED_PANELS]
    # Both arguments are optional; the defaults are the multi-panel layout and
    # panel count this script was originally written against.
    default_image = "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6786500.jpg"
    test_image = sys.argv[1] if len(sys.argv) > 1 else default_image
    expected = int(sys.argv[2]) if len(sys.argv) > 2 else 10

    if os.path.exists(test_image):
        print("Analyzing horizontal panel structure...")
        analyze_horizontal_panels(test_image, debug=True)

        print("\n" + "="*60)
        test_best_method(test_image, expected_panels=expected)
    else:
        print(f"Test image not found: {test_image}")