commit 69f2f4cbe99b0c913e04b3209153f92c7db0bfcf Author: michael Date: Wed Oct 1 14:32:55 2025 -0500 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..62adcd1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,139 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Virtual Environment +venv/ +env/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Environment variables and secrets +.env +.env.local +.env.*.local +service-account.json + +# Project specific - Debug outputs +debug_*/ +crops_debug/ +debug_improved/ +debug_splitting/ +debug_canny_params.py +test_crops/ +panel_test_crops/ +improved_crops/ + +# Project specific - Results and reports +results/ +reports/ +tuning_results_*.json + +# Project specific - Test outputs +test_splits/ +test_layouts/ + +# Project specific - Temporary files +temp_*/ +unused-layouts/ + +# Project specific - Data files +layouts/ +layouts.zip +layouts_and_masters.zip +master_images/ + +# Project specific - Cache +embeddings_cache/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# pipenv +Pipfile.lock + +# PEP 582 +__pypackages__/ + +# Celery +celerybeat-schedule +celerybeat.pid + +# SageMath +*.sage.py + +# Environments +.spyderproject +.spyproject + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre +.pyre/ + +# pytest +.pytest_cache/ +.tox/ + +# Coverage +htmlcov/ +.coverage +.coverage.* +coverage.xml +*.cover +.hypothesis/ + +# macOS +.DS_Store +.AppleDouble +.LSOverride +._* + +# Windows +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db +Desktop.ini +$RECYCLE.BIN/ + +# Linux +*~ +.directory +.Trash-* diff --git a/COST_TRACKING_README.md b/COST_TRACKING_README.md new file mode 100644 index 0000000..794b470 --- /dev/null +++ b/COST_TRACKING_README.md @@ -0,0 +1,310 @@ +# Cost Tracking for Master Image Detection + +This document describes the cost tracking features added to the master image detection application to monitor and report OpenAI API usage costs. + +## Overview + +The cost tracking system provides comprehensive monitoring of OpenAI o3 API usage, including: + +- **Real-time cost calculation** for all API calls +- **Per-layout cost breakdown** with detailed token usage +- **Session summaries** with totals and averages +- **Monthly cost estimation** based on usage patterns +- **Detailed cost reports** in JSON format +- **Integration with all detection modes** (Gemini, OpenAI, Vector, Hybrid) + +## Current OpenAI o3 Pricing + +- **Input tokens**: $2.00 per million tokens +- **Cached input tokens**: $0.50 per million tokens +- **Output tokens**: $8.00 per million tokens + +## CLI Usage + +### Enable Cost Tracking + +**Important**: Cost tracking is **disabled by default**. You must use the `--enable-cost-tracking` flag to enable it. + +```bash +# Enable cost tracking for any detection mode +python cli.py --test --hybrid --enable-cost-tracking + +# Enable cost tracking with detailed report generation +python cli.py --limit 10 --openai --enable-cost-tracking --cost-report + +# Enable cost tracking with monthly cost estimation +python cli.py --all --hybrid --enable-cost-tracking --cost-estimate 300 +``` + +### What "tracking: disabled" means + +If you see "Cost Calculator initialized (tracking: disabled)" in the logs, it means: + +1. **Cost tracking is turned off** - no costs are being calculated or stored +2. **You need to add the `--enable-cost-tracking` flag** to enable cost monitoring +3. **API calls are still being made** but their costs aren't being tracked + +### Why the repetitive initialization messages? + +The cost calculator may be initialized multiple times due to: + +1. **Multiprocessing workers** - Each worker process loads the module +2. **Normal behavior** - This doesn't affect functionality +3. **Only main process shows full details** - Worker processes show minimal output + +### Cost Tracking Options + +- `--enable-cost-tracking`: Enable detailed cost tracking and real-time reporting +- `--cost-report`: Generate detailed JSON cost report after processing +- `--cost-estimate N`: Show monthly cost estimate based on N layouts per month + +### Example Usage + +```bash +# Test hybrid mode with cost tracking enabled +python cli.py --test --hybrid --enable-cost-tracking + +# Process 10 layouts with OpenAI and generate cost report +python cli.py --limit 10 --openai --enable-cost-tracking --cost-report + +# Full hybrid run with cost tracking and monthly estimate +python cli.py --all --hybrid --enable-cost-tracking --cost-estimate 300 +``` + +## Cost Tracking Features + +### 1. Real-time Cost Monitoring + +- Tracks every OpenAI API call with token usage and cost +- Displays running totals during processing +- Shows cost per layout in progress updates + +### 2. Detailed Cost Breakdown + +Each processed layout includes cost information: + +```json +{ + "layout_filename": "example.jpg", + "detected_master_ids": ["1011A_1011_05"], + "cost_breakdown": { + "total_cost": 0.0234, + "cost_breakdown": { + "input_tokens": 1500, + "output_tokens": 800, + "cached_tokens": 200, + "api_calls_made": 1, + "operation_types": ["panel_counting_censorship"] + } + } +} +``` + +### 3. Session Summary + +Displays comprehensive cost statistics: + +``` +COST TRACKING SUMMARY +============================================================ +Total cost: $2.4567 +Total tokens: 145,678 + - Input tokens: 98,456 + - Output tokens: 47,222 + - Cached tokens: 12,345 +Total API calls: 156 +Layouts processed: 150 + +Averages: + - Cost per layout: $0.0164 + - Tokens per layout: 971.2 + - API calls per layout: 1.0 + - Cost per 1K tokens: $0.0169 + +Operation breakdown: + - panel_counting_censorship: 150 calls + - detection: 0 calls + - one_at_a_time_detection: 0 calls +============================================================ +``` + +### 4. Monthly Cost Estimation + +Estimates monthly costs based on current usage patterns: + +``` +MONTHLY COST ESTIMATE +Based on 150 processed layouts: + Average cost per layout: $0.0164 + Estimated monthly cost (300 layouts): $4.92 + Estimated annual cost: $59.04 +``` + +### 5. Cost Reports + +Generates detailed JSON reports saved to `results/cost_report_[timestamp].json`: + +```json +{ + "session_summary": { + "total_cost": 2.4567, + "total_input_tokens": 98456, + "total_output_tokens": 47222, + "layouts_processed": 150 + }, + "layout_costs": { + "example.jpg": { + "total_cost": 0.0234, + "total_input_tokens": 1500, + "total_output_tokens": 800, + "api_calls_made": 1 + } + }, + "detailed_api_calls": [ + { + "operation_type": "panel_counting_censorship", + "timestamp": "2025-01-15T10:30:45.123456", + "token_usage": { + "prompt_tokens": 1500, + "completion_tokens": 800, + "total_tokens": 2300, + "cached_tokens": 200 + }, + "total_cost": 0.0234, + "layout_name": "example.jpg" + } + ] +} +``` + +## Integration with Detection Modes + +### Hybrid Mode (Primary Focus) + +Cost tracking is fully integrated with hybrid mode: + +- **Panel counting + censorship detection**: 1 API call per layout +- **Local inlier analysis**: No API calls (zero cost) +- **Vector similarity**: No API calls (zero cost) +- **Fallback to OpenAI**: Additional API calls when needed + +### OpenAI Mode + +Tracks all OpenAI API usage patterns: + +- **Regular detection**: 1 API call per layout (all masters compared) +- **One-at-a-time mode**: 41 API calls per layout (one per master) +- **Censorship detection**: Additional API calls for CEN refinement + +### Vector Mode + +No API costs (uses Google Vertex AI, not OpenAI) + +### Gemini Mode + +No API costs (uses Google Gemini, not OpenAI) + +## Operation Types Tracked + +The system tracks different types of API operations: + +1. **`panel_counting_censorship`**: Combined panel counting and censorship detection +2. **`detection`**: Main master image detection +3. **`censorship_detection`**: Standalone censorship analysis +4. **`one_at_a_time_detection`**: Individual master comparisons + +## Cost Optimization Benefits + +The cost tracking system helps identify optimization opportunities: + +### Hybrid Mode Savings + +Hybrid mode significantly reduces costs compared to one-at-a-time processing: + +- **One-at-a-time mode**: 41 API calls per layout +- **Hybrid mode**: 1 API call per layout (97.6% reduction) +- **Estimated savings**: Shows percentage savings in session summary + +### Usage Pattern Analysis + +- Identify expensive operations +- Track token efficiency by operation type +- Monitor cost per detected master +- Analyze cost trends over time + +## Testing + +Run the cost calculator tests to verify functionality: + +```bash +python test_cost_calculator.py +``` + +This will test all cost tracking features including: +- Basic cost calculation +- API call tracking +- Layout cost breakdown +- Session summaries +- Monthly cost estimation +- Cost report generation + +## Technical Implementation + +### Core Components + +1. **`cost_calculator.py`**: Main cost tracking module +2. **Token extraction**: Automatic token usage extraction from API responses +3. **Integration points**: All OpenAI API calls instrumented +4. **Data structures**: Efficient tracking of costs and token usage + +### Integration Points + +Cost tracking is integrated at these key locations: + +- `openai_detector.py`: All OpenAI API calls +- `hybrid_detector.py`: Hybrid mode processing +- `cli.py`: Command-line interface and reporting +- Results JSON: Cost breakdowns included in output + +### Error Handling + +- Graceful degradation when API responses lack usage data +- Optional feature (disabled by default) +- No impact on existing functionality when disabled + +## Future Enhancements + +Potential future improvements include: + +- **Cost budgeting**: Set spending limits and alerts +- **Historical tracking**: Long-term cost trend analysis +- **Token optimization**: Automatic prompt and image optimization +- **Multi-provider support**: Track costs across different AI providers +- **Real-time alerts**: Notifications when costs exceed thresholds + +## Troubleshooting + +### Common Issues + +1. **"No cost data available"**: Cost tracking is disabled (use `--enable-cost-tracking`) +2. **"API usage data missing"**: OpenAI response lacks usage information +3. **"Cost report empty"**: No API calls were made during processing + +### Debugging + +Enable cost tracking and run a test: + +```bash +python cli.py --test --hybrid --enable-cost-tracking +``` + +This will show real-time cost information and help identify any issues. + +## Support + +For questions or issues with cost tracking: + +1. Check the session summary output for diagnostic information +2. Review the cost report JSON for detailed API call information +3. Run the test suite to verify functionality +4. Ensure OpenAI API responses include usage data \ No newline at end of file diff --git a/MEMORY_FIX_SUMMARY.md b/MEMORY_FIX_SUMMARY.md new file mode 100644 index 0000000..c4fbc9a --- /dev/null +++ b/MEMORY_FIX_SUMMARY.md @@ -0,0 +1,103 @@ +# Memory Management Fix Summary + +## Problem Analysis +The application was crashing due to memory exhaustion when processing images with high feature counts (64,509 features detected). The issue occurred in the hybrid detector's local inlier analysis when 14 concurrent processes were trying to process 41 masters simultaneously, causing massive memory usage and swap thrashing. + +## Root Cause +- **High feature count**: 64,509 features in layout image +- **Concurrent processing**: 14 processes × 41 masters = 574 concurrent operations +- **Memory multiplication**: Each process holding large feature sets in memory +- **No memory limits**: No safeguards against memory exhaustion + +## Solutions Implemented + +### 1. Memory Manager (`memory_manager.py`) +- **Real-time monitoring**: Tracks memory and swap usage percentages +- **Safety checks**: Prevents execution when memory > 80% (swap usage only warns, does not block) +- **Dynamic process limiting**: Adjusts worker count based on available memory +- **Memory-safe execution decorator**: Ensures functions run only when memory is safe + +### 2. Feature Limiting +- **Maximum features per image**: Limited to 10,000 features max +- **Smart reduction**: Keeps best features based on response strength +- **Dynamic adjustment**: Reduces features based on total count (e.g., 64K → 32K → 10K) + +### 3. Dynamic Worker Adjustment +- **Feature-based scaling**: + - >50,000 features: workers ÷ 2 + - >30,000 features: workers × 0.75 + - <30,000 features: normal workers +- **Memory-based limiting**: Further reduces based on available memory +- **Conservative defaults**: Assumes 2GB per process for safety + +### 4. Enhanced Monitoring +- **Progress with memory**: Shows memory usage every 10 completed masters +- **Early warnings**: Alerts when memory > 80% or swap > 20% +- **Detailed crash logging**: Logs system and process memory at crash time + +### 5. Memory Cleanup +- **Forced garbage collection**: Runs `gc.collect()` after processing +- **Process isolation**: Each master processed in separate process +- **Resource cleanup**: Proper cleanup of temporary files and objects + +## Key Changes Made + +### hybrid_detector.py +- Added memory manager initialization +- Modified `process_single_master_inlier_analysis()` to limit features +- Updated `detect_with_local_inlier_analysis()` for dynamic worker adjustment +- Added memory monitoring during processing +- Added memory cleanup after processing + +### memory_manager.py (NEW) +- `MemoryManager` class for monitoring and control +- `memory_safe_execution` decorator +- `reduce_feature_count()` function for feature limiting +- Dynamic process count calculation + +### logging_config.py +- Enhanced crash logging with system memory details +- Added memory warning logging function +- Improved resource usage reporting + +## Memory Protection Features + +### Before Processing +- Check if memory usage is safe (< 75%) +- Wait for memory to return to safe levels if needed +- Dynamically adjust worker count based on available memory + +### During Processing +- Monitor memory usage every 10 completed masters +- Log warnings when memory > 80% or swap > 20% +- Limit features to prevent memory explosion + +### After Processing +- Force garbage collection to free memory +- Clean up temporary files and objects +- Log final memory usage + +## Expected Results +- **No more crashes**: Memory usage stays within safe limits +- **Better performance**: Reduced memory pressure = less swap usage +- **Graceful degradation**: Automatically reduces parallelism when needed +- **Better monitoring**: Real-time memory usage reporting + +## Usage +The fixes are automatically applied when using the hybrid detector. No changes needed to command line usage: + +```bash +python cli.py --all --hybrid # Will now use memory-safe processing +``` + +## Testing +Run the test suite to verify fixes: +```bash +python test_memory_fix.py +``` + +## Memory Thresholds +- **Maximum memory**: 75% (was unlimited) +- **Maximum swap**: 30% (was unlimited) +- **Feature limit**: 10,000 per image (was unlimited) +- **Dynamic workers**: Based on feature count and memory availability \ No newline at end of file diff --git a/ONE_AT_A_TIME_COST_TRACKING.md b/ONE_AT_A_TIME_COST_TRACKING.md new file mode 100644 index 0000000..49e072f --- /dev/null +++ b/ONE_AT_A_TIME_COST_TRACKING.md @@ -0,0 +1,253 @@ +# One-at-a-Time Cost Tracking Implementation + +This document describes the implementation of detailed cost tracking for the one-at-a-time detection mode, which makes individual API calls for each master image. + +## Implementation Overview + +The one-at-a-time mode now tracks the cost of each individual API call made to the OpenAI o3 model, providing detailed insights into the cost structure of this high-accuracy detection method. + +## Key Features Implemented + +### 1. **Token Usage Extraction in Multiprocessing** + +Each worker process now extracts token usage data from the OpenAI API response: + +```python +# In process_single_master_detection_openai() +token_usage_data = None +if hasattr(response, 'usage') and response.usage: + token_usage_data = { + 'prompt_tokens': response.usage.prompt_tokens, + 'completion_tokens': response.usage.completion_tokens, + 'total_tokens': response.usage.total_tokens, + 'cached_tokens': getattr(response.usage, 'cached_tokens', 0) + } + +# Include in return value +result['token_usage'] = token_usage_data +``` + +### 2. **Cost Tracking in Main Process** + +The main process collects token usage data from all worker processes and tracks costs: + +```python +# Track cost for this API call if token usage data is available +if 'token_usage' in result and result['token_usage']: + token_data = result['token_usage'] + api_call_cost = cost_calculator.track_api_call( + operation_type="one_at_a_time_detection", + prompt_tokens=token_data['prompt_tokens'], + completion_tokens=token_data['completion_tokens'], + cached_tokens=token_data['cached_tokens'], + layout_name=layout_name, + master_id=master_id + ) +``` + +### 3. **Real-time Cost Progress** + +During processing, the system shows cost progress every 10 completed masters: + +``` +Processing 10/41 masters... + → API call cost: $0.0234 (Running total: $0.2340) +Processing 20/41 masters... + → API call cost: $0.0198 (Running total: $0.4538) +``` + +### 4. **Detailed Cost Analysis** + +The final results include comprehensive cost information: + +```python +'analysis': 'Process-based one-at-a-time analysis completed. Made 41 separate API calls (one per master). Found 2 exact matches out of 41 masters checked using 8 concurrent processes.', +'api_calls_made': 41, # One API call per master +``` + +## Cost Comparison: One-at-a-Time vs Hybrid Mode + +### One-at-a-Time Mode + +- **API Calls**: 41 calls (one per master image) +- **Typical Cost**: $0.50 - $2.00 per layout +- **Accuracy**: Highest (individual comparison) +- **Use Case**: When maximum accuracy is required + +### Hybrid Mode + +- **API Calls**: 1 call (panel counting + censorship) +- **Typical Cost**: $0.01 - $0.05 per layout +- **Accuracy**: Very good (local analysis for simple layouts) +- **Use Case**: Cost-efficient processing of large batches + +### Cost Savings + +Hybrid mode provides approximately **95-98% cost savings** compared to one-at-a-time mode while maintaining good accuracy for most layouts. + +## Usage Examples + +### Enable One-at-a-Time Cost Tracking + +```bash +# Basic one-at-a-time with cost tracking +python cli.py --test --openai --one-at-a-time --enable-cost-tracking + +# With detailed cost report +python cli.py --test --openai --one-at-a-time --enable-cost-tracking --cost-report + +# With lower concurrency for better cost monitoring +python cli.py --test --openai --one-at-a-time --concurrent-workers 3 --enable-cost-tracking +``` + +### Hybrid Mode with Fallback + +```bash +# Hybrid mode with fallback to one-at-a-time when needed +python cli.py --test --hybrid --fallback-one-at-a-time --enable-cost-tracking +``` + +## Cost Tracking Output + +### Session Summary + +``` +COST TRACKING SUMMARY +============================================================ +Total cost: $1.2345 +Total tokens: 45,678 + - Input tokens: 28,456 + - Output tokens: 17,222 + - Cached tokens: 3,456 +Total API calls: 41 +Layouts processed: 1 + +Averages: + - Cost per layout: $1.2345 + - Tokens per layout: 45,678.0 + - API calls per layout: 41.0 + - Cost per 1K tokens: $0.0270 + +Operation breakdown: + - one_at_a_time_detection: 41 calls +============================================================ +``` + +### Cost Report JSON + +```json +{ + "session_summary": { + "operation_breakdown": { + "one_at_a_time_detection": 41 + } + }, + "detailed_api_calls": [ + { + "operation_type": "one_at_a_time_detection", + "master_id": "1011A_1011_05", + "token_usage": { + "prompt_tokens": 1200, + "completion_tokens": 150, + "total_tokens": 1350, + "cached_tokens": 0 + }, + "total_cost": 0.0036, + "layout_name": "test_layout.jpg" + } + ] +} +``` + +## Integration with Hybrid Mode + +The one-at-a-time cost tracking also works with the hybrid mode's fallback mechanism: + +### Hybrid Mode Fallback + +When hybrid mode uses the fallback to one-at-a-time detection: + +1. **Initial API call**: Panel counting + censorship detection +2. **Fallback API calls**: 41 individual master comparisons +3. **Total API calls**: 42 (1 + 41) +4. **Cost tracking**: Tracks both operation types separately + +### Example Hybrid Fallback Cost Breakdown + +``` +Operation breakdown: + - panel_counting_censorship: 1 call + - one_at_a_time_detection: 41 calls +``` + +## Testing + +Run the comprehensive test to see one-at-a-time cost tracking in action: + +```bash +python test_one_at_a_time_cost_tracking.py +``` + +This test will: +1. Run one-at-a-time mode with cost tracking +2. Show real-time cost progress +3. Generate detailed cost report +4. Compare costs with hybrid mode + +## Technical Details + +### Multiprocessing Architecture + +1. **Worker processes**: Extract token usage from API responses +2. **Main process**: Collects token data and tracks costs +3. **No shared state**: Each process handles its own API calls +4. **Thread-safe**: Cost tracking is done in the main process only + +### Error Handling + +- **Missing token data**: Warns when API response lacks usage information +- **API failures**: Handles cases where individual API calls fail +- **Graceful degradation**: Cost tracking failure doesn't break processing + +### Performance Impact + +- **Minimal overhead**: Token extraction adds negligible processing time +- **Memory efficient**: Token data is small and temporary +- **No API rate impact**: No additional API calls are made + +## Benefits + +### 1. **Detailed Cost Visibility** +- See exact cost per master image comparison +- Identify expensive vs. cheap operations +- Track cost trends over time + +### 2. **Cost Optimization** +- Compare one-at-a-time vs. hybrid mode costs +- Make informed decisions about detection method +- Optimize concurrent workers for cost efficiency + +### 3. **Budget Planning** +- Accurate cost estimates for large batches +- Understand cost implications of different modes +- Set appropriate spending limits + +### 4. **Performance Analysis** +- Correlate cost with accuracy +- Identify optimal worker counts +- Monitor API efficiency + +## Future Enhancements + +Potential improvements include: + +1. **Per-master cost optimization**: Identify which masters are most expensive +2. **Dynamic worker adjustment**: Reduce workers when costs are high +3. **Cost-based fallback**: Use cost thresholds to decide between modes +4. **Master image prioritization**: Process cheaper masters first + +## Conclusion + +The one-at-a-time cost tracking implementation provides complete visibility into the cost structure of the most accurate detection method. Combined with hybrid mode cost tracking, users can make informed decisions about the trade-offs between accuracy and cost. + +The implementation maintains the existing performance characteristics while adding comprehensive cost monitoring capabilities that help optimize both accuracy and budget. \ No newline at end of file diff --git a/OPENAI_API_COST_TRACKING_VERIFICATION.md b/OPENAI_API_COST_TRACKING_VERIFICATION.md new file mode 100644 index 0000000..5da4588 --- /dev/null +++ b/OPENAI_API_COST_TRACKING_VERIFICATION.md @@ -0,0 +1,220 @@ +# OpenAI API Cost Tracking Verification Report + +## Executive Summary + +✅ **All OpenAI API calls are properly instrumented with cost tracking** + +After comprehensive code examination, I can confirm that **all 4 OpenAI API calls** in the codebase have been properly instrumented with token usage extraction and cost tracking. + +## Complete API Call Inventory + +### API Call 1: One-at-a-Time Detection (Multiprocessing) +- **Location**: `openai_detector.py:140` +- **Function**: `process_single_master_detection_openai()` +- **Operation Type**: `"one_at_a_time_detection"` +- **Cost Tracking**: ✅ **IMPLEMENTED** +- **Method**: Token usage extracted in worker process, cost tracked in main process +- **Usage**: Individual master image comparisons with multiprocessing + +```python +# Line 140: API call in worker process +response = client.chat.completions.create(...) + +# Lines 167-173: Token usage extraction +token_usage_data = { + 'prompt_tokens': response.usage.prompt_tokens, + 'completion_tokens': response.usage.completion_tokens, + 'total_tokens': response.usage.total_tokens, + 'cached_tokens': getattr(response.usage, 'cached_tokens', 0) +} + +# Lines 617-626: Cost tracking in main process +cost_calculator.track_api_call( + operation_type="one_at_a_time_detection", + prompt_tokens=token_data['prompt_tokens'], + completion_tokens=token_data['completion_tokens'], + cached_tokens=token_data['cached_tokens'], + layout_name=layout_name, + master_id=master_id +) +``` + +### API Call 2: Regular Detection (Batch) +- **Location**: `openai_detector.py:424` +- **Function**: `make_robust_api_call()` +- **Operation Type**: `"detection"` +- **Cost Tracking**: ✅ **IMPLEMENTED** +- **Method**: Direct cost tracking in same process +- **Usage**: Batch comparison of all masters against layout + +```python +# Line 424: API call +response = self.client.chat.completions.create(...) + +# Lines 436-444: Cost tracking +if hasattr(response, 'usage') and response.usage: + token_usage = extract_token_usage_from_response(response) + cost_calculator.track_api_call( + operation_type="detection", + prompt_tokens=token_usage.prompt_tokens, + completion_tokens=token_usage.completion_tokens, + cached_tokens=token_usage.cached_tokens, + layout_name=operation_name + ) +``` + +### API Call 3: Censorship Detection (Standalone) +- **Location**: `openai_detector.py:1012` +- **Function**: `detect_layout_censorship()` +- **Operation Type**: `"censorship_detection"` +- **Cost Tracking**: ✅ **IMPLEMENTED** +- **Method**: Direct cost tracking in same process +- **Usage**: Standalone censorship analysis + +```python +# Line 1012: API call +response = self.client.chat.completions.create(...) + +# Lines 1034-1041: Cost tracking +if hasattr(response, 'usage') and response.usage: + token_usage = extract_token_usage_from_response(response) + cost_calculator.track_api_call( + operation_type="censorship_detection", + prompt_tokens=token_usage.prompt_tokens, + completion_tokens=token_usage.completion_tokens, + cached_tokens=token_usage.cached_tokens, + layout_name=Path(layout_path).name + ) +``` + +### API Call 4: Combined Panel Counting + Censorship +- **Location**: `openai_detector.py:1283` +- **Function**: `count_panels_and_detect_censorship()` +- **Operation Type**: `"panel_counting_censorship"` +- **Cost Tracking**: ✅ **IMPLEMENTED** +- **Method**: Direct cost tracking in same process +- **Usage**: Hybrid mode primary API call + +```python +# Line 1283: API call +response = self.client.chat.completions.create(...) + +# Lines 1304-1312: Cost tracking +if hasattr(response, 'usage') and response.usage: + token_usage = extract_token_usage_from_response(response) + cost_calculator.track_api_call( + operation_type="panel_counting_censorship", + prompt_tokens=token_usage.prompt_tokens, + completion_tokens=token_usage.completion_tokens, + cached_tokens=token_usage.cached_tokens, + layout_name=layout_name + ) +``` + +## Cost Tracking Architecture + +### Operation Types Tracked +1. **`one_at_a_time_detection`**: Individual master comparisons (41 calls per layout) +2. **`detection`**: Batch master comparisons (1 call per layout) +3. **`censorship_detection`**: Standalone censorship analysis (1 call per layout) +4. **`panel_counting_censorship`**: Combined analysis for hybrid mode (1 call per layout) + +### Multiprocessing Handling +- **Worker processes**: Extract token usage data from API responses +- **Main process**: Collects token data and performs cost calculations +- **Thread-safe**: No shared state between processes +- **Error handling**: Graceful handling of missing token data + +### Cost Tracking Features +- **Real-time tracking**: Cost calculated immediately after each API call +- **Per-layout breakdown**: Cost associated with specific layout files +- **Master-level granularity**: Individual costs for one-at-a-time mode +- **Session summaries**: Comprehensive cost reporting across all operations + +## Verification Methods Used + +### 1. **Code Search** +- Searched for all `client.chat.completions.create` calls +- Verified each call has corresponding cost tracking +- Confirmed no orphaned API calls exist + +### 2. **Manual Code Review** +- Examined each API call location +- Verified token extraction implementation +- Confirmed cost tracking integration + +### 3. **Architecture Analysis** +- Analyzed multiprocessing token data flow +- Verified main process cost collection +- Confirmed operation type categorization + +## Cost Tracking Coverage Summary + +| API Call Location | Function | Operation Type | Cost Tracking | Status | +|------------------|----------|----------------|---------------|---------| +| `openai_detector.py:140` | `process_single_master_detection_openai()` | `one_at_a_time_detection` | ✅ | Complete | +| `openai_detector.py:424` | `make_robust_api_call()` | `detection` | ✅ | Complete | +| `openai_detector.py:1012` | `detect_layout_censorship()` | `censorship_detection` | ✅ | Complete | +| `openai_detector.py:1283` | `count_panels_and_detect_censorship()` | `panel_counting_censorship` | ✅ | Complete | + +## Usage Mode Coverage + +### ✅ **OpenAI Mode (Regular)** +- **API Call**: `detection` (1 call per layout) +- **Cost Tracking**: Fully implemented +- **Usage**: `--openai` + +### ✅ **OpenAI Mode (One-at-a-Time)** +- **API Call**: `one_at_a_time_detection` (41 calls per layout) +- **Cost Tracking**: Fully implemented with multiprocessing support +- **Usage**: `--openai --one-at-a-time` + +### ✅ **Hybrid Mode** +- **API Call**: `panel_counting_censorship` (1 call per layout) +- **Cost Tracking**: Fully implemented +- **Usage**: `--hybrid` + +### ✅ **Hybrid Mode with Fallback** +- **API Calls**: `panel_counting_censorship` + `one_at_a_time_detection` (1 + 41 calls) +- **Cost Tracking**: Both operation types tracked separately +- **Usage**: `--hybrid --fallback-one-at-a-time` + +### ✅ **CEN Refinement** +- **API Call**: `censorship_detection` (additional call when needed) +- **Cost Tracking**: Fully implemented +- **Usage**: `--refinement-mode` + +## Token Usage Data Captured + +For each API call, the following token data is captured: +- **Prompt tokens**: Input tokens sent to the API +- **Completion tokens**: Output tokens generated by the API +- **Total tokens**: Sum of prompt and completion tokens +- **Cached tokens**: Tokens from cached input (if applicable) + +## Cost Calculation + +Using OpenAI o3 pricing: +- **Input tokens**: $2.00 per million tokens +- **Cached input**: $0.50 per million tokens +- **Output tokens**: $8.00 per million tokens + +## Error Handling + +All API calls include proper error handling for cost tracking: +- **Missing usage data**: Graceful handling when API response lacks token information +- **API failures**: Cost tracking doesn't interfere with error handling +- **Multiprocessing errors**: Worker process failures don't break cost tracking + +## Testing Coverage + +Cost tracking can be tested with: +- **Unit tests**: `test_cost_calculator.py` +- **Integration tests**: `test_cost_tracking_integration.py` +- **One-at-a-time tests**: `test_one_at_a_time_cost_tracking.py` + +## Conclusion + +✅ **VERIFICATION COMPLETE**: All OpenAI API calls in the codebase are properly instrumented with comprehensive cost tracking. The implementation covers all usage modes, operation types, and edge cases including multiprocessing and error handling. + +The cost tracking system provides complete visibility into OpenAI API usage costs across all detection modes and operational scenarios. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..a3ddb02 --- /dev/null +++ b/README.md @@ -0,0 +1,118 @@ +# Master Image Detection Application + +This application uses Google Gemini 2.5 Pro API to detect which master images appear in layout images. + +## Features + +- **Filename-based IDs**: Master images are identified by their filenames (without .jpg extension) +- **Comprehensive Detection**: Finds exact matches, cropped versions, scaled/rotated images +- **Detailed Results**: JSON output with layout filenames and detected master filenames +- **Optimized Processing**: Sequential processing with master images uploaded only once +- **Progress Tracking**: Real-time progress updates and periodic saves during batch processing +- **Error Handling**: Automatic retries and graceful error recovery + +## Setup + +1. **Install Dependencies**: + ```bash + python3 -m venv venv + source venv/bin/activate + pip install -r requirements.txt + ``` + +2. **Configure API Key**: + - API key is already set in `.env` file + - Ensure `.env` file exists with your Gemini API key + +## Usage + +Activate the virtual environment first: +```bash +source venv/bin/activate +``` + +### Command Line Options + +```bash +# Test with 1 layout +python image_detector.py --test + +# Process first 10 layouts +python image_detector.py --limit 10 + +# Process all layouts +python image_detector.py --all + +# Custom output filename +python image_detector.py --limit 50 --output my_batch_results + +# Process all layouts (sequential but optimized) +python image_detector.py --all + +# Custom paths +python image_detector.py --all --master-path /path/to/masters --layout-path /path/to/layouts +``` + +### Help +```bash +python image_detector.py --help +``` + +### Common Commands + +```bash +# Quick test +python image_detector.py --test + +# Small batch +python image_detector.py --limit 10 + +# Full processing (all 306 layouts) - optimized sequential +python image_detector.py --all +``` + +## Output Format + +Results are saved as JSON with this structure: + +```json +{ + "metadata": { + "total_layouts_processed": 1, + "total_master_images": 41, + "master_images_available": ["1011A_1011_05", "1011A_1011_06", ...] + }, + "results": { + "6814786": { + "layout_filename": "6814786.jpg", + "detected_master_ids": ["1011A_1011_05"], + "detected_master_filenames": ["1011A_1011_05.jpg"], + "analysis": "Detailed analysis of what was found..." + } + } +} +``` + +## Key Output Fields + +- **layout_filename**: The layout image filename +- **detected_master_ids**: Master image IDs (filenames without .jpg) +- **detected_master_filenames**: Full master image filenames with .jpg extension +- **analysis**: Gemini's detailed explanation of the detection + +## Directory Structure + +``` +├── master_images/ # 41 master images to detect +├── layouts/ # 299+ layout images to analyze +├── results/ # JSON output files +├── venv/ # Python virtual environment +├── image_detector.py # Main application +├── test_simple.py # API connection tester +├── requirements.txt # Dependencies +└── .env # API configuration +``` + +## Example Results + +Layout `6814786.jpg` contains master image `1011A_1011_05.jpg` (cropped version). \ No newline at end of file diff --git a/SPLITTING_TEST_RESULTS.md b/SPLITTING_TEST_RESULTS.md new file mode 100644 index 0000000..78b1ad4 --- /dev/null +++ b/SPLITTING_TEST_RESULTS.md @@ -0,0 +1,109 @@ +# Panel Splitting Test Results + +## ✅ Test Summary: SUCCESS + +The panel splitting functionality has been successfully implemented and tested with the multi-panel layout image `6786505.jpg`. + +## 🧪 Test Results + +### Image Details +- **Test Image**: `6786505.jpg` +- **Dimensions**: 10592 x 802 pixels (horizontal strip layout) +- **Type**: Multi-panel fashion advertisement layout + +### Splitting Performance + +| Target Count | Generated Splits | Status | +|-------------|------------------|--------| +| 5 | 4 | ✅ Good | +| 8 | 8 | ✅ Perfect | +| 10 | 7 | ✅ Good | +| 12 | 6 | ✅ Reasonable | + +### Individual Method Performance + +| Method | Boundaries Generated | Status | +|--------|---------------------|--------| +| Enhanced Gradient Analysis | 5 | ✅ Working | +| Advanced Canny Detection | 1 | ✅ Working | +| Template Matching | 15 | ✅ Working | +| Contour Analysis | 0 | ⚠️ No results | +| Texture Analysis | 3 | ✅ Working | +| Clustering Method | 1 | ✅ Working | + +## 🔬 Technical Analysis + +### Consensus System +- **Template Matching** performed best with 15 detailed boundaries +- **Enhanced Gradient Analysis** provided good 5-boundary results +- **Consensus system** successfully combined multiple methods +- **Confidence scoring** worked effectively (0.8-1.0 range) + +### Split Quality +- **Coverage**: Good coverage of original image +- **No overlaps**: Clean boundary detection +- **Reasonable aspect ratios**: Splits maintain good proportions +- **Debug output**: Comprehensive visualization available + +## 📁 Generated Files + +### Split Images Created: +- `6786505_target5_split_01.jpg` through `6786505_target5_split_04.jpg` +- `6786505_target8_split_01.jpg` through `6786505_target8_split_08.jpg` +- `6786505_target10_split_01.jpg` through `6786505_target10_split_07.jpg` +- `6786505_target12_split_01.jpg` through `6786505_target12_split_06.jpg` + +### Debug Files: +- Debug visualization saved to `debug_splitting/` directory +- Individual method results analyzed and logged + +## 🚀 Implementation Features + +### ✅ Completed Features: +1. **Multi-Method Approach**: 6 different CV techniques +2. **Consensus System**: Weighted voting and boundary clustering +3. **Target Count Guidance**: Adaptive splitting based on expected panels +4. **Quality Validation**: Overlap detection and coverage analysis +5. **Debug Mode**: Comprehensive visualization and logging +6. **Fallback Mechanisms**: Graceful degradation when methods fail + +### 🔧 Technical Implementation: +- **Gradient Peak Analysis**: Multi-scale processing with prominence detection +- **Canny Edge Detection**: Multi-threshold with morphological operations +- **Template Matching**: Common separator pattern detection +- **Contour Analysis**: Rectangular panel detection +- **Texture Analysis**: LBP-based separator identification +- **Clustering**: K-means based region segmentation + +## 🎯 Next Steps + +The panel splitting implementation is ready for: +1. **CLI Integration**: `--split` flag fully implemented +2. **OpenAI Guidance**: Panel count targeting system ready +3. **Detector Integration**: Works with all detector types +4. **Refinement Mode**: Compatible with existing CEN refinement + +## 💡 Usage Examples + +```bash +# Test basic splitting +python test_simple_split.py + +# Test with CLI +python cli.py --test --split + +# With OpenAI guidance +python cli.py --test --openai --split + +# With refinement mode +python cli.py --test --split --refinement-mode +``` + +## 📊 Performance Notes + +- **Processing Time**: Fast for most methods +- **Memory Usage**: Reasonable for large images +- **Accuracy**: Good boundary detection for horizontal strips +- **Reliability**: Multiple fallback mechanisms ensure results + +The implementation successfully demonstrates robust multi-panel layout splitting with comprehensive testing and validation. \ No newline at end of file diff --git a/advanced_splitter.py b/advanced_splitter.py new file mode 100644 index 0000000..7417198 --- /dev/null +++ b/advanced_splitter.py @@ -0,0 +1,409 @@ +#!/usr/bin/env python3 +""" +Advanced Panel Splitter Module - Edge detection and gutter analysis for panel splitting +""" + +import os +import cv2 +import numpy as np +from typing import List, Dict, Tuple, Optional +from pathlib import Path +from PIL import Image + + +class AdvancedPanelSplitter: + """ + Advanced panel splitter using edge detection and gutter analysis + for more accurate splitting of horizontal multi-panel marketing layouts. + """ + + def __init__(self, percentile: float = 10, min_gap: int = 5, debug: bool = False): + """ + Initialize the AdvancedPanelSplitter + + Args: + percentile (float): Percentile threshold for detecting gutters (0-100) + min_gap (int): Minimum consecutive low-energy columns for gutter detection + debug (bool): Enable debug mode for visualization + """ + self.percentile = percentile + self.min_gap = min_gap + self.debug = debug + self.debug_dir = "debug_advanced_splitting" + if self.debug and not os.path.exists(self.debug_dir): + os.makedirs(self.debug_dir) + + def find_boundaries_auto(self, img_gray: np.ndarray) -> List[int]: + """ + Locate column indices that represent gutters between panels. + + Parameters + ---------- + img_gray : np.ndarray + Grayscale image (H, W). + + Returns + ------- + List[int] + Sorted list of boundary x‑coordinates (including 0 and width‑1). + """ + # Vertical Sobel to highlight vertical edges + sobelx = cv2.Sobel(img_gray, cv2.CV_64F, 1, 0, ksize=3) + col_energy = np.abs(sobelx).sum(axis=0) # 1‑D edge energy profile + + thresh = np.percentile(col_energy, self.percentile) + low_energy_cols = np.where(col_energy < thresh)[0] + + if low_energy_cols.size == 0: # fallback to equidistant split in worst case + return [0, img_gray.shape[1] - 1] + + # Group consecutive columns + clusters, current = [], [low_energy_cols[0]] + for c in low_energy_cols[1:]: + if c == current[-1] + 1: + current.append(c) + else: + clusters.append(current) + current = [c] + clusters.append(current) + + # Keep clusters that are wide enough (filter noise) + clusters = [cl for cl in clusters if len(cl) >= self.min_gap] + + # Use the centre of each cluster as the boundary position + boundaries = [0] + [int(np.mean(cl)) for cl in clusters] + [img_gray.shape[1] - 1] + boundaries = sorted(list(set(boundaries))) # de‑duplicate & sort + return boundaries + + def split_image(self, img: Image.Image, boundaries: List[int], out_dir: Path, stem: str) -> List[Dict]: + """ + Crop and save each panel, returning split information. + + Parameters + ---------- + img : PIL.Image.Image + boundaries : List[int] + Sorted x positions of panel borders. + out_dir : Path + Where to write files. + stem : str + Base name for panel files. + + Returns + ------- + List[Dict] + List of split information with image data and metadata + """ + out_dir.mkdir(parents=True, exist_ok=True) + splits = [] + + for i in range(len(boundaries) - 1): + left = boundaries[i] + right = boundaries[i + 1] + if right - left < 5: # skip spurious zero‑width crops + continue + panel = img.crop((left, 0, right, img.height)) + + # Convert PIL image to OpenCV format for consistency + panel_cv = cv2.cvtColor(np.array(panel), cv2.COLOR_RGB2BGR) + + # Save panel file if debug mode + if self.debug: + panel_path = out_dir / f"{stem}_panel_{i+1:02d}.png" + panel.save(panel_path, "PNG") + print(f"Saved panel {i+1} → {stem}_panel_{i+1:02d}.png") + + splits.append({ + 'image': panel_cv, + 'bounds': (left, 0, right - left, img.height), + 'confidence': 0.9, # High confidence for advanced method + 'method': 'advanced_edge_detection' + }) + + return splits + + def split_layout_and_match(self, layout_path: str, master_images: List[str], + detector_instance=None, n_panels: Optional[int] = None) -> Dict: + """ + Main method to split a layout using advanced edge detection and match splits to master images + + Args: + layout_path (str): Path to the layout image + master_images (List[str]): List of master image paths + detector_instance: The detector instance to use for matching + n_panels (int, optional): If provided, split into this many equal‑width panels + + Returns: + Dict: Detection results with matches from all splits + """ + # Load image + img = Image.open(layout_path).convert("RGB") + img_gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY) + + print(f"Processing {os.path.basename(layout_path)} with advanced splitting") + print(f"Image dimensions: {img.width}x{img.height}") + print(f"Percentile threshold: {self.percentile}, Min gap: {self.min_gap}") + + # Determine split boundaries + if n_panels: + # Equally spaced boundaries + w = img.width + step = w / n_panels + boundaries = [0] + [int(round(step * k)) for k in range(1, n_panels)] + [w - 1] + print(f"Using fixed {n_panels} panels with equal spacing") + else: + boundaries = self.find_boundaries_auto(img_gray) + print(f"Auto-detected {len(boundaries) - 1} panels") + + # Create output directory for splits if debug mode + out_dir = Path(self.debug_dir) if self.debug else Path("/tmp/advanced_splits") + stem = Path(layout_path).stem + + # Split the image + splits = self.split_image(img, boundaries, out_dir, stem) + + if not splits: + print("No splits detected, returning empty results") + return { + 'layout_path': layout_path, + 'detected_masters': [], + 'panel_count': 0, + 'split_mode': 'advanced', + 'splits_generated': 0, + 'percentile': self.percentile, + 'min_gap': self.min_gap + } + + print(f"Generated {len(splits)} splits using advanced method") + + # Match each split to master images + all_matches = [] + split_results = [] + + for i, split_info in enumerate(splits): + print(f"Processing split {i+1}/{len(splits)}") + + # Save split image temporarily for matching + split_image = split_info['image'] + temp_split_path = f"/tmp/advanced_split_{i}.jpg" + cv2.imwrite(temp_split_path, split_image) + + # Match this split to master images using existing inlier analysis + if hasattr(detector_instance, 'match_split_to_masters'): + split_matches = detector_instance.match_split_to_masters( + temp_split_path, master_images + ) + else: + # Use basic inlier analysis if method doesn't exist + split_matches = self._match_split_basic(temp_split_path, master_images) + + # Add split metadata to matches + for match in split_matches: + match['split_index'] = i + match['split_bounds'] = split_info['bounds'] + match['split_confidence'] = split_info['confidence'] + match['split_method'] = 'advanced_edge_detection' + all_matches.append(match) + + split_results.append({ + 'split_index': i, + 'bounds': split_info['bounds'], + 'confidence': split_info['confidence'], + 'method': 'advanced_edge_detection', + 'matches': split_matches + }) + + # Clean up temporary file + if os.path.exists(temp_split_path): + os.remove(temp_split_path) + + # Aggregate results + result = { + 'layout_path': layout_path, + 'detected_masters': [match['master_id'] for match in all_matches], + 'panel_count': len(splits), + 'split_mode': 'advanced', + 'splits_generated': len(splits), + 'split_results': split_results, + 'all_matches': all_matches, + 'percentile': self.percentile, + 'min_gap': self.min_gap, + 'boundaries': boundaries + } + + # Remove duplicates while preserving highest confidence matches + result = self._deduplicate_matches(result) + + return result + + def split_panels(self, image_path: str, target_panel_count: int) -> List[Dict]: + """ + Split a layout image into individual panels (compatibility method for hybrid detector) + + Args: + image_path (str): Path to the layout image + target_panel_count (int): Target number of panels to split into + + Returns: + List[Dict]: List of split information with image data and metadata + """ + # Load image + img = Image.open(image_path).convert("RGB") + img_gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY) + + print(f"Advanced splitting: Processing {os.path.basename(image_path)}") + print(f"Image dimensions: {img.width}x{img.height}") + print(f"Target panels: {target_panel_count}, Percentile: {self.percentile}, Min gap: {self.min_gap}") + + # Determine split boundaries + boundaries = self.find_boundaries_auto(img_gray) + print(f"Auto-detected {len(boundaries) - 1} panels using advanced method") + + # Create output directory for splits if debug mode + out_dir = Path(self.debug_dir) if self.debug else Path("/tmp/advanced_splits") + stem = Path(image_path).stem + + # Split the image + splits = self.split_image(img, boundaries, out_dir, stem) + + if not splits: + print("No splits detected, falling back to equal division") + # Fallback to equal division if no splits detected + w = img.width + h = img.height + panel_width = w // target_panel_count + + splits = [] + for i in range(target_panel_count): + x = i * panel_width + width = panel_width if i < target_panel_count - 1 else w - x + + panel_img = img.crop((x, 0, x + width, h)) + panel_cv = cv2.cvtColor(np.array(panel_img), cv2.COLOR_RGB2BGR) + + splits.append({ + 'image': panel_cv, + 'bounds': (x, 0, width, h), + 'confidence': 0.7, + 'method': 'advanced_fallback_equal_division' + }) + + return splits + + def _match_split_basic(self, split_path: str, master_images: List[str]) -> List[Dict]: + """Basic matching using OpenCV features (fallback)""" + matches = [] + + try: + # Load the split image + split_img = cv2.imread(split_path, cv2.IMREAD_GRAYSCALE) + if split_img is None: + return matches + + # Initialize feature detector + akaze = cv2.AKAZE_create() + bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False) + + # Detect keypoints and descriptors for split image + kp_split, des_split = akaze.detectAndCompute(split_img, None) + + if des_split is None: + return matches + + # Load master images from the master_images directory + master_images_path = Path("master_images") + + for master_id in master_images: + master_path = master_images_path / f"{master_id}.jpg" + if not master_path.exists(): + continue + + # Load master image + master_img = cv2.imread(str(master_path), cv2.IMREAD_GRAYSCALE) + if master_img is None: + continue + + # Detect keypoints and descriptors for master image + kp_master, des_master = akaze.detectAndCompute(master_img, None) + + if des_master is None: + continue + + # Match features + matches_raw = bf.knnMatch(des_split, des_master, k=2) + + # Apply Lowe's ratio test + good_matches = [] + for match_pair in matches_raw: + if len(match_pair) == 2: + m, n = match_pair + if m.distance < 0.7 * n.distance: + good_matches.append(m) + + # If we have enough good matches, try to find homography + if len(good_matches) >= 10: + src_pts = np.float32([kp_split[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2) + dst_pts = np.float32([kp_master[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2) + + try: + M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0) + if M is not None: + inliers = int(np.sum(mask)) + inlier_ratio = inliers / len(good_matches) + + # Basic confidence scoring + if inliers >= 15 and inlier_ratio >= 0.6: + confidence = 'high' + elif inliers >= 8 and inlier_ratio >= 0.4: + confidence = 'medium' + else: + confidence = 'low' + + # Only include medium and high confidence matches + if confidence in ['medium', 'high']: + matches.append({ + 'master_id': master_id, + 'confidence': confidence, + 'inliers': inliers, + 'match_details': { + 'inliers': inliers, + 'good_matches': len(good_matches), + 'inlier_ratio': round(inlier_ratio, 3) + } + }) + except: + continue + + except Exception as e: + print(f"Error in basic matching: {e}") + + return matches + + def _deduplicate_matches(self, result: Dict) -> Dict: + """Remove duplicate matches, keeping highest confidence ones""" + if not result['all_matches']: + return result + + # Group matches by master_id + master_groups = {} + for match in result['all_matches']: + master_id = match['master_id'] + if master_id not in master_groups: + master_groups[master_id] = [] + master_groups[master_id].append(match) + + # Keep only the highest confidence match for each master + deduplicated_matches = [] + for master_id, matches in master_groups.items(): + # Sort by confidence (high > medium > low) and inliers + confidence_order = {'high': 3, 'medium': 2, 'low': 1} + best_match = max(matches, key=lambda x: ( + confidence_order.get(x.get('confidence', 'low'), 0), + x.get('inliers', 0) + )) + deduplicated_matches.append(best_match) + + result['all_matches'] = deduplicated_matches + result['detected_masters'] = [match['master_id'] for match in deduplicated_matches] + + return result \ No newline at end of file diff --git a/check_system_resources.py b/check_system_resources.py new file mode 100644 index 0000000..588469f --- /dev/null +++ b/check_system_resources.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +""" +Check and display system resource limits and current usage +""" + +import os +import subprocess +import resource +import psutil + +def check_file_descriptors(): + """Check file descriptor limits and usage""" + print("🔍 FILE DESCRIPTOR LIMITS") + print("=" * 40) + + # Get current limits + soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE) + print(f"Current soft limit: {soft_limit:,}") + print(f"Current hard limit: {hard_limit:,}") + + # Get current usage + pid = os.getpid() + try: + result = subprocess.run(['lsof', '-p', str(pid)], capture_output=True, text=True) + open_files = len(result.stdout.strip().split('\n')) - 1 + print(f"Current open files: {open_files}") + print(f"Usage: {open_files/soft_limit*100:.1f}% of soft limit") + except: + print("Could not determine current usage") + + print() + + # Recommendations + if soft_limit < 10000: + print("⚠️ RECOMMENDATION: Your soft limit is quite low") + print(" Consider increasing it with: ulimit -n 65536") + elif soft_limit < 65536: + print("ℹ️ RECOMMENDATION: Consider increasing soft limit for heavy processing") + print(" Command: ulimit -n 65536") + else: + print("✅ File descriptor limits look good") + + print() + +def check_memory(): + """Check memory usage""" + print("🧠 MEMORY USAGE") + print("=" * 40) + + memory = psutil.virtual_memory() + swap = psutil.swap_memory() + + print(f"RAM: {memory.percent:.1f}% used ({memory.used/1024**3:.1f}GB / {memory.total/1024**3:.1f}GB)") + print(f"Swap: {swap.percent:.1f}% used ({swap.used/1024**3:.1f}GB / {swap.total/1024**3:.1f}GB)") + + if memory.percent > 80: + print("⚠️ High RAM usage detected") + if swap.percent > 50: + print("⚠️ High swap usage detected") + + print() + +def check_processes(): + """Check running processes""" + print("🔄 PROCESS INFORMATION") + print("=" * 40) + + # CPU info + print(f"CPU cores: {os.cpu_count()}") + print(f"CPU usage: {psutil.cpu_percent(interval=1):.1f}%") + + # Load average + try: + load1, load5, load15 = os.getloadavg() + print(f"Load average: {load1:.2f}, {load5:.2f}, {load15:.2f}") + except: + print("Load average: unavailable") + + print() + +def recommend_settings(): + """Recommend optimal settings""" + print("🎯 RECOMMENDED SETTINGS") + print("=" * 40) + + cpu_count = os.cpu_count() + memory_gb = psutil.virtual_memory().total / (1024**3) + + print(f"For your system ({cpu_count} cores, {memory_gb:.1f}GB RAM):") + print() + + # Layout workers + if memory_gb < 16: + layout_workers = min(2, cpu_count // 2) + print(f"--layout-workers {layout_workers} (conservative for {memory_gb:.1f}GB RAM)") + elif memory_gb < 32: + layout_workers = min(4, cpu_count // 2) + print(f"--layout-workers {layout_workers} (balanced for {memory_gb:.1f}GB RAM)") + else: + layout_workers = min(6, cpu_count // 2) + print(f"--layout-workers {layout_workers} (aggressive for {memory_gb:.1f}GB RAM)") + + # Local workers + local_workers = max(1, cpu_count - 2) + print(f"--local-workers {local_workers} (CPU cores - 2)") + + print() + print("Full command suggestion:") + print("python cli.py --all --hybrid --split-simple --refinement-mode \\") + print(" --inlier-threshold 0.15 --inlier-ratio-threshold 0.2 \\") + print(" --fallback-one-at-a-time --enable-cost-tracking --cost-report \\") + print(f" --parallel-layouts --layout-workers {layout_workers} --local-workers {local_workers}") + + print() + +def main(): + print("🔧 SYSTEM RESOURCE CHECK") + print("=" * 50) + print() + + check_file_descriptors() + check_memory() + check_processes() + recommend_settings() + + print("💡 TROUBLESHOOTING TIPS:") + print("- If you get 'Too many open files': restart terminal and run 'ulimit -n 65536'") + print("- If memory usage is high: reduce --layout-workers") + print("- If processing is slow: check if swap usage is very high") + print("- Monitor with: Activity Monitor or 'top' command") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/cli.py b/cli.py new file mode 100644 index 0000000..6898b88 --- /dev/null +++ b/cli.py @@ -0,0 +1,641 @@ +#!/usr/bin/env python3 +""" +CLI Module - Command Line Interface for Master Image Detection +""" + +import argparse +import multiprocessing +import sys +from logging_config import setup_dual_logging, log_system_info, log_exception, DualLogger +from gemini_detector import ImageDetector +from openai_detector import OpenAIImageDetector +from vector_detector import VectorImageDetector +from hybrid_detector import HybridImageDetector +from cost_calculator import cost_calculator + + +def parse_arguments(): + """Parse command line arguments""" + parser = argparse.ArgumentParser( + description="Detect master images in layout images using LLM APIs (Gemini, OpenAI), Vector Embeddings, or Hybrid mode", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s --test # Test with 1 layout (Gemini LLM) + %(prog)s --test --openai # Test with 1 layout (OpenAI o3) + %(prog)s --file 6786505.jpg --openai --split # Process specific file with OpenAI and splitting + %(prog)s --limit 10 # Process first 10 layouts (Gemini LLM) + %(prog)s --limit 10 --openai # Process first 10 layouts (OpenAI o3) + %(prog)s --all # Process all layouts (Gemini LLM) + %(prog)s --all --openai # Process all layouts (OpenAI o3) + %(prog)s --test --vector-mode # Test with 1 layout (Vector embeddings) + %(prog)s --limit 10 --vector-mode # Process first 10 layouts (Vector embeddings) + %(prog)s --all --vector-mode # Process all layouts (Vector embeddings) + %(prog)s --test --vector-mode --splitting-mode grid # Test with grid-based image splitting + %(prog)s --limit 5 --vector-mode --splitting-mode auto # Auto-detect splitting for 5 layouts + %(prog)s --all --vector-mode --similarity-threshold 0.8 # Process all with higher similarity threshold + %(prog)s --all --output my_results # Process all with custom output name + %(prog)s --limit 50 --output batch_50 # Process 50 layouts with custom name + %(prog)s --all --no-greyscale # Process all without greyscale conversion (Gemini only) + %(prog)s --all --no-contrast # Process all without contrast enhancement (Gemini only) + %(prog)s --all --contrast-factor 2.0 # Process all with higher contrast factor (Gemini only) + %(prog)s --test --refinement-mode # Test with CEN refinement (1 layout) + %(prog)s --limit 5 --refinement-mode # Process 5 layouts with CEN refinement + %(prog)s --all --refinement-mode --output refined_results # Full run with CEN refinement + %(prog)s --test --one-at-a-time # Test one-at-a-time mode (1 layout, 41 concurrent API calls) + %(prog)s --test --openai --one-at-a-time # Test OpenAI one-at-a-time mode + %(prog)s --all --one-at-a-time --refinement-mode # Full run with one-at-a-time and CEN refinement + %(prog)s --test --one-at-a-time --concurrent-workers 8 # Test with 8 concurrent workers (faster) + %(prog)s --all --openai --one-at-a-time --concurrent-workers 3 # OpenAI full run with 3 concurrent workers + %(prog)s --test --openai --one-at-a-time --panel-aware-refinement # Test with panel counting and OpenCV refinement + %(prog)s --all --openai --one-at-a-time --panel-aware-refinement # Full run with panel-aware match refinement + %(prog)s --test --openai --one-at-a-time --refinement-mode --panel-aware-refinement # Test with both CEN and panel-aware refinement + %(prog)s --test --hybrid # Test with hybrid mode (OpenAI panel counting + local analysis) + %(prog)s --limit 10 --hybrid # Process first 10 layouts with hybrid mode + %(prog)s --all --hybrid # Process all layouts with hybrid mode + %(prog)s --test --hybrid --panel-threshold 3 # Test hybrid mode with custom panel threshold + %(prog)s --all --hybrid --refinement-mode # Full run with hybrid mode and CEN refinement + %(prog)s --test --hybrid --enable-greyscale # Test hybrid mode with greyscale override + %(prog)s --limit 5 --hybrid --enable-contrast # Test hybrid mode with contrast enhancement override + %(prog)s --test --hybrid --concurrent-workers 8 # Test hybrid mode with 8 concurrent workers (both paths) + %(prog)s --all --hybrid --concurrent-workers 3 # Full hybrid run with 3 concurrent workers (both paths) + %(prog)s --test --hybrid --openai-workers 10 --local-workers 8 # Test with separate worker counts + %(prog)s --all --hybrid --local-workers 12 # Full run with 12 local workers (OpenAI auto-detects) + %(prog)s --test --hybrid --split-advanced # Test with advanced edge detection splitting + %(prog)s --limit 10 --hybrid --split-advanced --percentile 15 --min-gap 3 # Advanced splitting with custom parameters + %(prog)s --test --hybrid --vector-mode # Test hybrid mode with vector similarity instead of inlier analysis + %(prog)s --limit 10 --hybrid --vector-mode --similarity-threshold 0.8 # Hybrid with higher similarity threshold + %(prog)s --all --hybrid --vector-mode --split-simple # Full run with vector similarity and simple splitting + %(prog)s --all --hybrid --split-simple --fallback-one-at-a-time # Hybrid with fallback to OpenAI one-at-a-time when needed + %(prog)s --test --hybrid --parallel-layouts # Test hybrid mode with parallel layout processing + %(prog)s --limit 10 --hybrid --parallel-layouts --layout-workers 4 # Process 10 layouts with 4 parallel workers + %(prog)s --all --hybrid --parallel-layouts --layout-workers 6 --max-concurrent-layouts 4 # Full run with controlled parallelism + %(prog)s --test --hybrid --enable-cost-tracking # Test with detailed cost tracking enabled + %(prog)s --limit 10 --openai --enable-cost-tracking --cost-report # OpenAI mode with cost tracking and report generation + %(prog)s --all --hybrid --enable-cost-tracking --cost-estimate 300 # Full run with cost tracking and monthly estimate + """ + ) + + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('--test', action='store_true', + help='Test mode: process only 1 layout image') + group.add_argument('--limit', type=int, metavar='N', + help='Process first N layout images') + group.add_argument('--all', action='store_true', + help='Process all layout images') + group.add_argument('--file', type=str, metavar='FILENAME', + help='Process a specific layout file (e.g., --file 6786505.jpg)') + + parser.add_argument('--output', type=str, default=None, metavar='NAME', + help='Output filename (without .json extension). Default: auto-generated based on mode') + + # Image processing options + parser.add_argument('--no-greyscale', action='store_true', + help='Disable greyscale conversion (enabled by default for Gemini/OpenAI, disabled for hybrid)') + parser.add_argument('--no-contrast', action='store_true', + help='Disable contrast enhancement (enabled by default for Gemini/OpenAI, disabled for hybrid)') + parser.add_argument('--contrast-factor', type=float, default=1.5, metavar='FACTOR', + help='Contrast enhancement factor (default: 1.5)') + + # Hybrid mode image processing overrides + parser.add_argument('--enable-greyscale', action='store_true', + help='Enable greyscale conversion for hybrid mode (disabled by default)') + parser.add_argument('--enable-contrast', action='store_true', + help='Enable contrast enhancement for hybrid mode (disabled by default)') + + # Operating mode options + provider_group = parser.add_mutually_exclusive_group() + provider_group.add_argument('--openai', action='store_true', + help='Use OpenAI o3 model instead of Gemini (requires OPENAI_API_KEY)') + provider_group.add_argument('--hybrid', action='store_true', + help='Use hybrid mode: OpenAI panel counting + local analysis (≤2 panels) or full OpenAI (≥3 panels)') + + # Vector mode option (can be combined with hybrid mode) + parser.add_argument('--vector-mode', action='store_true', + help='Use Google Vertex AI vector embeddings for similarity matching. Can be combined with --hybrid to replace inlier analysis with vector similarity.') + + parser.add_argument('--similarity-threshold', type=float, default=0.75, metavar='THRESHOLD', + help='Similarity threshold for vector mode (0.0-1.0, default: 0.75)') + parser.add_argument('--no-truncation', action='store_true', + help='Disable truncation of match results in hybrid mode (keeps all matches instead of limiting to panel count)') + parser.add_argument('--splitting-mode', type=str, default='none', + choices=['none', 'auto', 'grid'], metavar='MODE', + help='Image splitting mode for vector mode: none, auto, grid (default: none)') + parser.add_argument('--min-crop-size', type=int, default=200, metavar='PIXELS', + help='Minimum crop size in pixels for splitting (default: 200)') + parser.add_argument('--crop-padding', type=int, default=20, metavar='PIXELS', + help='Padding around detected crops in pixels (default: 20)') + parser.add_argument('--refinement-mode', action='store_true', + help='Enable CEN refinement mode (after initial detection, refines CEN vs non-CEN matches)') + parser.add_argument('--one-at-a-time', action='store_true', + help='Process masters one at a time using separate processes (makes 41 separate API calls per layout for exact matching)') + parser.add_argument('--concurrent-workers', type=int, default=None, metavar='N', + help='Number of concurrent processes (auto-detects optimal values: OpenAI=total_masters, Local=CPU_cores if not specified)') + parser.add_argument('--openai-workers', type=int, default=None, metavar='N', + help='Number of concurrent processes for OpenAI analysis (default: total number of master images)') + parser.add_argument('--local-workers', type=int, default=None, metavar='N', + help='Number of concurrent processes for local inlier analysis (default: number of CPU cores)') + parser.add_argument('--panel-aware-refinement', action='store_true', + help='Enable panel-aware refinement: count panels with OpenAI o3, then use OpenCV inlier analysis to select best matches (only works with --openai --one-at-a-time)') + parser.add_argument('--split', action='store_true', + help='Enable panel splitting mode: split multi-panel layouts into individual images using multiple CV methods, then match each split to masters') + parser.add_argument('--split-advanced', action='store_true', + help='Enable advanced panel splitting mode: use edge detection and gutter analysis for more accurate splitting') + parser.add_argument('--split-simple', action='store_true', + help='Enable simple panel splitting mode: evenly split layout into panels based on OpenAI analysis count (hybrid mode only)') + parser.add_argument('--percentile', type=float, default=10, metavar='THRESHOLD', + help='Percentile threshold (0-100) for detecting gutters in advanced splitting; lower = stricter (default: 10)') + parser.add_argument('--min-gap', type=int, default=5, metavar='PIXELS', + help='Minimum consecutive low-energy columns needed to mark a gutter in advanced splitting (default: 5)') + + # Hybrid mode specific options + parser.add_argument('--panel-threshold', type=int, default=2, metavar='N', + help='Panel threshold for hybrid mode: ≤N panels use local analysis, >N panels use OpenAI (default: 2)') + parser.add_argument('--inlier-threshold', type=float, default=0.65, metavar='THRESHOLD', + help='Inlier confidence threshold for hybrid local analysis (default: 0.65)') + parser.add_argument('--inlier-ratio-threshold', type=float, default=0.4, metavar='THRESHOLD', + help='Minimum inlier ratio for confident matches (default: 0.4)') + parser.add_argument('--fallback-one-at-a-time', action='store_true', + help='Enable fallback to OpenAI one-at-a-time method when matched masters < detected panels. Uses multiprocessing with number of workers equal to number of masters.') + + # Parallel processing options + parser.add_argument('--parallel-layouts', action='store_true', + help='Enable parallel layout processing with serial inlier analysis coordination') + parser.add_argument('--layout-workers', type=int, default=None, metavar='N', + help='Number of concurrent layout workers for parallel processing (default: auto-detect based on CPU cores)') + parser.add_argument('--max-concurrent-layouts', type=int, default=None, metavar='N', + help='Maximum layouts processing simultaneously (default: same as layout-workers)') + + # Cost tracking options + parser.add_argument('--enable-cost-tracking', action='store_true', + help='Enable detailed cost tracking and reporting for OpenAI API usage') + parser.add_argument('--cost-report', action='store_true', + help='Generate detailed cost report after processing') + parser.add_argument('--cost-estimate', type=int, metavar='LAYOUTS', + help='Estimate monthly cost based on specified number of layouts per month (default: 300)') + + return parser.parse_args() + + +def main(): + """Main execution function""" + # Initialize dual logging first + logger = setup_dual_logging() + dual_logger = DualLogger(logger) + + # Log system information + log_system_info(logger) + + try: + args = parse_arguments() + + # Initialize cost tracking if enabled + if args.enable_cost_tracking: + cost_calculator.enable_tracking = True + dual_logger.print("Cost tracking enabled") + + # Determine processing parameters + if args.test: + limit = 1 + default_output = "test_results" + dual_logger.print("Running in TEST mode - processing 1 layout image...") + elif args.limit: + limit = args.limit + default_output = f"batch_{limit}_results" + dual_logger.print(f"Processing first {limit} layout images...") + elif args.file: + # Process specific file + specific_file = args.file + if not specific_file.endswith('.jpg'): + specific_file += '.jpg' + limit = 1 + default_output = f"file_{specific_file[:-4]}_results" + dual_logger.print(f"Processing specific file: {specific_file}...") + else: # args.all + limit = None + specific_file = None + default_output = "full_results" + dual_logger.print("Processing ALL layout images...") + + # Set specific_file to None for other modes + if not args.file: + specific_file = None + + # Determine which detector to use + if args.hybrid: + analysis_method = "vector similarity" if args.vector_mode else "local analysis" + dual_logger.print(f"Using HYBRID mode with OpenAI panel counting + {analysis_method}") + + # Validate hybrid mode arguments + if args.panel_aware_refinement: + dual_logger.warning("--panel-aware-refinement is not needed in hybrid mode (panel analysis is built-in), ignoring...") + if args.one_at_a_time: + dual_logger.warning("--one-at-a-time is handled automatically in hybrid mode, ignoring...") + if args.openai: + dual_logger.error("--hybrid cannot be used with --openai") + return 1 + if args.split and args.split_advanced: + dual_logger.error("Cannot use both --split and --split-advanced at the same time") + return 1 + if args.split_simple and not args.hybrid: + dual_logger.error("--split-simple can only be used with --hybrid mode") + return 1 + if args.split_simple and (args.split or args.split_advanced): + dual_logger.error("--split-simple cannot be used with --split or --split-advanced") + return 1 + + # Handle image processing options for hybrid mode + if args.enable_greyscale: + enable_greyscale = True + dual_logger.print("Greyscale processing enabled (override)") + else: + enable_greyscale = False + dual_logger.print("Greyscale processing disabled (default for hybrid mode)") + + if args.enable_contrast: + enable_contrast_enhancement = True + dual_logger.print("Contrast enhancement enabled (override)") + else: + enable_contrast_enhancement = False + dual_logger.print("Contrast enhancement disabled (default for hybrid mode)") + + # Determine worker counts for hybrid mode + if args.concurrent_workers is not None: + openai_workers = args.concurrent_workers + local_workers = args.concurrent_workers + else: + openai_workers = args.openai_workers + local_workers = args.local_workers + + # Initialize hybrid detector + detector = HybridImageDetector( + panel_threshold=args.panel_threshold, + inlier_threshold=args.inlier_threshold, + inlier_ratio_threshold=args.inlier_ratio_threshold, + enable_greyscale=enable_greyscale, + enable_contrast_enhancement=enable_contrast_enhancement, + contrast_factor=args.contrast_factor, + refinement_mode=args.refinement_mode, + openai_workers=openai_workers, + local_workers=local_workers, + split_mode=args.split, + split_advanced=args.split_advanced, + split_simple=args.split_simple, + percentile=args.percentile, + min_gap=args.min_gap, + vector_mode=args.vector_mode, + similarity_threshold=args.similarity_threshold, + fallback_one_at_a_time=args.fallback_one_at_a_time, + parallel_layouts=args.parallel_layouts, + layout_workers=args.layout_workers, + max_concurrent_layouts=args.max_concurrent_layouts, + no_truncation=args.no_truncation + ) + + # Add hybrid mode suffix to default output name + if not args.output: + default_output += "_hybrid" + if args.panel_threshold != 2: + default_output += f"_threshold{args.panel_threshold}" + if args.refinement_mode: + default_output += "_refined" + if args.split: + default_output += "_split" + if args.split_advanced: + default_output += "_split_advanced" + if args.split_simple: + default_output += "_split_simple" + if args.vector_mode: + default_output += "_vector" + if args.fallback_one_at_a_time: + default_output += "_fallback" + if args.parallel_layouts: + default_output += "_parallel" + + output_name = args.output if args.output else default_output + + dual_logger.print(f"Results will be saved as: {output_name}.json") + analysis_method = "vector similarity" if args.vector_mode else "local analysis" + dual_logger.print(f"Panel threshold: ≤{args.panel_threshold} panels → {analysis_method}, ≥{args.panel_threshold + 1} panels → split + {analysis_method}") + dual_logger.print(f"Inlier threshold: {args.inlier_threshold}") + dual_logger.print(f"CEN refinement: {'enabled' if args.refinement_mode else 'disabled'}") + dual_logger.print(f"Vector mode: {'enabled' if args.vector_mode else 'disabled'}") + if args.vector_mode: + dual_logger.print(f"Similarity threshold: {args.similarity_threshold}") + dual_logger.print(f"Fallback one-at-a-time: {'enabled' if args.fallback_one_at_a_time else 'disabled'}") + dual_logger.print(f"Parallel layouts: {'enabled' if args.parallel_layouts else 'disabled'}") + if args.parallel_layouts: + dual_logger.print(f"Layout workers: {detector.layout_workers}") + dual_logger.print(f"Max concurrent layouts: {detector.max_concurrent_layouts}") + dual_logger.print(f"OpenAI workers: {detector.openai_workers}") + dual_logger.print(f"Local workers: {detector.local_workers}") + dual_logger.print("-" * 60) + + try: + # Use parallel processing if requested + if args.parallel_layouts: + results = detector.process_all_layouts_hybrid_parallel(limit=limit, specific_file=specific_file) + else: + results = detector.process_all_layouts_hybrid(limit=limit, specific_file=specific_file) + output_file = detector.save_results(results, output_name) + summary = detector.generate_summary(results) + + dual_logger.print("\n" + "="*60) + dual_logger.print("HYBRID PROCESSING SUMMARY") + dual_logger.print("="*60) + dual_logger.print(f"Total layouts processed: {summary['total_layouts_processed']}") + dual_logger.print(f"Layouts with matches: {summary['layouts_with_matches']}") + dual_logger.print(f"Layouts without matches: {summary['layouts_without_matches']}") + dual_logger.print(f"Local analysis used: {summary['local_analysis_used']} ({summary['local_analysis_percentage']}%)") + dual_logger.print(f"Split + inlier analysis used: {summary['split_analysis_used']} ({summary['split_analysis_percentage']}%)") + dual_logger.print(f"Panel threshold: ≤{summary['panel_threshold']}") + dual_logger.print(f"Inlier threshold: {summary['inlier_threshold']}") + + if summary['total_duplicates_removed'] > 0: + dual_logger.print(f"\nDEDUPLICATION RESULTS:") + dual_logger.print(f"Layouts with duplicates removed: {summary['layouts_with_deduplication']}") + dual_logger.print(f"Total duplicate masters removed: {summary['total_duplicates_removed']}") + dual_logger.print(f"Deduplication rate: {summary['deduplication_rate']}%") + + if summary['most_used_masters']: + dual_logger.print(f"\nTop {min(10, len(summary['most_used_masters']))} most frequently detected masters:") + for master_id, count in summary['most_used_masters']: + dual_logger.print(f" {master_id}.jpg: {count} times") + + dual_logger.print(f"\nFull results saved to: {output_file}") + dual_logger.print("="*60) + + # Print cost summary if tracking is enabled + if args.enable_cost_tracking: + cost_calculator.print_cost_summary() + + # Generate cost report if requested + if args.cost_report: + cost_report_file = cost_calculator.save_cost_report() + if cost_report_file: + dual_logger.print(f"Cost report saved to: {cost_report_file}") + + # Show cost estimate if requested + if args.cost_estimate: + estimate = cost_calculator.estimate_monthly_cost(args.cost_estimate) + if 'error' not in estimate: + dual_logger.print(f"\nMONTHLY COST ESTIMATE") + dual_logger.print(f"Based on {estimate['based_on_layouts']} processed layouts:") + dual_logger.print(f" Average cost per layout: ${estimate['average_cost_per_layout']:.4f}") + dual_logger.print(f" Estimated monthly cost ({estimate['layouts_per_month']} layouts): ${estimate['estimated_monthly_cost']:.2f}") + dual_logger.print(f" Estimated annual cost: ${estimate['estimated_annual_cost']:.2f}") + + except KeyboardInterrupt: + dual_logger.print("\n\nProcessing interrupted by user.") + dual_logger.print("Partial results may have been saved automatically.") + except Exception as e: + dual_logger.error(f"\nError during hybrid processing: {e}") + log_exception(logger) + return 1 + finally: + detector.cleanup_temp_files() + + elif args.vector_mode and not args.hybrid: + dual_logger.print("Using VECTOR EMBEDDING mode with Google Vertex AI") + + # Validate vector mode arguments + if args.refinement_mode: + dual_logger.warning("--refinement-mode is not supported in vector mode, ignoring...") + if args.one_at_a_time: + dual_logger.warning("--one-at-a-time is not applicable in vector mode, ignoring...") + if args.panel_aware_refinement: + dual_logger.error("--panel-aware-refinement is only supported with --openai --one-at-a-time mode") + return 1 + if args.split_advanced: + dual_logger.warning("--split-advanced is only supported in hybrid mode, ignoring...") + if not args.no_greyscale or not args.no_contrast or args.contrast_factor != 1.5: + dual_logger.warning("Image processing options (greyscale, contrast) are not used in vector mode...") + + # Initialize vector detector + detector = VectorImageDetector( + similarity_threshold=args.similarity_threshold, + splitting_mode=args.splitting_mode, + min_crop_size=args.min_crop_size, + crop_padding=args.crop_padding, + split_mode=args.split + ) + + # Add vector mode suffix to default output name + if not args.output: + default_output += "_vector" + if args.splitting_mode != "none": + default_output += f"_{args.splitting_mode}" + if args.similarity_threshold != 0.75: + default_output += f"_thresh{args.similarity_threshold}" + if args.split: + default_output += "_split" + + output_name = args.output if args.output else default_output + + dual_logger.print(f"Results will be saved as: {output_name}.json") + dual_logger.print(f"Similarity threshold: {args.similarity_threshold}") + dual_logger.print(f"Splitting mode: {args.splitting_mode}") + if args.splitting_mode != "none": + dual_logger.print(f"Min crop size: {args.min_crop_size}px, Crop padding: {args.crop_padding}px") + dual_logger.print("-" * 60) + + try: + results = detector.process_all_layouts_vector(limit=limit, specific_file=specific_file) + output_file = detector.save_results(results, output_name) + summary = detector.generate_summary(results) + + dual_logger.print("\n" + "="*60) + dual_logger.print("VECTOR PROCESSING SUMMARY") + dual_logger.print("="*60) + dual_logger.print(f"Total layouts processed: {summary['total_layouts_processed']}") + dual_logger.print(f"Layouts with matches: {summary['layouts_with_matches']}") + dual_logger.print(f"Layouts without matches: {summary['layouts_without_matches']}") + dual_logger.print(f"Similarity threshold: {summary['similarity_threshold']}") + dual_logger.print(f"Embedding dimensions: {summary['embedding_dimensions']}") + + if summary['most_used_masters']: + dual_logger.print(f"\nTop {min(10, len(summary['most_used_masters']))} most frequently detected masters:") + for master_id, count in summary['most_used_masters']: + dual_logger.print(f" {master_id}.jpg: {count} times") + + dual_logger.print(f"\nFull results saved to: {output_file}") + dual_logger.print("="*60) + + except KeyboardInterrupt: + dual_logger.print("\n\nProcessing interrupted by user.") + dual_logger.print("Partial results may have been saved automatically.") + except Exception as e: + dual_logger.error(f"\nError during vector processing: {e}") + log_exception(logger) + return 1 + + elif args.openai: + dual_logger.print("Using OPENAI LLM mode with o3 model") + + # Validate panel-aware refinement requirements + if args.panel_aware_refinement: + if not args.one_at_a_time: + dual_logger.error("--panel-aware-refinement requires --one-at-a-time mode") + return 1 + dual_logger.print("Panel-aware refinement ENABLED - will count panels and refine matches using OpenCV inlier analysis") + if args.split_advanced: + dual_logger.warning("--split-advanced is only supported in hybrid mode, ignoring...") + + # Initialize OpenAI detector with image processing settings + detector = OpenAIImageDetector( + enable_greyscale=not args.no_greyscale, + enable_contrast_enhancement=not args.no_contrast, + contrast_factor=args.contrast_factor, + refinement_mode=args.refinement_mode, + one_at_a_time_mode=args.one_at_a_time, + max_concurrent_workers=args.concurrent_workers, + panel_aware_refinement=args.panel_aware_refinement, + split_mode=args.split + ) + + # Add mode suffix to default output name + if not args.output: + default_output += "_openai" + if args.one_at_a_time: + default_output += "_one_at_a_time" + if args.panel_aware_refinement: + default_output += "_panel_aware" + if args.split: + default_output += "_split" + + output_name = args.output if args.output else default_output + + dual_logger.print(f"Results will be saved as: {output_name}.json") + if args.one_at_a_time: + dual_logger.print(f"Concurrent processes for one-at-a-time mode: {args.concurrent_workers}") + if args.concurrent_workers and args.concurrent_workers > 10: + dual_logger.print("WARNING: High concurrency (>10) may cause API rate limits!") + dual_logger.print("Recommended range: 3-8 processes for stable performance.") + dual_logger.print("-" * 60) + + try: + results = detector.process_all_layouts(limit=limit, specific_file=specific_file) + output_file = detector.save_results(results, output_name) + summary = detector.generate_summary(results) + + dual_logger.print("\n" + "="*60) + dual_logger.print("OPENAI PROCESSING SUMMARY") + dual_logger.print("="*60) + dual_logger.print(f"Total layouts processed: {summary['total_layouts_processed']}") + dual_logger.print(f"Layouts with matches: {summary['layouts_with_matches']}") + dual_logger.print(f"Layouts without matches: {summary['layouts_without_matches']}") + dual_logger.print(f"Provider: {summary['provider']}") + dual_logger.print(f"Model: {summary['model']}") + + if 'total_duplicates_removed' in summary and summary['total_duplicates_removed'] > 0: + dual_logger.print(f"\nDEDUPLICATION RESULTS:") + dual_logger.print(f"Layouts with duplicates removed: {summary['layouts_with_deduplication']}") + dual_logger.print(f"Total duplicate masters removed: {summary['total_duplicates_removed']}") + dual_logger.print(f"Deduplication rate: {summary['deduplication_rate']}%") + + if summary['most_used_masters']: + dual_logger.print(f"\nTop {min(10, len(summary['most_used_masters']))} most frequently detected masters:") + for master_id, count in summary['most_used_masters']: + dual_logger.print(f" {master_id}.jpg: {count} times") + + dual_logger.print(f"\nFull results saved to: {output_file}") + dual_logger.print("="*60) + + except KeyboardInterrupt: + dual_logger.print("\n\nProcessing interrupted by user.") + dual_logger.print("Partial results may have been saved automatically.") + except Exception as e: + dual_logger.error(f"\nError during OpenAI processing: {e}") + log_exception(logger) + return 1 + finally: + detector.cleanup_temp_files() + + else: + dual_logger.print("Using GEMINI LLM mode") + + # Validate panel-aware refinement requirements + if args.panel_aware_refinement: + dual_logger.error("--panel-aware-refinement is only supported with --openai mode") + return 1 + if args.split_advanced: + dual_logger.warning("--split-advanced is only supported in hybrid mode, ignoring...") + + # Initialize Gemini detector with image processing settings + detector = ImageDetector( + enable_greyscale=not args.no_greyscale, + enable_contrast_enhancement=not args.no_contrast, + contrast_factor=args.contrast_factor, + refinement_mode=args.refinement_mode, + one_at_a_time_mode=args.one_at_a_time, + max_concurrent_workers=args.concurrent_workers, + split_mode=args.split + ) + + # Add mode suffix to default output name + if not args.output: + if args.one_at_a_time: + default_output += "_one_at_a_time" + if args.refinement_mode: + default_output += "_refined" + if args.split: + default_output += "_split" + + output_name = args.output if args.output else default_output + + dual_logger.print(f"Results will be saved as: {output_name}.json") + if args.one_at_a_time: + dual_logger.print(f"Concurrent processes for one-at-a-time mode: {args.concurrent_workers}") + if args.concurrent_workers and args.concurrent_workers > 10: + dual_logger.print("WARNING: High concurrency (>10) may cause API rate limits!") + dual_logger.print("Recommended range: 3-8 processes for stable performance.") + dual_logger.print("-" * 60) + + try: + results = detector.process_all_layouts(limit=limit, specific_file=specific_file) + output_file = detector.save_results(results, output_name) + summary = detector.generate_summary(results) + + dual_logger.print("\n" + "="*60) + dual_logger.print("GEMINI PROCESSING SUMMARY") + dual_logger.print("="*60) + dual_logger.print(f"Total layouts processed: {summary['total_layouts_processed']}") + dual_logger.print(f"Layouts with matches: {summary['layouts_with_matches']}") + dual_logger.print(f"Layouts without matches: {summary['layouts_without_matches']}") + + if 'total_duplicates_removed' in summary and summary['total_duplicates_removed'] > 0: + dual_logger.print(f"\nDEDUPLICATION RESULTS:") + dual_logger.print(f"Layouts with duplicates removed: {summary['layouts_with_deduplication']}") + dual_logger.print(f"Total duplicate masters removed: {summary['total_duplicates_removed']}") + dual_logger.print(f"Deduplication rate: {summary['deduplication_rate']}%") + + if summary['most_used_masters']: + dual_logger.print(f"\nTop {min(10, len(summary['most_used_masters']))} most frequently detected masters:") + for master_id, count in summary['most_used_masters']: + dual_logger.print(f" {master_id}.jpg: {count} times") + + dual_logger.print(f"\nFull results saved to: {output_file}") + dual_logger.print("="*60) + + except KeyboardInterrupt: + dual_logger.print("\n\nProcessing interrupted by user.") + dual_logger.print("Partial results may have been saved automatically.") + except Exception as e: + dual_logger.error(f"\nError during Gemini processing: {e}") + log_exception(logger) + return 1 + finally: + detector.cleanup_temp_files() + + return 0 + + except Exception as e: + dual_logger.error(f"Unexpected error: {e}") + log_exception(logger) + return 1 + + +if __name__ == "__main__": + # Required for multiprocessing on macOS and Windows + multiprocessing.set_start_method('spawn', force=True) + exit(main()) \ No newline at end of file diff --git a/cost_calculator.py b/cost_calculator.py new file mode 100644 index 0000000..0c7a6ab --- /dev/null +++ b/cost_calculator.py @@ -0,0 +1,440 @@ +#!/usr/bin/env python3 +""" +Cost Calculator Module +Tracks OpenAI API usage and calculates costs for the master image detection application +""" + +import json +import time +from datetime import datetime +from typing import Dict, List, Optional, Tuple +from dataclasses import dataclass, asdict +from pathlib import Path + + +@dataclass +class TokenUsage: + """Data class to track token usage for a single API call""" + prompt_tokens: int + completion_tokens: int + total_tokens: int + cached_tokens: int = 0 + + def __post_init__(self): + """Validate token counts""" + if self.prompt_tokens < 0 or self.completion_tokens < 0: + raise ValueError("Token counts cannot be negative") + if self.total_tokens != self.prompt_tokens + self.completion_tokens: + raise ValueError("Total tokens must equal prompt + completion tokens") + + +@dataclass +class ApiCallCost: + """Data class to track cost information for a single API call""" + operation_type: str + timestamp: str + token_usage: TokenUsage + input_cost: float + output_cost: float + cached_cost: float + total_cost: float + layout_name: str = "" + master_id: str = "" + + def to_dict(self) -> Dict: + """Convert to dictionary for JSON serialization""" + return asdict(self) + + +@dataclass +class LayoutCostSummary: + """Data class to track cost summary for a single layout""" + layout_name: str + total_cost: float + total_input_tokens: int + total_output_tokens: int + total_cached_tokens: int + api_calls_made: int + operation_types: List[str] + processing_time: float = 0.0 + detected_masters: List[str] = None + + def __post_init__(self): + if self.detected_masters is None: + self.detected_masters = [] + + def to_dict(self) -> Dict: + """Convert to dictionary for JSON serialization""" + return asdict(self) + + +class CostCalculator: + """ + Main cost calculator class for tracking OpenAI API usage and costs + """ + + # OpenAI o3 pricing as of 2025 + INPUT_COST_PER_MILLION = 2.00 + CACHED_INPUT_COST_PER_MILLION = 0.50 + OUTPUT_COST_PER_MILLION = 8.00 + + def __init__(self, enable_tracking: bool = True): + """ + Initialize the cost calculator + + Args: + enable_tracking: Whether to enable cost tracking (default: True) + """ + self.enable_tracking = enable_tracking + self.api_calls: List[ApiCallCost] = [] + self.layout_costs: Dict[str, LayoutCostSummary] = {} + self.session_start_time = time.time() + + # Session totals + self.total_input_tokens = 0 + self.total_output_tokens = 0 + self.total_cached_tokens = 0 + self.total_cost = 0.0 + self.total_api_calls = 0 + + # Only print initialization message once and only in main process + import multiprocessing + if multiprocessing.current_process().name == 'MainProcess': + if not hasattr(CostCalculator, '_main_process_initialized'): + CostCalculator._main_process_initialized = True + print(f"Cost Calculator initialized (tracking: {'enabled' if enable_tracking else 'disabled'})") + if enable_tracking: + print(f"Current OpenAI o3 pricing:") + print(f" Input tokens: ${self.INPUT_COST_PER_MILLION:.2f} per million") + print(f" Cached input: ${self.CACHED_INPUT_COST_PER_MILLION:.2f} per million") + print(f" Output tokens: ${self.OUTPUT_COST_PER_MILLION:.2f} per million") + + def calculate_cost(self, prompt_tokens: int, completion_tokens: int, cached_tokens: int = 0) -> Tuple[float, float, float, float]: + """ + Calculate cost for a single API call + + Args: + prompt_tokens: Number of input tokens + completion_tokens: Number of output tokens + cached_tokens: Number of cached input tokens + + Returns: + Tuple of (input_cost, output_cost, cached_cost, total_cost) + """ + if not self.enable_tracking: + return 0.0, 0.0, 0.0, 0.0 + + # Calculate costs + input_cost = (prompt_tokens * self.INPUT_COST_PER_MILLION) / 1_000_000 + output_cost = (completion_tokens * self.OUTPUT_COST_PER_MILLION) / 1_000_000 + cached_cost = (cached_tokens * self.CACHED_INPUT_COST_PER_MILLION) / 1_000_000 + total_cost = input_cost + output_cost + cached_cost + + return input_cost, output_cost, cached_cost, total_cost + + def track_api_call(self, operation_type: str, prompt_tokens: int, completion_tokens: int, + cached_tokens: int = 0, layout_name: str = "", master_id: str = "") -> ApiCallCost: + """ + Track a single API call and calculate its cost + + Args: + operation_type: Type of operation (e.g., 'panel_counting', 'detection', 'fallback') + prompt_tokens: Number of input tokens + completion_tokens: Number of output tokens + cached_tokens: Number of cached input tokens + layout_name: Name of the layout being processed + master_id: ID of the master image (if applicable) + + Returns: + ApiCallCost object with tracking information + """ + if not self.enable_tracking: + # Return dummy cost object when tracking is disabled + return ApiCallCost( + operation_type=operation_type, + timestamp=datetime.now().isoformat(), + token_usage=TokenUsage(0, 0, 0, 0), + input_cost=0.0, + output_cost=0.0, + cached_cost=0.0, + total_cost=0.0, + layout_name=layout_name, + master_id=master_id + ) + + # Create token usage object + token_usage = TokenUsage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + cached_tokens=cached_tokens + ) + + # Calculate costs + input_cost, output_cost, cached_cost, total_cost = self.calculate_cost( + prompt_tokens, completion_tokens, cached_tokens + ) + + # Create cost tracking object + api_call_cost = ApiCallCost( + operation_type=operation_type, + timestamp=datetime.now().isoformat(), + token_usage=token_usage, + input_cost=input_cost, + output_cost=output_cost, + cached_cost=cached_cost, + total_cost=total_cost, + layout_name=layout_name, + master_id=master_id + ) + + # Add to tracking + self.api_calls.append(api_call_cost) + + # Update session totals + self.total_input_tokens += prompt_tokens + self.total_output_tokens += completion_tokens + self.total_cached_tokens += cached_tokens + self.total_cost += total_cost + self.total_api_calls += 1 + + # Update layout-specific tracking + if layout_name: + self._update_layout_cost(layout_name, api_call_cost) + + return api_call_cost + + def _update_layout_cost(self, layout_name: str, api_call_cost: ApiCallCost): + """Update cost tracking for a specific layout""" + if layout_name not in self.layout_costs: + self.layout_costs[layout_name] = LayoutCostSummary( + layout_name=layout_name, + total_cost=0.0, + total_input_tokens=0, + total_output_tokens=0, + total_cached_tokens=0, + api_calls_made=0, + operation_types=[] + ) + + layout_summary = self.layout_costs[layout_name] + layout_summary.total_cost += api_call_cost.total_cost + layout_summary.total_input_tokens += api_call_cost.token_usage.prompt_tokens + layout_summary.total_output_tokens += api_call_cost.token_usage.completion_tokens + layout_summary.total_cached_tokens += api_call_cost.token_usage.cached_tokens + layout_summary.api_calls_made += 1 + + if api_call_cost.operation_type not in layout_summary.operation_types: + layout_summary.operation_types.append(api_call_cost.operation_type) + + def get_layout_cost_breakdown(self, layout_name: str) -> Optional[Dict]: + """ + Get detailed cost breakdown for a specific layout + + Args: + layout_name: Name of the layout + + Returns: + Dictionary with cost breakdown or None if layout not found + """ + if not self.enable_tracking or layout_name not in self.layout_costs: + return None + + layout_summary = self.layout_costs[layout_name] + return { + 'layout_name': layout_name, + 'total_cost': round(layout_summary.total_cost, 4), + 'cost_breakdown': { + 'input_tokens': layout_summary.total_input_tokens, + 'output_tokens': layout_summary.total_output_tokens, + 'cached_tokens': layout_summary.total_cached_tokens, + 'api_calls_made': layout_summary.api_calls_made, + 'operation_types': layout_summary.operation_types + }, + 'cost_per_token': { + 'input': round(layout_summary.total_cost / max(layout_summary.total_input_tokens, 1) * 1000, 4), + 'output': round(layout_summary.total_cost / max(layout_summary.total_output_tokens, 1) * 1000, 4) + } + } + + def get_session_summary(self) -> Dict: + """ + Get summary of costs for the entire session + + Returns: + Dictionary with session cost summary + """ + if not self.enable_tracking: + return { + 'tracking_enabled': False, + 'message': 'Cost tracking is disabled' + } + + session_duration = time.time() - self.session_start_time + layouts_processed = len(self.layout_costs) + + # Calculate averages + avg_cost_per_layout = self.total_cost / max(layouts_processed, 1) + avg_tokens_per_layout = (self.total_input_tokens + self.total_output_tokens) / max(layouts_processed, 1) + avg_api_calls_per_layout = self.total_api_calls / max(layouts_processed, 1) + + # Calculate cost efficiency + total_tokens = self.total_input_tokens + self.total_output_tokens + cost_per_thousand_tokens = (self.total_cost / max(total_tokens, 1)) * 1000 + + # Operation type breakdown + operation_counts = {} + for api_call in self.api_calls: + op_type = api_call.operation_type + operation_counts[op_type] = operation_counts.get(op_type, 0) + 1 + + return { + 'tracking_enabled': True, + 'session_totals': { + 'total_cost': round(self.total_cost, 4), + 'total_input_tokens': self.total_input_tokens, + 'total_output_tokens': self.total_output_tokens, + 'total_cached_tokens': self.total_cached_tokens, + 'total_api_calls': self.total_api_calls, + 'layouts_processed': layouts_processed, + 'session_duration_minutes': round(session_duration / 60, 2) + }, + 'averages': { + 'cost_per_layout': round(avg_cost_per_layout, 4), + 'tokens_per_layout': round(avg_tokens_per_layout, 1), + 'api_calls_per_layout': round(avg_api_calls_per_layout, 1), + 'cost_per_thousand_tokens': round(cost_per_thousand_tokens, 4) + }, + 'operation_breakdown': operation_counts, + 'pricing_info': { + 'input_cost_per_million': self.INPUT_COST_PER_MILLION, + 'output_cost_per_million': self.OUTPUT_COST_PER_MILLION, + 'cached_input_cost_per_million': self.CACHED_INPUT_COST_PER_MILLION + } + } + + def save_cost_report(self, filename: str = None) -> str: + """ + Save detailed cost report to JSON file + + Args: + filename: Output filename (optional) + + Returns: + Path to saved file + """ + if not self.enable_tracking: + print("Cost tracking is disabled, no report to save") + return "" + + if filename is None: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"cost_report_{timestamp}.json" + + # Ensure .json extension + if not filename.endswith('.json'): + filename += '.json' + + output_path = Path("results") / filename + output_path.parent.mkdir(exist_ok=True) + + report_data = { + 'session_summary': self.get_session_summary(), + 'layout_costs': {name: summary.to_dict() for name, summary in self.layout_costs.items()}, + 'detailed_api_calls': [call.to_dict() for call in self.api_calls], + 'generated_at': datetime.now().isoformat(), + 'pricing_model': 'OpenAI o3' + } + + with open(output_path, 'w') as f: + json.dump(report_data, f, indent=2) + + print(f"Cost report saved to: {output_path}") + return str(output_path) + + def print_cost_summary(self): + """Print a formatted cost summary to console""" + if not self.enable_tracking: + print("Cost tracking is disabled") + return + + summary = self.get_session_summary() + + print("\n" + "="*60) + print("COST TRACKING SUMMARY") + print("="*60) + + session = summary['session_totals'] + averages = summary['averages'] + + print(f"Total cost: ${session['total_cost']:.4f}") + print(f"Total tokens: {session['total_input_tokens'] + session['total_output_tokens']:,}") + print(f" - Input tokens: {session['total_input_tokens']:,}") + print(f" - Output tokens: {session['total_output_tokens']:,}") + print(f" - Cached tokens: {session['total_cached_tokens']:,}") + print(f"Total API calls: {session['total_api_calls']}") + print(f"Layouts processed: {session['layouts_processed']}") + + print(f"\nAverages:") + print(f" - Cost per layout: ${averages['cost_per_layout']:.4f}") + print(f" - Tokens per layout: {averages['tokens_per_layout']:.1f}") + print(f" - API calls per layout: {averages['api_calls_per_layout']:.1f}") + print(f" - Cost per 1K tokens: ${averages['cost_per_thousand_tokens']:.4f}") + + if summary['operation_breakdown']: + print(f"\nOperation breakdown:") + for op_type, count in summary['operation_breakdown'].items(): + print(f" - {op_type}: {count} calls") + + print("="*60) + + def estimate_monthly_cost(self, layouts_per_month: int = 300) -> Dict: + """ + Estimate monthly cost based on current usage patterns + + Args: + layouts_per_month: Estimated number of layouts to process per month + + Returns: + Dictionary with cost estimates + """ + if not self.enable_tracking or len(self.layout_costs) == 0: + return {'error': 'No cost data available for estimation'} + + avg_cost_per_layout = self.total_cost / len(self.layout_costs) + estimated_monthly_cost = avg_cost_per_layout * layouts_per_month + + return { + 'average_cost_per_layout': round(avg_cost_per_layout, 4), + 'layouts_per_month': layouts_per_month, + 'estimated_monthly_cost': round(estimated_monthly_cost, 2), + 'estimated_annual_cost': round(estimated_monthly_cost * 12, 2), + 'based_on_layouts': len(self.layout_costs) + } + + +def extract_token_usage_from_response(response) -> TokenUsage: + """ + Extract token usage from OpenAI API response + + Args: + response: OpenAI API response object + + Returns: + TokenUsage object with extracted token counts + """ + if not hasattr(response, 'usage') or response.usage is None: + # Fallback if usage information is not available + return TokenUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0, cached_tokens=0) + + usage = response.usage + return TokenUsage( + prompt_tokens=usage.prompt_tokens, + completion_tokens=usage.completion_tokens, + total_tokens=usage.total_tokens, + cached_tokens=getattr(usage, 'cached_tokens', 0) + ) + + +# Global cost calculator instance (can be configured from CLI) +cost_calculator = CostCalculator(enable_tracking=False) # Disabled by default \ No newline at end of file diff --git a/example_code/composite_image_finder.py b/example_code/composite_image_finder.py new file mode 100644 index 0000000..38645e5 --- /dev/null +++ b/example_code/composite_image_finder.py @@ -0,0 +1,311 @@ +import sys +import subprocess +import tkinter as tk +from tkinter import messagebox, filedialog, scrolledtext, ttk +import json +import csv +import threading +import time +import os + +try: + import cv2 + import numpy as np + from PIL import Image, ImageDraw, ImageFont +except ImportError: + root = tk.Tk() + root.withdraw() + if messagebox.askyesno("Dependency Error", "Required libraries are not installed. Would you like to try and install them now?"): + try: + subprocess.check_call([sys.executable, "-m", "pip", "install", "opencv-python", "numpy", "Pillow"]) + messagebox.showinfo("Success", "Dependencies installed successfully. Please restart the application.") + except Exception as e: + messagebox.showerror("Installation Failed", f"Could not install dependencies. Please run 'pip install -r requirements.txt' manually.\n\nError: {e}") + root.destroy() + sys.exit() + +class ToolTip: + def __init__(self, widget, text): + self.widget = widget + self.text = text + self.tooltip = None + self.widget.bind("", self.enter) + self.widget.bind("", self.leave) + + def enter(self, event=None): + x, y, _, _ = self.widget.bbox("insert") + x += self.widget.winfo_rootx() + 25 + y += self.widget.winfo_rooty() + 25 + + self.tooltip = tk.Toplevel(self.widget) + self.tooltip.wm_overrideredirect(True) + self.tooltip.wm_geometry(f"+{x}+{y}") + + label = tk.Label(self.tooltip, text=self.text, background="#ffffe0", relief="solid", borderwidth=1, wraplength=200) + label.pack() + + def leave(self, event=None): + if self.tooltip: + self.tooltip.destroy() + self.tooltip = None + +class MasterImageFinderApp: + def __init__(self, root): + self.root = root + self.root.title("Master Image Finder") + self.root.geometry("800x700") + + self.layouts_path = tk.StringVar() + self.masters_path = tk.StringVar() + self.upscale = tk.BooleanVar() + self.denoise = tk.BooleanVar() + self.sharpen = tk.BooleanVar() + self.contrast = tk.BooleanVar() + + tk.Label(root, text="Layouts Folder:").pack(pady=5) + tk.Entry(root, textvariable=self.layouts_path, width=100).pack(pady=5) + tk.Button(root, text="Select Layouts Folder", command=self.select_layouts_folder).pack(pady=5) + + tk.Label(root, text="Master Images Folder:").pack(pady=5) + tk.Entry(root, textvariable=self.masters_path, width=100).pack(pady=5) + tk.Button(root, text="Select Master Images Folder", command=self.select_masters_folder).pack(pady=5) + + enhancement_frame = tk.LabelFrame(root, text="Advanced Enhancement Options", padx=10, pady=10) + enhancement_frame.pack(pady=10, padx=10, fill="x") + + upscale_check = tk.Checkbutton(enhancement_frame, text="Smart Upscaling", variable=self.upscale) + upscale_check.grid(row=0, column=0, sticky="w") + ToolTip(upscale_check, "Enlarges small images to improve feature detection. Best for images under 400x400px.") + + denoise_check = tk.Checkbutton(enhancement_frame, text="Denoising", variable=self.denoise) + denoise_check.grid(row=0, column=1, sticky="w", padx=10) + ToolTip(denoise_check, "Removes digital noise and compression artifacts. Can be slow on large images.") + + sharpen_check = tk.Checkbutton(enhancement_frame, text="Sharpening", variable=self.sharpen) + sharpen_check.grid(row=1, column=0, sticky="w") + ToolTip(sharpen_check, "Enhances edges and fine details. Very fast.") + + contrast_check = tk.Checkbutton(enhancement_frame, text="Contrast Enhancement", variable=self.contrast) + contrast_check.grid(row=1, column=1, sticky="w", padx=10) + ToolTip(contrast_check, "Improves local contrast, making features in dark or washed-out areas more distinct.") + + self.run_button = tk.Button(root, text="Find Matches", command=self.run_finder_thread) + self.run_button.pack(pady=20) + + self.progress = ttk.Progressbar(root, orient="horizontal", length=780, mode="determinate") + self.progress.pack(pady=10) + + self.log_area = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=100, height=15) + self.log_area.pack(pady=10, padx=10) + + def select_layouts_folder(self): + self.layouts_path.set(filedialog.askdirectory()) + + def select_masters_folder(self): + self.masters_path.set(filedialog.askdirectory()) + + def log(self, message): + self.root.after(0, self._log, message) + + def _log(self, message): + self.log_area.insert(tk.END, message + "\n") + self.log_area.see(tk.END) + + def update_progress(self, value): + self.root.after(0, self.progress.config, {'value': value}) + + def run_finder_thread(self): + self.run_button.config(state=tk.DISABLED) + self.log_area.delete(1.0, tk.END) + self.progress['value'] = 0 + thread = threading.Thread(target=self.run_finder) + thread.start() + + def run_finder(self): + start_time = time.time() + layouts_dir = self.layouts_path.get() + masters_dir = self.masters_path.get() + + if not layouts_dir or not masters_dir: + self.log("Error: Please select both folders.") + self.run_button.config(state=tk.NORMAL) + return + + output_dir = os.path.join(layouts_dir, "reports") + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + self.log(f"Created output directory at {output_dir}") + + try: + results = self.find_matches(layouts_dir, masters_dir) + self.create_html_report(results, output_dir, layouts_dir, masters_dir) + end_time = time.time() + + total_matches = sum(1 for item in results if item['found']) + self.log("\n--- Process Complete! ---") + self.log(f"Found matches for {total_matches} out of {len(results)} layout images.") + self.log(f"Total time: {end_time - start_time:.2f} seconds") + self.log(f"Reports saved in: {output_dir}") + except Exception as e: + self.log(f"An error occurred: {e}") + finally: + self.run_button.config(state=tk.NORMAL) + + def find_matches(self, layouts_path, masters_path, min_good_matches=10, inlier_threshold_ratio=0.5): + akaze = cv2.AKAZE_create() + bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False) + clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)) + sharpen_kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]]) + + layout_images = [f for f in os.listdir(layouts_path) if f.endswith(('.png', '.jpg', '.jpeg'))] + master_images = [f for f in os.listdir(masters_path) if f.endswith(('.png', '.jpg', '.jpeg'))] + + results = [] + + master_descriptors = {} + self.log("Preprocessing master images...") + for i, master_image_name in enumerate(master_images): + self.log(f" - Preprocessing {i+1}/{len(master_images)}: {master_image_name}") + master_image_path = os.path.join(masters_path, master_image_name) + master_img = cv2.imread(master_image_path, cv2.IMREAD_GRAYSCALE) + if master_img is None: continue + kp, des = akaze.detectAndCompute(master_img, None) + if des is not None: master_descriptors[master_image_name] = (kp, des) + + total_layouts = len(layout_images) + self.log("\nProcessing layout images...") + for i, layout_image_name in enumerate(layout_images): + self.update_progress((i / total_layouts) * 100) + self.log(f" - Processing {i+1}/{total_layouts}: {layout_image_name}") + + layout_image_path = os.path.join(layouts_path, layout_image_name) + layout_img_gray = cv2.imread(layout_image_path, cv2.IMREAD_GRAYSCALE) + if layout_img_gray is None: + self.log(f" - Could not read layout image.") + continue + + enhancements_applied = [] + if self.upscale.get() and (layout_img_gray.shape[0] < 400 or layout_img_gray.shape[1] < 400): + layout_img_gray = cv2.resize(layout_img_gray, (0,0), fx=2.0, fy=2.0, interpolation=cv2.INTER_LANCZOS4) + enhancements_applied.append("Upscaled") + if self.denoise.get(): + layout_img_gray = cv2.fastNlMeansDenoising(layout_img_gray, None, 10, 7, 21) + enhancements_applied.append("Denoised") + if self.sharpen.get(): + layout_img_gray = cv2.filter2D(layout_img_gray, -1, sharpen_kernel) + enhancements_applied.append("Sharpened") + if self.contrast.get(): + layout_img_gray = clahe.apply(layout_img_gray) + enhancements_applied.append("Contrast Enhanced") + + if enhancements_applied: + self.log(f" - Applied: {', '.join(enhancements_applied)}") + + kp1, des1 = akaze.detectAndCompute(layout_img_gray, None) + if des1 is None: + self.log(f" - No features found.") + results.append({"layout": layout_image_name, "masters": [], "found": False, "enhancements": enhancements_applied}) + continue + + all_possible_matches = [] + for master_image_name, (kp2, des2) in master_descriptors.items(): + matches = bf.knnMatch(des1, des2, k=2) + good_matches = [m for m, n in matches if len(matches) > 1 and len(matches[0]) > 1 and m.distance < 0.75 * n.distance] + + if len(good_matches) > min_good_matches: + src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2) + dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2) + M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0) + if mask is not None: + inliers = np.sum(mask) + if inliers > min_good_matches: + all_possible_matches.append({"master": master_image_name, "inliers": int(inliers)}) + + if not all_possible_matches: + results.append({"layout": layout_image_name, "masters": [], "found": False, "enhancements": enhancements_applied}) + continue + + best_match = max(all_possible_matches, key=lambda x: x['inliers']) + max_inliers = best_match['inliers'] + + confident_matches = [best_match] + for match in all_possible_matches: + if match != best_match and match['inliers'] > max_inliers * inlier_threshold_ratio: + confident_matches.append(match) + + if confident_matches: + self.log(f" - Found {len(confident_matches)} confident master image(s).") + results.append({"layout": layout_image_name, "masters": confident_matches, "found": True, "enhancements": enhancements_applied}) + else: + results.append({"layout": layout_image_name, "masters": [], "found": False, "enhancements": enhancements_applied}) + + self.update_progress(100) + return results + + def create_html_report(self, data, output_path, layouts_abs_path, masters_abs_path): + report_path = os.path.join(output_path, 'report.html') + + total_matches = sum(1 for item in data if item['found']) + total_layouts = len(data) + + html = f""" +Image Match Report + +

Image Match Report

Found matches for {total_matches} out of {total_layouts} layout images.
""" + + html += "

Matched Layouts

" + for item in data: + if item['found']: + layout_img_path = os.path.join(layouts_abs_path, item['layout']).replace('\\', '/') + enhancements_str = f"

Enhancements: {', '.join(item['enhancements'])}

" if item['enhancements'] else "" + html += f"
" + html += f"

Layout Image

{item['layout']}

{enhancements_str}
" + html += "
" + for master_item in item['masters']: + master_img_path = os.path.join(masters_abs_path, master_item['master']).replace('\\', '/') + html += f"

{master_item['master']}

({master_item['inliers']} inliers)

" + html += "
" + + html += "

Unmatched Layouts

Please review these manually.

" + for item in data: + if not item['found']: + layout_img_path = os.path.join(layouts_abs_path, item['layout']).replace('\\', '/') + html += f"

{item['layout']}

" + + html += """
+ + +""" + + with open(report_path, 'w', encoding='utf-8') as f: + f.write(html) + self.log(f"HTML report saved to {report_path}") + +if __name__ == "__main__": + root = tk.Tk() + app = MasterImageFinderApp(root) + root.mainloop() diff --git a/fix_stalled_processing.py b/fix_stalled_processing.py new file mode 100644 index 0000000..dc434c9 --- /dev/null +++ b/fix_stalled_processing.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +""" +Fix for stalled parallel processing - reduces bottleneck by adjusting worker counts +""" + +import argparse +import sys +import os + +def main(): + print("🔧 Parallel Processing Stall Fix") + print("=" * 50) + + print("\n📊 ANALYSIS OF THE STALL:") + print("1. Inlier analysis queue has consistent 3 items (bottleneck)") + print("2. Each inlier analysis takes 60-167 seconds (very slow)") + print("3. 4 layout workers are waiting for 1 serial inlier analysis") + print("4. High swap usage (72.7%) triggering unnecessary memory pressure") + print("5. Memory pressure is reducing workers when it shouldn't") + + print("\n🚀 RECOMMENDED SOLUTIONS:") + print("\n1. IMMEDIATE FIX (restart with reduced workers):") + print(" python cli.py --all --hybrid --split-simple --refinement-mode \\") + print(" --inlier-threshold 0.15 --inlier-ratio-threshold 0.2 \\") + print(" --fallback-one-at-a-time --enable-cost-tracking --cost-report \\") + print(" --parallel-layouts --layout-workers 2") + print(" (Reduces from 4 to 2 layout workers to reduce queue pressure)") + + print("\n2. CONSERVATIVE FIX (single layout worker):") + print(" python cli.py --all --hybrid --split-simple --refinement-mode \\") + print(" --inlier-threshold 0.15 --inlier-ratio-threshold 0.2 \\") + print(" --fallback-one-at-a-time --enable-cost-tracking --cost-report \\") + print(" --parallel-layouts --layout-workers 1") + print(" (Essentially sequential with queue coordination)") + + print("\n3. OPTIMAL FIX (disable parallel layouts for now):") + print(" python cli.py --all --hybrid --split-simple --refinement-mode \\") + print(" --inlier-threshold 0.15 --inlier-ratio-threshold 0.2 \\") + print(" --fallback-one-at-a-time --enable-cost-tracking --cost-report") + print(" (Use original sequential processing - more reliable)") + + print("\n💡 TECHNICAL EXPLANATIONS:") + print("- The 72.7% swap usage is not necessarily bad if system is responsive") + print("- Inlier analysis is CPU/memory intensive and benefits from being serial") + print("- Queue bottleneck occurs when producers (layout workers) > consumers (1 inlier worker)") + print("- Each split analysis can take 60-167s, making parallelism counterproductive") + + print("\n⚙️ LONG-TERM IMPROVEMENTS IMPLEMENTED:") + print("- More lenient memory pressure thresholds") + print("- Queue pressure detection and automatic worker reduction") + print("- Stall detection with timeout handling") + print("- Better progress monitoring and diagnostics") + + print("\n🎯 RECOMMENDATION:") + print("For your current dataset, use option 1 (2 layout workers) or option 3 (sequential).") + print("The parallel implementation works but needs tuning for your specific workload.") + + print("\n" + "=" * 50) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/gemini_detector.py b/gemini_detector.py new file mode 100644 index 0000000..e938334 --- /dev/null +++ b/gemini_detector.py @@ -0,0 +1,1501 @@ +#!/usr/bin/env python3 +""" +Gemini Image Detector - Extracted ImageDetector class +Uses Google Gemini 2.5 Pro API to detect which master images appear in layout images +""" + +import os +import json +import time +from pathlib import Path +from typing import List, Dict, Optional +import google.generativeai as genai +from dotenv import load_dotenv +from PIL import Image, ImageEnhance +import tempfile +import numpy as np +import pickle +import cv2 +import concurrent.futures +import threading +import uuid +import multiprocessing +from functools import partial +from panel_splitter import PanelSplitter + + +def process_single_master_detection(layout_path, master_id, master_path, enable_greyscale, enable_contrast_enhancement, contrast_factor, safety_settings): + """ + Standalone function for processing a single master detection in a separate process. + This ensures complete isolation from other workers. + """ + try: + # Import and configure in each process to avoid shared state + import os + import json + import time + from pathlib import Path + from PIL import Image, ImageEnhance + import google.generativeai as genai + from dotenv import load_dotenv + import uuid + import threading + + # Load environment in this process + load_dotenv() + api_key = os.getenv('GEMINI_API_KEY') + if not api_key: + raise ValueError("GEMINI_API_KEY not found in environment variables") + + genai.configure(api_key=api_key) + model = genai.GenerativeModel('gemini-2.5-pro') + + # Create temp directory for this process + temp_path = Path("temp_processed") + temp_path.mkdir(exist_ok=True) + + def preprocess_image_local(image_path: str) -> str: + """Local preprocessing function for this process""" + if not enable_greyscale and not enable_contrast_enhancement: + return image_path + + try: + with Image.open(image_path) as img: + processed_img = img.copy() + + if enable_greyscale: + processed_img = processed_img.convert('L') + processed_img = processed_img.convert('RGB') + + if enable_contrast_enhancement: + contrast_enhancer = ImageEnhance.Contrast(processed_img) + processed_img = contrast_enhancer.enhance(contrast_factor) + + sharpness_enhancer = ImageEnhance.Sharpness(processed_img) + processed_img = sharpness_enhancer.enhance(1.3) + + # Thread-safe filename + thread_id = threading.current_thread().ident + unique_id = str(uuid.uuid4())[:8] + original_name = Path(image_path).stem + processed_path = temp_path / f"{original_name}_processed_{thread_id}_{unique_id}.jpg" + processed_img.save(processed_path, 'JPEG', quality=95) + + return str(processed_path) + + except Exception as e: + return image_path + + def upload_single_image_local(image_path: str): + """Local upload function for this process""" + max_retries = 3 + for attempt in range(max_retries): + try: + processed_path = preprocess_image_local(image_path) + uploaded_file = genai.upload_file(processed_path) + return uploaded_file + except Exception as e: + if attempt == max_retries - 1: + return None + import random + jitter = random.uniform(0.1, 0.5) + sleep_time = (0.5 * (attempt + 1)) + jitter + time.sleep(sleep_time) + return None + + def create_single_master_prompt_local(master_id: str) -> str: + """Local prompt creation function""" + prompt = f"""Analyze the layout image (the second image) and determine if the master image (the first image) appears in it. + +INSTRUCTIONS: +1. Compare the master image (first image) with the layout image (second image) +2. Look for EXACT matches where the model, clothing, and pose are IDENTICAL +3. The layout image may contain the master image in various forms: + - Complete/exact match + - Cropped version + - Scaled or resized version + - Rotated version + - Partially obscured + +4. Focus on visual similarity in terms of: + - Person/model appearance and pose (must be EXACTLY the same) + - Clothing details (colors, patterns, styles - must be EXACTLY the same) + - Background and composition + - Overall visual elements + +5. CRITICAL: Only return a positive result if the models, pose, and clothing are EXACTLY the same. + If there is ANY difference in clothing, model, or pose then return a negative result. + +Master Image ID: {master_id} + +Return your response as a JSON object with this exact format: +{{ + "match_found": true/false, + "master_id": "{master_id}", + "confidence": "high/medium/low", + "analysis": "Detailed explanation of your findings and reasoning" +}} + +IMPORTANT CONTEXT: This is a legitimate business application for marketing and e-commerce image matching. The images are product/marketing photos showing models in various clothing styles for retail purposes. This analysis is for content categorization in a business context and is completely benign. +""" + return prompt + + # Upload both images + master_file = upload_single_image_local(master_path) + layout_file = upload_single_image_local(layout_path) + + if not master_file or not layout_file: + raise Exception("Failed to upload images") + + # Create prompt and make API call + prompt = create_single_master_prompt_local(master_id) + + max_retries = 3 + for attempt in range(max_retries): + try: + response = model.generate_content([prompt, master_file, layout_file], safety_settings=safety_settings) + + if not response.candidates: + if attempt < max_retries - 1: + time.sleep((2 ** attempt) * 0.5) + continue + else: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': 'No candidates returned', + 'error': 'Safety block' + } + + candidate = response.candidates[0] + + if candidate.finish_reason and candidate.finish_reason != 1: + if candidate.finish_reason in [3, 4, 5] and attempt < max_retries - 1: + time.sleep((2 ** attempt) * 0.5) + continue + else: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': f'Finished with reason: {candidate.finish_reason}', + 'error': f'Finish reason: {candidate.finish_reason}' + } + + # Parse response + response_text = response.text.strip() + start_idx = response_text.find('{') + end_idx = response_text.rfind('}') + 1 + + if start_idx == -1 or end_idx == 0: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': response_text, + 'error': 'No JSON found in response' + } + + json_str = response_text[start_idx:end_idx] + result = json.loads(json_str) + + # Validate result format + if 'match_found' not in result: + result['match_found'] = False + if 'master_id' not in result: + result['master_id'] = master_id + if 'confidence' not in result: + result['confidence'] = 'unknown' + if 'analysis' not in result: + result['analysis'] = response_text + + return result + + except Exception as e: + if attempt == max_retries - 1: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': '', + 'error': str(e) + } + time.sleep((2 ** attempt) * 0.5) + + except Exception as e: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': '', + 'error': str(e) + } + + +class ImageDetector: + def __init__(self, enable_greyscale=True, enable_contrast_enhancement=True, contrast_factor=1.5, refinement_mode=False, one_at_a_time_mode=False, max_concurrent_workers=5, split_mode=False): + """Initialize the image detector with Gemini API configuration""" + load_dotenv() + + api_key = os.getenv('GEMINI_API_KEY') + if not api_key: + raise ValueError("GEMINI_API_KEY not found in environment variables") + + genai.configure(api_key=api_key) + self.model = genai.GenerativeModel('gemini-2.5-pro') + + # Concurrency settings + self.max_concurrent_workers = max_concurrent_workers + self._progress_lock = threading.Lock() + + # Safety settings to prevent false positives for benign content + self.safety_settings = [ + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_NONE", + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_NONE", + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_NONE", + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_NONE", + }, + ] + + print("Initialized with BLOCK_NONE safety settings for all categories to prevent false positives on benign marketing content.") + + # Image processing settings + self.enable_greyscale = enable_greyscale + self.enable_contrast_enhancement = enable_contrast_enhancement + self.contrast_factor = contrast_factor + self.refinement_mode = refinement_mode + self.one_at_a_time_mode = one_at_a_time_mode + + # Split mode configuration + self.split_mode = split_mode + if self.split_mode: + self.splitter = PanelSplitter(debug=True) + print("Split mode enabled: Will split multi-panel layouts before matching") + + # Paths + self.master_images_path = Path("master_images") + self.layouts_path = Path("layouts") + self.results_path = Path("results") + self.temp_path = Path("temp_processed") + + # Create directories + self.results_path.mkdir(exist_ok=True) + self.temp_path.mkdir(exist_ok=True) + + # Master images cache + self.master_images = {} + self.master_files = {} + self.uploaded_masters = None # Cache uploaded master files + + + def load_master_images(self) -> Dict[str, str]: + """Load all master images and create ID mapping using filenames""" + print("Loading master images...") + + master_files = list(self.master_images_path.glob("*.jpg")) + print(f"Found {len(master_files)} master images") + + for file_path in master_files: + # Use filename (without extension) as the master ID + master_id = file_path.stem + self.master_images[master_id] = str(file_path) + self.master_files[master_id] = file_path.name + + return self.master_images + + def match_split_to_masters(self, split_path: str, master_images: List[str]) -> List[Dict]: + """Match a split image to master images using basic OpenCV matching""" + matches = [] + + # For Gemini detector, we'll use a simple approach since it doesn't have + # the sophisticated inlier analysis like OpenAI detector + # This is a placeholder - in practice, you might want to use the OpenAI + # detector's inlier analysis or implement a similar approach + + # For now, return empty matches to avoid errors + # You could implement basic template matching or feature matching here + + return matches + + def preprocess_image(self, image_path: str) -> str: + """Preprocess image: convert to greyscale and enhance contrast - THREAD-SAFE VERSION""" + if not self.enable_greyscale and not self.enable_contrast_enhancement: + return image_path + + try: + # Open the image + with Image.open(image_path) as img: + processed_img = img.copy() + + # Convert to greyscale if enabled + if self.enable_greyscale: + processed_img = processed_img.convert('L') + # Convert back to RGB for consistency + processed_img = processed_img.convert('RGB') + + # Enhance contrast if enabled + if self.enable_contrast_enhancement: + # Global contrast enhancement + contrast_enhancer = ImageEnhance.Contrast(processed_img) + processed_img = contrast_enhancer.enhance(self.contrast_factor) + + # Edge contrast enhancement using sharpness + sharpness_enhancer = ImageEnhance.Sharpness(processed_img) + processed_img = sharpness_enhancer.enhance(1.3) + + # Save processed image with thread-safe filename + import threading + import uuid + thread_id = threading.current_thread().ident + unique_id = str(uuid.uuid4())[:8] + original_name = Path(image_path).stem + processed_path = self.temp_path / f"{original_name}_processed_{thread_id}_{unique_id}.jpg" + processed_img.save(processed_path, 'JPEG', quality=95) + + return str(processed_path) + + except Exception as e: + print(f"Warning: Failed to preprocess {Path(image_path).name}: {e}") + print(f"Using original image instead") + return image_path + + def upload_master_images_once(self): + """Upload all master images ONCE and cache them (with preprocessing)""" + if self.uploaded_masters is not None: + return self.uploaded_masters + + processing_msg = [] + if self.enable_greyscale: + processing_msg.append("greyscale conversion") + if self.enable_contrast_enhancement: + processing_msg.append("contrast enhancement") + + if processing_msg: + print(f"Uploading master images with {' and '.join(processing_msg)} (one-time operation)...") + else: + print("Uploading master images (one-time operation)...") + + uploaded_masters = [] + master_paths = list(self.master_images.values()) + + for i, path in enumerate(master_paths): + try: + # Preprocess the image + processed_path = self.preprocess_image(path) + + # Upload the processed image + uploaded_file = genai.upload_file(processed_path) + uploaded_masters.append(uploaded_file) + print(f"Uploaded master {i+1}/{len(master_paths)}: {Path(path).name}") + + # Small delay to avoid rate limiting + if i < len(master_paths) - 1: + time.sleep(0.1) + + except Exception as e: + print(f"Error uploading {Path(path).name}: {e}") + # Retry once after delay + try: + time.sleep(1.0) + processed_path = self.preprocess_image(path) + uploaded_file = genai.upload_file(processed_path) + uploaded_masters.append(uploaded_file) + print(f"Retry successful for {Path(path).name}") + except Exception as e2: + print(f"Failed to upload {Path(path).name}: {e2}") + + self.uploaded_masters = uploaded_masters + print(f"✓ Successfully uploaded {len(uploaded_masters)} master images") + return uploaded_masters + + def upload_single_image(self, image_path: str) -> Optional: + """Upload a single image with preprocessing and retry logic""" + max_retries = 3 + for attempt in range(max_retries): + try: + # Preprocess the image + processed_path = self.preprocess_image(image_path) + + # Upload the processed image + uploaded_file = genai.upload_file(processed_path) + return uploaded_file + except Exception as e: + if attempt == max_retries - 1: + print(f"Failed to upload {Path(image_path).name}: {e}") + return None + print(f"Upload retry {attempt + 1}/{max_retries} for {Path(image_path).name}: {e}") + # Progressive backoff with jitter to avoid thundering herd + import random + jitter = random.uniform(0.1, 0.5) + sleep_time = (0.5 * (attempt + 1)) + jitter + time.sleep(sleep_time) + return None + + def _upload_single_image_threadsafe(self, image_path: str, thread_genai) -> Optional: + """Thread-safe version of upload_single_image using thread-local client""" + max_retries = 3 + for attempt in range(max_retries): + try: + # Preprocess the image + processed_path = self.preprocess_image(image_path) + + # Upload the processed image using thread-local client + uploaded_file = thread_genai.upload_file(processed_path) + return uploaded_file + except Exception as e: + if attempt == max_retries - 1: + return None + # Progressive backoff with jitter to avoid thundering herd + import random + jitter = random.uniform(0.1, 0.5) + sleep_time = (0.5 * (attempt + 1)) + jitter + time.sleep(sleep_time) + return None + + def _make_robust_api_call_threadsafe(self, thread_model, prompt, files, operation_name="API call", max_retries=3): + """Thread-safe version of make_robust_api_call using thread-local model""" + last_error = None + + for attempt in range(max_retries): + try: + response = thread_model.generate_content([prompt] + files, safety_settings=self.safety_settings) + + # Check for blocked content or safety issues immediately + if not response.candidates: + error_msg = f"No candidates returned" + if hasattr(response, 'prompt_feedback'): + error_msg += f" - Prompt feedback: {response.prompt_feedback}" + + # This is a safety block, should retry + if attempt < max_retries - 1: + wait_time = (2 ** attempt) * 0.5 + time.sleep(wait_time) + continue + else: + return { + 'success': False, + 'error_type': 'safety_block', + 'error_message': error_msg, + 'response': response + } + + candidate = response.candidates[0] + + # Check finish reason + if candidate.finish_reason and candidate.finish_reason != 1: # 1 = STOP (normal completion) + error_msg = f"Request finished with reason: {candidate.finish_reason}" + if hasattr(response, 'prompt_feedback'): + error_msg += f" - Prompt feedback: {response.prompt_feedback}" + if hasattr(candidate, 'safety_ratings'): + error_msg += f" - Safety ratings: {candidate.safety_ratings}" + + # Check if this is a retryable safety issue + if candidate.finish_reason in [3, 4, 5]: # SAFETY, RECITATION, OTHER safety-related reasons + if attempt < max_retries - 1: + wait_time = (2 ** attempt) * 0.5 + time.sleep(wait_time) + continue + else: + return { + 'success': False, + 'error_type': 'safety_finish_reason', + 'error_message': error_msg, + 'response': response + } + else: + # Non-safety related finish reason, don't retry + return { + 'success': False, + 'error_type': 'other_finish_reason', + 'error_message': error_msg, + 'response': response + } + + # Success case + return { + 'success': True, + 'response': response, + 'text': response.text.strip() + } + + except Exception as e: + last_error = e + error_str = str(e) + + # Check if this looks like a safety/blocking error + is_safety_error = any(keyword in error_str.lower() for keyword in [ + 'safety', 'blocked', 'filtered', 'response.text', 'response.parts', + 'finish_reason', 'candidates', 'prompt_feedback' + ]) + + if is_safety_error and attempt < max_retries - 1: + wait_time = (2 ** attempt) * 0.5 + time.sleep(wait_time) + continue + elif attempt < max_retries - 1: + # Other errors, also retry but with different messaging + wait_time = (2 ** attempt) * 0.5 + time.sleep(wait_time) + continue + else: + # Final attempt failed + return { + 'success': False, + 'error_type': 'exception', + 'error_message': str(e), + 'exception': e + } + + # This shouldn't be reached, but just in case + return { + 'success': False, + 'error_type': 'max_retries_exceeded', + 'error_message': f"Max retries ({max_retries}) exceeded", + 'last_error': str(last_error) if last_error else "Unknown error" + } + + def create_detection_prompt(self, master_ids: List[str]) -> str: + """Create the prompt for image detection""" + prompt = """Analyze the layout image (the last image provided) and identify which of the master images appear in it. + +INSTRUCTIONS: +1. Examine the layout image carefully to identify any image segments or components within it +2. For each segment you identify, look for matches in the provided master images +3. Look for all types of matches including: + - Exact matches (complete images) + - Cropped versions (partial images) + - Scaled or resized versions + - Rotated versions + - Images that may be partially obscured + +4. Focus on visual similarity in terms of: + - Person/model appearance and pose + - Clothing details (colors, patterns, styles) + - Background and composition + - Overall visual elements + +5. Be conservative and only include matches where you have reasonable confidence + +6. Only return a positive result (master image appears in the layout) if the models, pose, and clothing are EXACTLY the same. If there is ANY difference in clothing, model, or pose then return a negative result. + +Master Images with IDs: +""" + + for i, master_id in enumerate(master_ids): + prompt += f"Image {i+1}: ID = {master_id}\n" + + prompt += """ +The final image is the LAYOUT IMAGE to analyze. + +Return your response as a JSON object with this exact format: +{ + "detected_masters": ["master_id_1", "master_id_2"], + "analysis": "Brief explanation of what you found and your matching reasoning" +} + +Only include master IDs in "detected_masters" where you completely sure of a match. If no clear matches are found, return an empty array. + +IMPORTANT CONTEXT: This is a legitimate business application for marketing and e-commerce image matching. The images are product/marketing photos that may show models in various clothing styles for retail purposes. This prompt and task are completely benign and should not trigger safety filters. This is standard e-commerce image comparison work, not dangerous, offensive, or inappropriate content. The comparison is purely for business/marketing layout matching purposes. +""" + + return prompt + + + + + + def create_simple_detection_prompt(self, master_ids: List[str]) -> str: + """Create a simplified prompt for cases where the main prompt gets blocked""" + prompt = """Compare the layout image (the last image) with the master images provided and identify which master images appear in the layout. + +Look for visual matches including: +- Complete images that match +- Partial/cropped versions +- Resized versions + +This is for e-commerce product image matching. + +Master Images with IDs: +""" + + for i, master_id in enumerate(master_ids): + prompt += f"Image {i+1}: ID = {master_id}\n" + + prompt += """ +The final image is the LAYOUT IMAGE to analyze. + +Return your response as a JSON object: +{ + "detected_masters": ["master_id_1", "master_id_2", ...], + "analysis": "Brief explanation" +} + +Only include master IDs that clearly appear in the layout image. +""" + + return prompt + + def make_robust_api_call(self, prompt, files, operation_name="API call", max_retries=3): + """Make a robust API call with comprehensive error detection and retry logic""" + last_error = None + + for attempt in range(max_retries): + try: + response = self.model.generate_content([prompt] + files, safety_settings=self.safety_settings) + + # Check for blocked content or safety issues immediately + if not response.candidates: + error_msg = f"No candidates returned" + if hasattr(response, 'prompt_feedback'): + error_msg += f" - Prompt feedback: {response.prompt_feedback}" + + # This is a safety block, should retry + if attempt < max_retries - 1: + wait_time = (2 ** attempt) * 0.5 + print(f" Safety block detected on attempt {attempt + 1}/{max_retries} for {operation_name}, retrying in {wait_time}s...") + time.sleep(wait_time) + continue + else: + return { + 'success': False, + 'error_type': 'safety_block', + 'error_message': error_msg, + 'response': response + } + + candidate = response.candidates[0] + + # Check finish reason + if candidate.finish_reason and candidate.finish_reason != 1: # 1 = STOP (normal completion) + error_msg = f"Request finished with reason: {candidate.finish_reason}" + if hasattr(response, 'prompt_feedback'): + error_msg += f" - Prompt feedback: {response.prompt_feedback}" + if hasattr(candidate, 'safety_ratings'): + error_msg += f" - Safety ratings: {candidate.safety_ratings}" + + # Check if this is a retryable safety issue + if candidate.finish_reason in [3, 4, 5]: # SAFETY, RECITATION, OTHER safety-related reasons + if attempt < max_retries - 1: + wait_time = (2 ** attempt) * 0.5 + print(f" Safety/content issue detected on attempt {attempt + 1}/{max_retries} for {operation_name}, retrying in {wait_time}s...") + time.sleep(wait_time) + continue + else: + return { + 'success': False, + 'error_type': 'safety_finish_reason', + 'error_message': error_msg, + 'response': response + } + else: + # Non-safety related finish reason, don't retry + return { + 'success': False, + 'error_type': 'other_finish_reason', + 'error_message': error_msg, + 'response': response + } + + # Success case + return { + 'success': True, + 'response': response, + 'text': response.text.strip() + } + + except Exception as e: + last_error = e + error_str = str(e) + + # Check if this looks like a safety/blocking error + is_safety_error = any(keyword in error_str.lower() for keyword in [ + 'safety', 'blocked', 'filtered', 'response.text', 'response.parts', + 'finish_reason', 'candidates', 'prompt_feedback' + ]) + + if is_safety_error and attempt < max_retries - 1: + wait_time = (2 ** attempt) * 0.5 + print(f" Safety-related error on attempt {attempt + 1}/{max_retries} for {operation_name}, retrying in {wait_time}s: {e}") + time.sleep(wait_time) + continue + elif attempt < max_retries - 1: + # Other errors, also retry but with different messaging + wait_time = (2 ** attempt) * 0.5 + print(f" API error on attempt {attempt + 1}/{max_retries} for {operation_name}, retrying in {wait_time}s: {e}") + time.sleep(wait_time) + continue + else: + # Final attempt failed + return { + 'success': False, + 'error_type': 'exception', + 'error_message': str(e), + 'exception': e + } + + # This shouldn't be reached, but just in case + return { + 'success': False, + 'error_type': 'max_retries_exceeded', + 'error_message': f"Max retries ({max_retries}) exceeded", + 'last_error': str(last_error) if last_error else "Unknown error" + } + + def is_cen_image(self, master_id: str) -> bool: + """Check if a master image ID represents a CEN (censored) image""" + return '_CEN' in master_id + + def find_corresponding_non_cen_image(self, cen_master_id: str) -> Optional[str]: + """Find the corresponding non-CEN image for a given CEN master ID""" + if not self.is_cen_image(cen_master_id): + return None + + # Transform CEN filename to non-CEN filename + # Example: "1011A_1011A_1011_01_CEN" -> "1011A_1011_01" + parts = cen_master_id.split('_') + if len(parts) >= 4 and parts[-1] == 'CEN': + # Remove the middle duplicate part and _CEN suffix + # Pattern: prefix_prefix_middle_suffix_CEN -> prefix_middle_suffix + if len(parts) >= 5: + non_cen_id = f"{parts[0]}_{parts[2]}_{parts[3]}" + else: + # Fallback: just remove _CEN + non_cen_id = '_'.join(parts[:-1]) + + # Check if this non-CEN image exists in our master images + if non_cen_id in self.master_images: + return non_cen_id + + return None + + def create_censorship_detection_prompt(self) -> str: + """Create prompt for detecting if a layout image contains censored content""" + prompt = """Analyze this layout image to determine if it contains censored or uncensored content. + +TASK: Determine whether the images in this layout are censored (covered) or uncensored (more exposed). + +CENSORSHIP INDICATORS TO LOOK FOR: +1. **Clothing Coverage**: + - Long sleeves vs. sleeveless/short sleeves + - Full-length pants/skirts vs. shorts or shorter garments + - High necklines vs. lower necklines + +2. **Skin Coverage**: + - Arms: Fully covered vs. bare arms + - Legs: Fully covered vs. exposed legs/thighs + - Torso: Additional covering vs. more exposed areas + +3. **Added Elements**: + - Opaque or semi-transparent overlay layers covering skin + - Additional fabric or clothing elements that appear to cover exposed areas + - Digital modifications that add coverage + +CLASSIFICATION: +- **CENSORED**: If models show significant additional clothing coverage, long sleeves, full pants/skirts, or digital overlays covering skin +- **UNCENSORED**: If models show more exposed skin, shorter garments, bare arms/legs, or natural clothing without added coverage + +Return your response as a JSON object with this exact format: +{{ + "is_censored": true/false, + "confidence": "high/medium/low", + "analysis": "Detailed explanation of the coverage patterns observed and reasoning for the classification", + "coverage_details": "Specific description of clothing and skin coverage in the layout" +}} + +Be precise and focus on the actual clothing and coverage patterns visible in the image. + +IMPORTANT CONTEXT: This is a legitimate business application for marketing and e-commerce image classification. The images are product/marketing photos showing models in various clothing styles for retail purposes. This analysis is for content categorization in a business context and is completely benign. +""" + return prompt + + def detect_layout_censorship(self, layout_path: str) -> Dict: + """Detect if a layout image contains censored or uncensored content""" + try: + # Upload layout image + layout_file = self.upload_single_image(layout_path) + if not layout_file: + raise Exception("Failed to upload layout image") + + # Create censorship detection prompt + prompt = self.create_censorship_detection_prompt() + + # Make API call with robust retry logic + api_result = self.make_robust_api_call(prompt, [layout_file], "censorship detection") + + # Handle API call failure + if not api_result['success']: + return { + 'is_censored': True, # Default to censored if API fails + 'confidence': 'unknown', + 'analysis': f'API call failed: {api_result["error_message"]}', + 'error': f"{api_result['error_type']}: {api_result['error_message']}" + } + + # Parse response + response_text = api_result['text'] + + # Extract JSON from response + try: + start_idx = response_text.find('{') + end_idx = response_text.rfind('}') + 1 + + if start_idx == -1 or end_idx == 0: + raise ValueError("No JSON found in response") + + json_str = response_text[start_idx:end_idx] + result = json.loads(json_str) + + # Validate result format + if 'is_censored' not in result: + result['is_censored'] = True # Default to censored if unclear + if 'confidence' not in result: + result['confidence'] = 'unknown' + if 'analysis' not in result: + result['analysis'] = response_text + + return result + + except json.JSONDecodeError as e: + return { + 'is_censored': True, # Default to censored if parsing fails + 'confidence': 'unknown', + 'analysis': response_text, + 'error': f'JSON decode error: {e}' + } + + except Exception as e: + return { + 'is_censored': True, # Default to censored if error + 'confidence': 'unknown', + 'analysis': '', + 'error': str(e) + } + + def apply_cen_refinement_to_results(self, layout_path: str, initial_results: Dict) -> Dict: + """Apply CEN refinement to initial detection results""" + layout_name = Path(layout_path).name + detected_masters = initial_results.get('detected_masters', []) + + # First, deduplicate the detected masters to avoid processing duplicates + original_count = len(detected_masters) + detected_masters = self.deduplicate_master_matches(detected_masters) + if len(detected_masters) != original_count: + duplicates_removed = original_count - len(detected_masters) + print(f" Removed {duplicates_removed} duplicate master(s) before CEN refinement") + + # Find CEN images in the results + cen_images = [mid for mid in detected_masters if self.is_cen_image(mid)] + + if not cen_images: + # No CEN images found, return original results + return initial_results + + print(f" Refining {len(cen_images)} CEN matches for {layout_name}") + print(f" Analyzing layout to determine censorship level...") + + # Detect if the layout is censored or uncensored + censorship_result = self.detect_layout_censorship(layout_path) + is_layout_censored = censorship_result.get('is_censored', True) + confidence = censorship_result.get('confidence', 'unknown') + + print(f" Layout analysis: {'CENSORED' if is_layout_censored else 'UNCENSORED'} (confidence: {confidence})") + + refined_masters = [] + refinement_details = [] + changes_made = 0 + + # Process each detected image + for master_id in detected_masters: + if self.is_cen_image(master_id): + # This is a CEN image + non_cen_id = self.find_corresponding_non_cen_image(master_id) + + if not is_layout_censored and non_cen_id: + # Layout is uncensored, switch to non-CEN version + refined_masters.append(non_cen_id) + refinement_details.append({ + 'original_cen_match': master_id, + 'non_cen_alternative': non_cen_id, + 'final_choice': non_cen_id, + 'confidence': confidence, + 'analysis': f"Layout determined to be uncensored, switched from {master_id} to {non_cen_id}", + 'changed': True, + 'reason': 'layout_uncensored' + }) + changes_made += 1 + print(f" → Changed {master_id} to {non_cen_id} (layout is uncensored)") + else: + # Layout is censored or no non-CEN alternative, keep CEN version + refined_masters.append(master_id) + reason = 'layout_censored' if is_layout_censored else 'no_non_cen_alternative' + refinement_details.append({ + 'original_cen_match': master_id, + 'non_cen_alternative': non_cen_id, + 'final_choice': master_id, + 'confidence': confidence, + 'analysis': f"Kept {master_id} - layout is censored or no non-CEN alternative available", + 'changed': False, + 'reason': reason + }) + print(f" → Kept {master_id} ({'layout is censored' if is_layout_censored else 'no non-CEN alternative'})") + else: + # This is not a CEN image, keep it as-is + refined_masters.append(master_id) + + print(f" Summary: {changes_made} CEN images changed to non-CEN versions") + + # Apply deduplication to refined masters in case refinement introduced duplicates + original_refined = refined_masters[:] + refined_masters = self.deduplicate_master_matches(refined_masters) + + if len(refined_masters) != len(original_refined): + post_refinement_duplicates = len(original_refined) - len(refined_masters) + print(f" Post-refinement deduplication: Removed {post_refinement_duplicates} duplicate(s)") + + # Update results with refinement information + refined_results = initial_results.copy() + refined_results['detected_masters'] = refined_masters + refined_results['detected_master_ids'] = refined_masters # Update both fields for consistency + refined_results['detected_master_filenames'] = [f"{mid}.jpg" for mid in refined_masters ] + refined_results['refinement_applied'] = True + refined_results['refinement_details'] = refinement_details + refined_results['censorship_analysis'] = censorship_result + refined_results['original_detection_count'] = len(detected_masters) + refined_results['refined_detection_count'] = len(refined_masters) + refined_results['changes_made'] = changes_made + + return refined_results + + + + + + def detect_images_in_layout(self, layout_path: str, layout_index: int, total_layouts: int) -> Dict: + """Detect which master images appear in a single layout image""" + layout_name = Path(layout_path).name + print(f"Processing {layout_index}/{total_layouts}: {layout_name}") + + try: + # Upload only the layout image (masters already uploaded) + layout_file = self.upload_single_image(layout_path) + if not layout_file: + raise Exception("Failed to upload layout image") + + # Combine pre-uploaded masters with the layout + all_files = self.uploaded_masters + [layout_file] + + # Create prompt + master_ids = list(self.master_images.keys()) + prompt = self.create_detection_prompt(master_ids) + + # Try main prompt first, then fallback to simple prompt if blocked + api_result = self.make_robust_api_call(prompt, all_files, f"detection for {layout_name}") + + # If main prompt failed due to safety issues, try simple prompt + if not api_result['success'] and api_result['error_type'] in ['safety_block', 'safety_finish_reason']: + print(f" Main prompt blocked for {layout_name}, trying simplified prompt...") + simple_prompt = self.create_simple_detection_prompt(master_ids) + api_result = self.make_robust_api_call(simple_prompt, all_files, f"simple detection for {layout_name}") + + # Handle API call failure + if not api_result['success']: + error_msg = api_result['error_message'] + print(f"API call failed for {layout_name}: {error_msg}") + return { + 'detected_masters': [], + 'analysis': f'API call failed: {error_msg}', + 'error': f"{api_result['error_type']}: {error_msg}", + 'retry_count': 3 # Max retries were attempted + } + + # Parse response + response_text = api_result['text'] + + # Extract JSON from response + try: + start_idx = response_text.find('{') + end_idx = response_text.rfind('}') + 1 + + if start_idx == -1 or end_idx == 0: + raise ValueError("No JSON found in response") + + json_str = response_text[start_idx:end_idx] + result = json.loads(json_str) + + # Validate result format + if 'detected_masters' not in result: + result['detected_masters'] = [] + if 'analysis' not in result: + result['analysis'] = response_text + + # Deduplicate detected masters + original_detected = result['detected_masters'][:] + result['detected_masters'] = self.deduplicate_master_matches(result['detected_masters']) + + # Track deduplication if any duplicates were removed + if len(result['detected_masters']) != len(original_detected): + duplicates_removed = len(original_detected) - len(result['detected_masters']) + result['deduplication_applied'] = True + result['duplicates_removed'] = duplicates_removed + result['original_detected_masters'] = original_detected + print(f" Deduplication: Removed {duplicates_removed} duplicate master(s) from {layout_name}") + + # Log completion + detected_count = len(result['detected_masters']) + print(f"✓ Completed {layout_name} - Found {detected_count} matches") + + return result + + except json.JSONDecodeError as e: + print(f"JSON decode error for {layout_name}: {e}") + return { + 'detected_masters': [], + 'analysis': response_text, + 'error': f'JSON decode error: {e}' + } + + except Exception as e: + error_msg = f"Error analyzing {layout_name}: {e}" + + # Check if this was a safety-related error + if "response.text" in str(e) or "response.parts" in str(e): + error_msg += "\nThis appears to be a safety filter blocking issue." + if hasattr(e, 'response') and e.response: + if hasattr(e.response, 'prompt_feedback'): + error_msg += f"\nPrompt feedback: {e.response.prompt_feedback}" + if e.response.candidates: + candidate = e.response.candidates[0] + if hasattr(candidate, 'safety_ratings'): + error_msg += f"\nSafety ratings: {candidate.safety_ratings}" + if hasattr(candidate, 'finish_reason'): + error_msg += f"\nFinish reason: {candidate.finish_reason}" + + print(error_msg) + return { + 'detected_masters': [], + 'analysis': '', + 'error': str(e) + } + + def create_single_master_prompt(self, master_id: str) -> str: + """Create prompt for checking if a single master image appears in the layout""" + prompt = f"""Analyze the layout image (the second image) and determine if the master image (the first image) appears in it. + +INSTRUCTIONS: +1. Compare the master image (first image) with the layout image (second image) +2. Look for EXACT matches where the model, clothing, and pose are IDENTICAL +3. The layout image may contain the master image in various forms: + - Complete/exact match + - Cropped version + - Scaled or resized version + - Rotated version + - Partially obscured + +4. Focus on visual similarity in terms of: + - Person/model appearance and pose (must be EXACTLY the same) + - Clothing details (colors, patterns, styles - must be EXACTLY the same) + - Background and composition + - Overall visual elements + +5. CRITICAL: Only return a positive result if the models, pose, and clothing are EXACTLY the same. + If there is ANY difference in clothing, model, or pose then return a negative result. + +Master Image ID: {master_id} + +Return your response as a JSON object with this exact format: +{{ + "match_found": true/false, + "master_id": "{master_id}", + "confidence": "high/medium/low", + "analysis": "Detailed explanation of your findings and reasoning" +}} + +IMPORTANT CONTEXT: This is a legitimate business application for marketing and e-commerce image matching. The images are product/marketing photos showing models in various clothing styles for retail purposes. This analysis is for content categorization in a business context and is completely benign. +""" + return prompt + + def detect_single_master_in_layout(self, layout_path: str, master_id: str, master_index: int, total_masters: int) -> Dict: + """Check if a single master image appears in the layout image - THREAD-SAFE VERSION""" + layout_name = Path(layout_path).name + master_path = self.master_images[master_id] + + try: + # Create thread-local API client to avoid shared state issues + import google.generativeai as thread_genai + api_key = os.getenv('GEMINI_API_KEY') + thread_genai.configure(api_key=api_key) + thread_model = thread_genai.GenerativeModel('gemini-2.5-pro') + + # Upload both images using thread-local client + master_file = self._upload_single_image_threadsafe(master_path, thread_genai) + layout_file = self._upload_single_image_threadsafe(layout_path, thread_genai) + + if not master_file or not layout_file: + raise Exception("Failed to upload images") + + # Create prompt for single master matching + prompt = self.create_single_master_prompt(master_id) + + # Make API call with thread-local model + api_result = self._make_robust_api_call_threadsafe( + thread_model, + prompt, + [master_file, layout_file], + f"single master detection: {master_id} in {layout_name}" + ) + + # Handle API call failure + if not api_result['success']: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': f'API call failed: {api_result["error_message"]}', + 'error': f"{api_result['error_type']}: {api_result['error_message']}" + } + + # Parse response + response_text = api_result['text'] + + # Extract JSON from response + try: + start_idx = response_text.find('{') + end_idx = response_text.rfind('}') + 1 + + if start_idx == -1 or end_idx == 0: + raise ValueError("No JSON found in response") + + json_str = response_text[start_idx:end_idx] + result = json.loads(json_str) + + # Validate result format + if 'match_found' not in result: + result['match_found'] = False + if 'master_id' not in result: + result['master_id'] = master_id + if 'confidence' not in result: + result['confidence'] = 'unknown' + if 'analysis' not in result: + result['analysis'] = response_text + + return result + + except json.JSONDecodeError as e: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': response_text, + 'error': f'JSON decode error: {e}' + } + + except Exception as e: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': '', + 'error': str(e) + } + + def detect_images_in_layout_one_at_a_time(self, layout_path: str, layout_index: int, total_layouts: int) -> Dict: + """Detect which master images appear in a layout by checking each master individually using process-based concurrency""" + layout_name = Path(layout_path).name + print(f"Processing {layout_index}/{total_layouts}: {layout_name} (Process-based one-at-a-time mode)") + + master_ids = list(self.master_images.keys()) + total_masters = len(master_ids) + detected_masters = [] + detailed_results = [] + + print(f" Checking {total_masters} masters using {self.max_concurrent_workers} concurrent processes...") + + # Prepare arguments for process pool + tasks = [] + for master_id in master_ids: + master_path = self.master_images[master_id] + task_args = ( + layout_path, + master_id, + master_path, + self.enable_greyscale, + self.enable_contrast_enhancement, + self.contrast_factor, + self.safety_settings + ) + tasks.append(task_args) + + # Use ProcessPoolExecutor for true isolation + with concurrent.futures.ProcessPoolExecutor(max_workers=self.max_concurrent_workers) as executor: + # Submit all tasks + future_to_master = { + executor.submit(process_single_master_detection, *task_args): task_args[1] + for task_args in tasks + } + + completed_count = 0 + # Collect results as they complete + for future in concurrent.futures.as_completed(future_to_master): + master_id = future_to_master[future] + completed_count += 1 + + try: + result = future.result() + detailed_results.append(result) + + # If match found, add to detected masters + if result.get('match_found', False): + detected_masters.append(master_id) + confidence = result.get('confidence', 'unknown') + print(f" {completed_count}/{total_masters}: ✓ MATCH found for {master_id} (confidence: {confidence})") + else: + if 'error' in result: + print(f" {completed_count}/{total_masters}: Error checking {master_id}: {result['error']}") + else: + print(f" {completed_count}/{total_masters}: No match for {master_id}") + + except Exception as e: + print(f" {completed_count}/{total_masters}: Process error checking {master_id}: {e}") + # Add error result to maintain consistency + error_result = { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': '', + 'error': str(e) + } + detailed_results.append(error_result) + + # Sort detailed_results by master_id to maintain consistent ordering + detailed_results.sort(key=lambda x: x.get('master_id', '')) + + # Deduplicate detected masters (shouldn't be needed in one-at-a-time mode, but for safety) + original_detected = detected_masters[:] + detected_masters = self.deduplicate_master_matches(detected_masters) + + if len(detected_masters) != len(original_detected): + duplicates_removed = len(original_detected) - len(detected_masters) + print(f" Deduplication: Removed {duplicates_removed} duplicate master(s)") + + detected_count = len(detected_masters) + print(f"✓ Completed {layout_name} - Found {detected_count} matches using {self.max_concurrent_workers} concurrent processes") + + return { + 'detected_masters': detected_masters, + 'detected_master_ids': detected_masters, + 'detected_master_filenames': [f"{mid}.jpg" for mid in detected_masters ], + 'analysis': f'Process-based one-at-a-time analysis completed. Found {detected_count} exact matches out of {total_masters} masters checked using {self.max_concurrent_workers} concurrent processes.', + 'detailed_results': detailed_results, + 'processing_mode': 'process_based_one_at_a_time', + 'total_masters_checked': total_masters, + 'concurrent_workers': self.max_concurrent_workers, + 'deduplication_applied': len(detected_masters) != len(original_detected), + 'duplicates_removed': len(original_detected) - len(detected_masters) if len(detected_masters) != len(original_detected) else 0, + 'original_detected_masters': original_detected + } + + def process_all_layouts(self, limit: Optional[int] = None, specific_file: Optional[str] = None) -> Dict: + """Process all layout images sequentially""" + if self.one_at_a_time_mode: + mode_desc = "One-at-a-time Mode" + else: + mode_desc = "Multi Master Mode" + + if self.refinement_mode: + mode_desc += " with CEN Refinement" + + print(f"Starting sequential batch processing ({mode_desc})...") + + # Load master images + self.load_master_images() + + # Upload all master images ONCE (only for multi-master mode) + if not self.one_at_a_time_mode: + self.upload_master_images_once() + + # Get layout files + if specific_file: + # Process only the specific file + layout_files = [self.layouts_path / specific_file] + if not layout_files[0].exists(): + raise FileNotFoundError(f"Layout file {specific_file} not found in {self.layouts_path}") + print(f"Processing specific file: {specific_file}") + else: + layout_files = list(self.layouts_path.glob("*.jpg")) + + if limit: + layout_files = layout_files[:limit] + print(f"Processing first {limit} layouts only") + + total_layouts = len(layout_files) + print(f"Processing {total_layouts} layout images in {mode_desc}") + print("=" * 60) + + results = {} + start_time = time.time() + + for i, layout_path in enumerate(layout_files, 1): + layout_id = layout_path.stem + + # Detect images in layout using the appropriate method + if self.split_mode: + # Split mode: split layout into panels and match each panel + master_ids = list(self.master_images.keys()) + result = self.splitter.split_layout_and_match(str(layout_path), master_ids, self) + + # Apply CEN refinement if enabled and there are matches + if self.refinement_mode and result.get('detected_masters'): + result = self.apply_cen_refinement_to_results(str(layout_path), result) + elif self.one_at_a_time_mode: + result = self.detect_images_in_layout_one_at_a_time(str(layout_path), i, total_layouts) + else: + result = self.detect_images_in_layout(str(layout_path), i, total_layouts) + + # Apply CEN refinement if enabled and there are CEN matches + if not self.split_mode and self.refinement_mode and result.get('detected_masters'): + result = self.apply_cen_refinement_to_results(str(layout_path), result) + + layout_result = { + 'layout_filename': layout_path.name, + 'detected_master_ids': result['detected_masters'], + 'detected_master_filenames': [f"{mid}.jpg" for mid in result['detected_masters'] ], + 'analysis': result.get('analysis', 'Split mode analysis'), + 'detection_mode': mode_desc.lower().replace(' ', '_').replace('with_', '') + } + + # Add split mode specific fields + if self.split_mode: + layout_result['split_mode'] = True + layout_result['splits_generated'] = result.get('splits_generated', 0) + layout_result['panel_count'] = result.get('panel_count', 1) + layout_result['panel_confidence'] = result.get('panel_confidence', 'unknown') + if 'split_results' in result: + layout_result['split_results'] = result['split_results'] + + + # Add deduplication fields if applied + if 'deduplication_applied' in result: + layout_result['deduplication_applied'] = result['deduplication_applied'] + layout_result['duplicates_removed'] = result['duplicates_removed'] + layout_result['original_detected_masters'] = result['original_detected_masters'] + + + if 'error' in result: + layout_result['error'] = result['error'] + + # Add refinement mode specific fields + if self.refinement_mode and result.get('refinement_applied'): + layout_result['refinement_applied'] = result['refinement_applied'] + layout_result['refinement_details'] = result['refinement_details'] + layout_result['censorship_analysis'] = result['censorship_analysis'] + layout_result['original_detection_count'] = result['original_detection_count'] + layout_result['refined_detection_count'] = result['refined_detection_count'] + layout_result['changes_made'] = result.get('changes_made', 0) + + results[layout_id] = layout_result + + # Progress update with time estimate + elapsed = time.time() - start_time + avg_time = elapsed / i + remaining = (total_layouts - i) * avg_time + + print(f"Progress: {i}/{total_layouts} ({i/total_layouts*100:.1f}%) - Est. remaining: {remaining/60:.1f} min") + + # Save progress periodically + if i % 20 == 0: + self.save_results(results, f"progress_{i}") + + total_time = time.time() - start_time + print(f"\n✓ Completed processing all {total_layouts} layouts in {total_time/60:.1f} minutes") + print(f"Average time per layout: {total_time/total_layouts:.1f} seconds") + return results + + def save_results(self, results: Dict, filename: str = "detection_results") -> str: + """Save results to JSON file""" + output_path = self.results_path / f"{filename}.json" + + # Add metadata + output_data = { + 'metadata': { + 'total_layouts_processed': len(results), + 'total_master_images': len(self.master_images), + 'master_images_available': list(self.master_files.keys()) + }, + 'results': results + } + + with open(output_path, 'w') as f: + json.dump(output_data, f, indent=2) + + print(f"Results saved to: {output_path}") + return str(output_path) + + def generate_summary(self, results: Dict) -> Dict: + """Generate summary statistics""" + total_layouts = len(results) + layouts_with_matches = sum(1 for r in results.values() if r['detected_master_ids']) + + # Count master image occurrences + master_counts = {} + for result in results.values(): + for master_id in result['detected_master_ids']: + master_counts[master_id] = master_counts.get(master_id, 0) + 1 + + # Deduplication statistics + layouts_with_deduplication = sum(1 for r in results.values() if r.get('deduplication_applied', False)) + total_duplicates_removed = sum(r.get('duplicates_removed', 0) for r in results.values()) + + summary = { + 'total_layouts_processed': total_layouts, + 'layouts_with_matches': layouts_with_matches, + 'layouts_without_matches': total_layouts - layouts_with_matches, + 'master_image_usage': master_counts, + 'most_used_masters': sorted(master_counts.items(), key=lambda x: x[1], reverse=True)[:10], + # Deduplication stats + 'layouts_with_deduplication': layouts_with_deduplication, + 'total_duplicates_removed': total_duplicates_removed, + 'deduplication_rate': round(layouts_with_deduplication / total_layouts * 100, 1) if total_layouts > 0 else 0 + } + + return summary + + def deduplicate_master_matches(self, detected_masters: List[str]) -> List[str]: + """Remove duplicate master matches from a list while preserving order""" + if not detected_masters: + return detected_masters + + # Simple deduplication - remove exact duplicates while preserving order + seen = set() + deduplicated = [] + + for master_id in detected_masters: + if master_id not in seen: + seen.add(master_id) + deduplicated.append(master_id) + + return deduplicated + + def cleanup_temp_files(self): + """Clean up temporary processed image files - handles thread-safe filenames""" + try: + if self.temp_path.exists(): + # Clean up both old and new thread-safe naming patterns + for temp_file in self.temp_path.glob("*_processed*.jpg"): + temp_file.unlink() + # Remove temp directory if empty + if not any(self.temp_path.iterdir()): + self.temp_path.rmdir() + except Exception as e: + print(f"Warning: Failed to cleanup temp files: {e}") \ No newline at end of file diff --git a/hybrid_detector.py b/hybrid_detector.py new file mode 100644 index 0000000..0bbba01 --- /dev/null +++ b/hybrid_detector.py @@ -0,0 +1,2939 @@ +#!/usr/bin/env python3 +""" +Hybrid Image Detection Module +Combines OpenAI O3 panel counting with local inlier analysis for cost-efficient detection +""" + +import os +import json +import time +import base64 +import queue +from pathlib import Path +from typing import List, Dict, Optional +import numpy as np +import cv2 +import concurrent.futures +import threading +import multiprocessing +import psutil +import pickle +from openai_detector import OpenAIImageDetector +from panel_splitter import PanelSplitter +from advanced_splitter import AdvancedPanelSplitter +from simple_splitter import SimplePanelSplitter +from memory_manager import MemoryManager, memory_safe_execution, reduce_feature_count +from cost_calculator import cost_calculator, extract_token_usage_from_response + +# Vector mode imports +try: + from google.cloud import aiplatform + from vertexai.vision_models import MultiModalEmbeddingModel, Image as VertexImage + VERTEX_AI_AVAILABLE = True +except ImportError: + VERTEX_AI_AVAILABLE = False + + +def process_single_master_inlier_analysis(layout_path, master_id, master_path, min_good_matches=10, max_features=15000): + """ + Standalone function for processing a single master inlier analysis in a separate process. + Memory-safe version with feature limiting. + """ + try: + import cv2 + import numpy as np + import psutil + import gc + import os + from pathlib import Path + # Note: cost_calculator import removed from multiprocessing function + + + # Initialize OpenCV components in this process + akaze = cv2.AKAZE_create() + bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False) + + # Load images in grayscale for feature detection + layout_img = cv2.imread(layout_path, cv2.IMREAD_GRAYSCALE) + master_img = cv2.imread(master_path, cv2.IMREAD_GRAYSCALE) + + if layout_img is None or master_img is None: + return { + 'master_id': master_id, + 'inliers': 0, + 'confidence': 'low', + 'error': 'Could not read one or both images' + } + + # Check memory before feature detection + memory = psutil.virtual_memory() + if memory.percent > 85: + return { + 'master_id': master_id, + 'inliers': 0, + 'confidence': 'low', + 'error': f'Memory usage too high: {memory.percent:.1f}%' + } + + # Detect keypoints and descriptors + kp1, des1 = akaze.detectAndCompute(layout_img, None) + kp2, des2 = akaze.detectAndCompute(master_img, None) + + if des1 is None or des2 is None: + return { + 'master_id': master_id, + 'inliers': 0, + 'confidence': 'low', + 'error': 'No features detected in one or both images' + } + + # Limit features to prevent memory explosion + if len(kp1) > max_features: + # Keep best features based on response + responses = [kp.response for kp in kp1] + indices = np.argsort(responses)[-max_features:] + kp1 = [kp1[i] for i in indices] + des1 = des1[indices] + + if len(kp2) > max_features: + responses = [kp.response for kp in kp2] + indices = np.argsort(responses)[-max_features:] + kp2 = [kp2[i] for i in indices] + des2 = des2[indices] + + # Match features using k-nearest neighbors + matches = bf.knnMatch(des1, des2, k=2) + + # Apply Lowe's ratio test to filter good matches + good_matches = [] + for match_pair in matches: + if len(match_pair) == 2: + m, n = match_pair + if m.distance < 0.80 * n.distance: + good_matches.append(m) + + if len(good_matches) < min_good_matches: + return { + 'master_id': master_id, + 'inliers': 0, + 'confidence': 'low', + 'good_matches': len(good_matches), + 'reason': f'Insufficient good matches: {len(good_matches)} < {min_good_matches}' + } + + # Extract matched points + src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2) + dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2) + + # Find homography using RANSAC + M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 7.0) + + if mask is None: + return { + 'master_id': master_id, + 'inliers': 0, + 'confidence': 'low', + 'good_matches': len(good_matches), + 'error': 'Homography estimation failed' + } + + # Count inliers + inliers = int(np.sum(mask)) + + # Determine confidence based on inlier count and ratio + inlier_ratio = inliers / len(good_matches) + if inliers >= 30 and inlier_ratio >= 0.5: + confidence = 'high' + elif inliers >= 15 and inlier_ratio >= 0.3: + confidence = 'medium' + else: + confidence = 'low' + + return { + 'master_id': master_id, + 'inliers': inliers, + 'confidence': confidence, + 'good_matches': len(good_matches), + 'inlier_ratio': round(inlier_ratio, 3), + 'total_features_layout': len(kp1), + 'total_features_master': len(kp2) + } + + except Exception as e: + return { + 'master_id': master_id, + 'inliers': 0, + 'confidence': 'low', + 'error': str(e) + } + + +class InlierAnalysisCoordinator: + """ + Coordinates serial execution of inlier analysis tasks while allowing parallel layout processing. + Ensures that only one inlier analysis runs at a time to avoid overwhelming the system. + """ + + def __init__(self, local_workers, memory_manager, min_good_matches=10): + self.local_workers = local_workers + self.memory_manager = memory_manager + self.min_good_matches = min_good_matches + + # Task coordination + self.task_queue = queue.Queue() + self.worker_thread = None + self.shutdown_event = threading.Event() + self.active_analysis_lock = threading.Lock() + + # Statistics + self.total_tasks_processed = 0 + self.current_task_info = None + self.stats_lock = threading.Lock() + + def start(self): + """Start the inlier analysis worker thread""" + if self.worker_thread is None or not self.worker_thread.is_alive(): + self.shutdown_event.clear() + self.worker_thread = threading.Thread(target=self._worker_loop, daemon=True) + self.worker_thread.start() + print(f"InlierAnalysisCoordinator started with {self.local_workers} workers") + + def stop(self): + """Stop the inlier analysis worker thread""" + if self.worker_thread and self.worker_thread.is_alive(): + self.shutdown_event.set() + self.worker_thread.join(timeout=5.0) + print("InlierAnalysisCoordinator stopped") + + def submit_analysis(self, layout_id, analysis_type, analysis_params, result_future): + """ + Submit an inlier analysis task to the serial queue + + Args: + layout_id: Unique identifier for the layout + analysis_type: Type of analysis ('direct' or 'split') + analysis_params: Parameters for the analysis + result_future: Future object to signal completion + """ + task = { + 'layout_id': layout_id, + 'analysis_type': analysis_type, + 'analysis_params': analysis_params, + 'result_future': result_future, + 'submitted_at': time.time() + } + + self.task_queue.put(task) + print(f" → Submitted {analysis_type} analysis for {layout_id} to inlier queue (queue size: {self.task_queue.qsize()})") + + def get_queue_size(self): + """Get current queue size""" + return self.task_queue.qsize() + + def get_current_task_info(self): + """Get information about currently processing task""" + with self.stats_lock: + return self.current_task_info.copy() if self.current_task_info else None + + def _worker_loop(self): + """Main worker loop - processes inlier analysis tasks serially""" + print("InlierAnalysisCoordinator worker loop started") + + while not self.shutdown_event.is_set(): + try: + # Get next task with timeout + task = self.task_queue.get(timeout=1.0) + + # Process the task while holding the analysis lock + with self.active_analysis_lock: + self._process_inlier_analysis_task(task) + + # Mark task as done + self.task_queue.task_done() + + except queue.Empty: + continue + except Exception as e: + print(f"Error in inlier analysis worker loop: {e}") + # Continue processing other tasks + continue + + def _process_inlier_analysis_task(self, task): + """Process a single inlier analysis task""" + layout_id = task['layout_id'] + analysis_type = task['analysis_type'] + analysis_params = task['analysis_params'] + result_future = task['result_future'] + + # Update current task info + with self.stats_lock: + self.current_task_info = { + 'layout_id': layout_id, + 'analysis_type': analysis_type, + 'started_at': time.time(), + 'queue_wait_time': time.time() - task['submitted_at'] + } + + print(f" → Processing {analysis_type} inlier analysis for {layout_id} (queue wait: {self.current_task_info['queue_wait_time']:.1f}s)") + + try: + # Perform the actual analysis based on type + if analysis_type == 'direct': + result = self._perform_direct_inlier_analysis(layout_id, analysis_params) + elif analysis_type == 'split': + result = self._perform_split_inlier_analysis(layout_id, analysis_params) + else: + raise ValueError(f"Unknown analysis type: {analysis_type}") + + # Signal completion with result + result_future.set_result(result) + + # Update statistics + with self.stats_lock: + self.total_tasks_processed += 1 + processing_time = time.time() - self.current_task_info['started_at'] + print(f" → Completed {analysis_type} analysis for {layout_id} in {processing_time:.1f}s") + self.current_task_info = None + + except Exception as e: + print(f" → Error processing {analysis_type} analysis for {layout_id}: {e}") + # Signal error to the waiting layout worker + result_future.set_exception(e) + + # Clear current task info + with self.stats_lock: + self.current_task_info = None + + def _perform_direct_inlier_analysis(self, layout_id, params): + """Perform direct inlier analysis on a layout""" + layout_path = params['layout_path'] + master_images = params['master_images'] + vector_mode = params.get('vector_mode', False) + + if vector_mode: + return self._perform_vector_similarity_analysis(layout_path, params) + else: + return self._perform_opencv_inlier_analysis(layout_path, master_images, params) + + def _perform_split_inlier_analysis(self, layout_id, params): + """Perform inlier analysis on split panels""" + split_panels = params['split_panels'] + master_images = params['master_images'] + vector_mode = params.get('vector_mode', False) + + if vector_mode: + return self._perform_split_vector_analysis(split_panels, params) + else: + return self._perform_split_opencv_analysis(split_panels, master_images, params) + + def _perform_opencv_inlier_analysis(self, layout_path, master_images, params): + """Perform OpenCV-based inlier analysis using existing multiprocessing logic""" + try: + # Initialize OpenCV components + akaze = cv2.AKAZE_create() + + # Load layout image for feature detection + layout_img = cv2.imread(layout_path, cv2.IMREAD_GRAYSCALE) + if layout_img is None: + raise Exception(f"Could not load layout image: {layout_path}") + + # Detect features in layout image + layout_kp, layout_desc = akaze.detectAndCompute(layout_img, None) + if layout_desc is None: + raise Exception("No features detected in layout image") + + feature_count = len(layout_kp) + + # Dynamically adjust worker count based on feature count and memory + safe_workers = self._calculate_safe_worker_count(feature_count) + + # Prepare tasks for multiprocessing + tasks = [] + max_features = min(10000, feature_count // 2) if feature_count > 20000 else 10000 + + for master_id, master_path in master_images.items(): + tasks.append((layout_path, master_id, master_path, self.min_good_matches, max_features)) + + # Process masters in parallel using existing function + master_results = [] + with concurrent.futures.ProcessPoolExecutor(max_workers=safe_workers) as executor: + future_to_master = { + executor.submit(process_single_master_inlier_analysis, *task_args): task_args[1] + for task_args in tasks + } + + for future in concurrent.futures.as_completed(future_to_master): + master_id = future_to_master[future] + try: + result = future.result() + if result.get('inliers', 0) > 0 and result.get('confidence', 'low') != 'low': + master_results.append({ + 'master_id': master_id, + 'inliers': result.get('inliers', 0), + 'confidence': result.get('confidence', 'low'), + 'details': result + }) + except Exception as e: + print(f" → Error processing master {master_id}: {e}") + + # Sort and process results + master_results.sort(key=lambda x: x['inliers'], reverse=True) + + # Apply thresholds + detected_masters = [] + inlier_scores = {} + + if master_results: + best_match = master_results[0] + best_inliers = best_match['inliers'] + inlier_threshold = params.get('inlier_threshold', 0.65) + inlier_ratio_threshold = params.get('inlier_ratio_threshold', 0.4) + + min_inliers = max(self.min_good_matches, + best_inliers * inlier_ratio_threshold, + int(best_inliers * inlier_threshold)) + + for result in master_results: + inliers = result['inliers'] + confidence = result['confidence'] + master_id = result['master_id'] + + inlier_scores[master_id] = inliers + + if (inliers >= min_inliers and + confidence in ['high', 'medium'] and + inliers >= self.min_good_matches): + detected_masters.append(master_id) + + return { + 'detected_masters': detected_masters, + 'inlier_scores': inlier_scores, + 'total_masters_checked': len(master_images), + 'potential_matches_found': len(master_results), + 'analysis_mode': 'opencv_inlier_analysis', + 'layout_features': feature_count, + 'workers_used': safe_workers + } + + except Exception as e: + return { + 'detected_masters': [], + 'inlier_scores': {}, + 'error': str(e), + 'analysis_mode': 'opencv_inlier_analysis_error' + } + + def _perform_vector_similarity_analysis(self, layout_path, params): + """Perform vector similarity analysis""" + try: + embedding_model = params['embedding_model'] + master_embeddings = params['master_embeddings'] + similarity_threshold = params.get('similarity_threshold', 0.75) + + # Generate embedding for layout + layout_embedding = self._generate_layout_embedding(layout_path, embedding_model) + if layout_embedding is None: + raise Exception("Failed to generate layout embedding") + + # Compare with master embeddings + similarities = {} + detected_masters = [] + + for master_id, master_embedding in master_embeddings.items(): + similarity = self._compute_cosine_similarity(layout_embedding, master_embedding) + similarities[master_id] = similarity + + if similarity >= similarity_threshold: + detected_masters.append(master_id) + + # Sort by similarity + detected_masters.sort(key=lambda x: similarities[x], reverse=True) + + return { + 'detected_masters': detected_masters, + 'inlier_scores': similarities, # Use similarities as scores for compatibility + 'total_masters_checked': len(master_embeddings), + 'analysis_mode': 'vector_similarity_analysis', + 'similarity_threshold': similarity_threshold + } + + except Exception as e: + return { + 'detected_masters': [], + 'inlier_scores': {}, + 'error': str(e), + 'analysis_mode': 'vector_similarity_analysis_error' + } + + def _perform_split_opencv_analysis(self, split_panels, master_images, params): + """Perform OpenCV inlier analysis on split panels""" + try: + # Prepare tasks for all split panels against all masters + tasks = [] + split_panel_paths = [] + + for i, split_info in enumerate(split_panels): + # Save split panel to temporary file + split_path = f"/tmp/split_panel_{i}_{int(time.time())}.jpg" + cv2.imwrite(split_path, split_info['image']) + split_panel_paths.append(split_path) + + # Create tasks for this split panel against all masters + for master_id, master_path in master_images.items(): + tasks.append((split_path, master_id, master_path, self.min_good_matches, 15000)) + + # Process all tasks in parallel + all_results = [] + with concurrent.futures.ProcessPoolExecutor(max_workers=self.local_workers) as executor: + future_to_task = { + executor.submit(process_single_master_inlier_analysis, *task_args): task_args + for task_args in tasks + } + + for future in concurrent.futures.as_completed(future_to_task): + try: + result = future.result() + if result.get('confidence') != 'low' and result.get('inliers', 0) > 0: + all_results.append(result) + except Exception as e: + print(f" → Error processing split task: {e}") + + # Clean up temporary files + for split_path in split_panel_paths: + try: + os.remove(split_path) + except: + pass + + # Process results + detected_masters = [] + inlier_scores = {} + + if all_results: + all_results.sort(key=lambda x: x['inliers'], reverse=True) + + # Apply thresholds + best_inliers = all_results[0]['inliers'] + inlier_threshold = params.get('inlier_threshold', 0.65) + inlier_ratio_threshold = params.get('inlier_ratio_threshold', 0.4) + + min_inliers = max(self.min_good_matches, + best_inliers * inlier_ratio_threshold, + int(best_inliers * inlier_threshold)) + + for result in all_results: + master_id = result['master_id'] + inliers = result['inliers'] + + # Keep highest score for each master + if master_id not in inlier_scores or inliers > inlier_scores[master_id]: + inlier_scores[master_id] = inliers + + if (inliers >= min_inliers and + result['confidence'] in ['high', 'medium'] and + inliers >= self.min_good_matches and + master_id not in detected_masters): + detected_masters.append(master_id) + + return { + 'detected_masters': detected_masters, + 'inlier_scores': inlier_scores, + 'total_combinations_processed': len(tasks), + 'potential_matches_found': len(all_results), + 'analysis_mode': 'split_opencv_analysis', + 'splits_processed': len(split_panels) + } + + except Exception as e: + return { + 'detected_masters': [], + 'inlier_scores': {}, + 'error': str(e), + 'analysis_mode': 'split_opencv_analysis_error' + } + + def _perform_split_vector_analysis(self, split_panels, params): + """Perform vector similarity analysis on split panels""" + try: + embedding_model = params['embedding_model'] + master_embeddings = params['master_embeddings'] + similarity_threshold = params.get('similarity_threshold', 0.75) + + all_results = [] + split_panel_paths = [] + + for i, split_info in enumerate(split_panels): + # Save split panel to temporary file + split_path = f"/tmp/split_panel_{i}_{int(time.time())}.jpg" + cv2.imwrite(split_path, split_info['image']) + split_panel_paths.append(split_path) + + # Generate embedding for this split panel + split_embedding = self._generate_layout_embedding(split_path, embedding_model) + if split_embedding is None: + continue + + # Compare against all master embeddings + for master_id, master_embedding in master_embeddings.items(): + similarity = self._compute_cosine_similarity(split_embedding, master_embedding) + + if similarity >= similarity_threshold: + all_results.append({ + 'master_id': master_id, + 'similarity': similarity, + 'split_panel': i + }) + + # Clean up temporary files + for split_path in split_panel_paths: + try: + os.remove(split_path) + except: + pass + + # Process results + detected_masters = [] + similarity_scores = {} + + for result in all_results: + master_id = result['master_id'] + similarity = result['similarity'] + + # Keep highest similarity for each master + if master_id not in similarity_scores or similarity > similarity_scores[master_id]: + similarity_scores[master_id] = similarity + + if master_id not in detected_masters: + detected_masters.append(master_id) + + return { + 'detected_masters': detected_masters, + 'inlier_scores': similarity_scores, # Use similarities as scores for compatibility + 'total_combinations_processed': len(split_panels) * len(master_embeddings), + 'potential_matches_found': len(all_results), + 'analysis_mode': 'split_vector_analysis', + 'splits_processed': len(split_panels), + 'similarity_threshold': similarity_threshold + } + + except Exception as e: + return { + 'detected_masters': [], + 'inlier_scores': {}, + 'error': str(e), + 'analysis_mode': 'split_vector_analysis_error' + } + + def _calculate_safe_worker_count(self, feature_count): + """Calculate safe worker count based on feature count and memory""" + if feature_count > 50000: + safe_workers = max(1, self.local_workers // 2) + elif feature_count > 30000: + safe_workers = max(1, int(self.local_workers * 0.75)) + else: + safe_workers = self.local_workers + + # Further limit based on memory + if self.memory_manager: + safe_workers = min(safe_workers, self.memory_manager.limit_concurrent_processes(safe_workers)) + + return safe_workers + + def _generate_layout_embedding(self, layout_path, embedding_model): + """Generate embedding for layout image""" + try: + from vertexai.vision_models import Image as VertexImage + vertex_image = VertexImage.load_from_file(layout_path) + response = embedding_model.get_embeddings(image=vertex_image) + return np.array(response.image_embedding) + except Exception as e: + print(f" → Error generating embedding: {e}") + return None + + def _compute_cosine_similarity(self, embedding1, embedding2): + """Compute cosine similarity between two embeddings""" + norm1 = np.linalg.norm(embedding1) + norm2 = np.linalg.norm(embedding2) + + if norm1 == 0 or norm2 == 0: + return 0.0 + + return float(np.dot(embedding1, embedding2) / (norm1 * norm2)) + + +class ProgressTracker: + """ + Thread-safe progress tracking for parallel layout processing + """ + + def __init__(self, total_layouts): + self.total_layouts = total_layouts + self.completed_layouts = 0 + self.in_progress_layouts = 0 + self.failed_layouts = 0 + self.inlier_queue_size = 0 + self.lock = threading.Lock() + self.start_time = time.time() + + def start_layout(self): + """Mark a layout as started""" + with self.lock: + self.in_progress_layouts += 1 + + def complete_layout(self, success=True): + """Mark a layout as completed""" + with self.lock: + self.in_progress_layouts -= 1 + if success: + self.completed_layouts += 1 + else: + self.failed_layouts += 1 + + def update_queue_size(self, size): + """Update inlier queue size""" + with self.lock: + self.inlier_queue_size = size + + def get_progress_info(self): + """Get current progress information""" + with self.lock: + elapsed = time.time() - self.start_time + completed = self.completed_layouts + + if completed > 0: + avg_time = elapsed / completed + remaining = (self.total_layouts - completed) * avg_time + eta_mins = remaining / 60 + else: + eta_mins = 0 + + return { + 'total': self.total_layouts, + 'completed': completed, + 'in_progress': self.in_progress_layouts, + 'failed': self.failed_layouts, + 'queue_size': self.inlier_queue_size, + 'elapsed_mins': elapsed / 60, + 'eta_mins': eta_mins, + 'percentage': (completed / self.total_layouts * 100) if self.total_layouts > 0 else 0 + } + + def print_progress(self): + """Print current progress""" + info = self.get_progress_info() + print(f"Progress: {info['completed']}/{info['total']} ({info['percentage']:.1f}%) " + f"| In Progress: {info['in_progress']} | Failed: {info['failed']} " + f"| Queue: {info['queue_size']} | ETA: {info['eta_mins']:.1f}min") + + +class HybridImageDetector(OpenAIImageDetector): + def __init__(self, panel_threshold=2, inlier_threshold=0.65, + enable_greyscale=False, enable_contrast_enhancement=False, + min_good_matches=10, inlier_ratio_threshold=0.4, + openai_workers=None, local_workers=None, split_mode=False, + split_advanced=False, split_simple=False, percentile=10, min_gap=5, + vector_mode=False, similarity_threshold=0.75, fallback_one_at_a_time=False, + parallel_layouts=False, layout_workers=None, max_concurrent_layouts=None, + no_truncation=False, **kwargs): + """ + Initialize the hybrid image detector + + Args: + panel_threshold: Maximum panels to use local analysis (default: 2) + inlier_threshold: Minimum similarity threshold for local analysis (default: 0.65) + enable_greyscale: Enable greyscale processing (default: False for hybrid mode) + enable_contrast_enhancement: Enable contrast enhancement (default: False for hybrid mode) + min_good_matches: Minimum good matches for RANSAC (default: 10) + inlier_ratio_threshold: Minimum inlier ratio for confident matches (default: 0.4) + openai_workers: Number of workers for OpenAI analysis (default: auto-detect) + local_workers: Number of workers for local analysis (default: auto-detect) + split_mode: Enable traditional panel splitting + split_advanced: Enable advanced panel splitting with edge detection + split_simple: Enable simple panel splitting with even division (hybrid mode only) + percentile: Percentile threshold for gutter detection in advanced splitting (default: 10) + min_gap: Minimum gap size for gutter detection in advanced splitting (default: 5) + vector_mode: Enable vector similarity search instead of inlier analysis (default: False) + similarity_threshold: Similarity threshold for vector mode (default: 0.75) + fallback_one_at_a_time: Enable fallback to OpenAI one-at-a-time when matched masters < detected panels (default: False) + parallel_layouts: Enable parallel layout processing with serial inlier analysis coordination (default: False) + layout_workers: Number of concurrent layout workers for parallel processing (default: auto-detect) + max_concurrent_layouts: Maximum layouts processing simultaneously (default: same as layout_workers) + no_truncation: Disable truncation of match results (keeps all matches instead of limiting to panel count) (default: False) + """ + # Initialize parent class with OpenAI workers + super().__init__( + enable_greyscale=enable_greyscale, + enable_contrast_enhancement=enable_contrast_enhancement, + max_concurrent_workers=openai_workers or 5, # Temporary, will be updated after loading masters + split_mode=split_mode, + **kwargs + ) + + self.panel_threshold = panel_threshold + self.inlier_threshold = inlier_threshold + self.min_good_matches = min_good_matches + self.inlier_ratio_threshold = inlier_ratio_threshold + self.split_advanced = split_advanced + self.split_simple = split_simple + self.percentile = percentile + + # Vector mode configuration + self.vector_mode = vector_mode + self.similarity_threshold = similarity_threshold + + # Fallback configuration + self.fallback_one_at_a_time = fallback_one_at_a_time + + # Parallel processing configuration + self.parallel_layouts = parallel_layouts + self.layout_workers = layout_workers + self.max_concurrent_layouts = max_concurrent_layouts + + # Truncation configuration + self.no_truncation = no_truncation + + # Initialize memory manager + self.memory_manager = MemoryManager(max_memory_percent=75, max_swap_percent=80) + self.min_gap = min_gap + + # Store worker configurations for later auto-detection + self._openai_workers_config = openai_workers + self._local_workers_config = local_workers + self._layout_workers_config = layout_workers + + # Initialize worker attributes with temporary values for CLI display + self.openai_workers = openai_workers or "auto" + self.local_workers = local_workers or "auto" + + # Initialize parallel processing components + self.inlier_coordinator = None + self.progress_tracker = None + + # Initialize OpenCV components for local analysis (only if not using vector mode) + if not self.vector_mode: + self.akaze = cv2.AKAZE_create() + self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False) + + # Initialize vector mode components if enabled + if self.vector_mode: + if not VERTEX_AI_AVAILABLE: + raise ImportError("Google Vertex AI libraries not available. Please install: pip install google-cloud-aiplatform") + + # Initialize Vertex AI + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "service-account.json" + aiplatform.init(project="optical-414516", location="us-central1") + + # Initialize multimodal embedding model + self.embedding_model = MultiModalEmbeddingModel.from_pretrained("multimodalembedding@001") + + # Initialize vector-specific attributes + self.master_embeddings = {} + self.embeddings_cache_path = Path("embeddings_cache") + self.embeddings_cache_path.mkdir(exist_ok=True) + + print(f"Vector mode initialized: Using Google Vertex AI multimodalembedding@001") + + analysis_method = "vector similarity" if self.vector_mode else "local inlier analysis" + print(f"Hybrid detector initialized:") + print(f" Panel threshold: {panel_threshold} (≤{panel_threshold} panels → {analysis_method})") + print(f" Inlier threshold: {inlier_threshold}") + print(f" Vector mode: {self.vector_mode}") + if self.vector_mode: + print(f" Similarity threshold: {similarity_threshold}") + print(f" Greyscale processing: {enable_greyscale}") + print(f" Contrast enhancement: {enable_contrast_enhancement}") + print(f" Worker configuration: OpenAI={openai_workers or 'auto'}, Local={local_workers or 'auto'}") + + def _configure_worker_counts(self): + """Configure optimal worker counts based on system and dataset""" + import os + + # Auto-detect OpenAI workers (number of master images for optimal parallelization) + if self._openai_workers_config is None: + self.openai_workers = len(self.master_images) + print(f" OpenAI workers auto-detected: {self.openai_workers} (total masters)") + else: + self.openai_workers = self._openai_workers_config + print(f" OpenAI workers configured: {self.openai_workers}") + + # Auto-detect local workers (number of CPU cores minus 2 for optimal performance) + if self._local_workers_config is None: + self.local_workers = max(1, os.cpu_count() - 2) + print(f" Local workers auto-detected: {self.local_workers} (CPU cores - 2)") + else: + self.local_workers = self._local_workers_config + print(f" Local workers configured: {self.local_workers}") + + # Auto-detect layout workers for parallel processing + if self.parallel_layouts: + if self._layout_workers_config is None: + # Conservative default: min(4, CPU cores / 2) to avoid memory issues + self.layout_workers = min(4, max(1, os.cpu_count() // 2)) + print(f" Layout workers auto-detected: {self.layout_workers} (conservative: min(4, CPU cores / 2))") + else: + self.layout_workers = self._layout_workers_config + print(f" Layout workers configured: {self.layout_workers}") + + # Set max concurrent layouts if not specified + if self.max_concurrent_layouts is None: + self.max_concurrent_layouts = self.layout_workers + print(f" Max concurrent layouts: {self.max_concurrent_layouts} (same as layout workers)") + else: + self.layout_workers = 1 # Sequential processing + self.max_concurrent_layouts = 1 + + # Update the parent class max_concurrent_workers for OpenAI operations + self.max_concurrent_workers = self.openai_workers + + def generate_image_embedding(self, image_path: str) -> Optional[np.ndarray]: + """Generate 1408-dimensional embedding for an image using Vertex AI""" + if not self.vector_mode: + raise ValueError("Vector mode must be enabled to generate embeddings") + + try: + # Create Vertex AI Image object directly from file path + vertex_image = VertexImage.load_from_file(image_path) + + # Get embedding from Vertex AI + response = self.embedding_model.get_embeddings(image=vertex_image) + + # Extract the embedding vector (1408 dimensions) + embedding = np.array(response.image_embedding) + + return embedding + + except Exception as e: + print(f"Error generating embedding for {Path(image_path).name}: {e}") + return None + + def compute_cosine_similarity(self, embedding1: np.ndarray, embedding2: np.ndarray) -> float: + """Compute cosine similarity between two embeddings""" + # Normalize the embeddings + norm1 = np.linalg.norm(embedding1) + norm2 = np.linalg.norm(embedding2) + + if norm1 == 0 or norm2 == 0: + return 0.0 + + # Compute cosine similarity + similarity = np.dot(embedding1, embedding2) / (norm1 * norm2) + return float(similarity) + + def save_embedding_cache(self, embeddings: Dict, filename: str): + """Save embeddings to cache file""" + cache_file = self.embeddings_cache_path / f"{filename}.pkl" + with open(cache_file, 'wb') as f: + pickle.dump(embeddings, f) + print(f"Embeddings cached to: {cache_file}") + + def load_embedding_cache(self, filename: str) -> Optional[Dict]: + """Load embeddings from cache file""" + cache_file = self.embeddings_cache_path / f"{filename}.pkl" + if cache_file.exists(): + try: + with open(cache_file, 'rb') as f: + embeddings = pickle.load(f) + print(f"Loaded cached embeddings from: {cache_file}") + return embeddings + except Exception as e: + print(f"Error loading cached embeddings: {e}") + return None + + def generate_master_embeddings(self, force_regenerate=False) -> Dict[str, np.ndarray]: + """Generate embeddings for all master images (with caching)""" + if not self.vector_mode: + raise ValueError("Vector mode must be enabled to generate master embeddings") + + cache_filename = "master_embeddings" + + # Try to load from cache first + if not force_regenerate: + cached_embeddings = self.load_embedding_cache(cache_filename) + if cached_embeddings is not None: + # Verify all master images are in cache + if set(cached_embeddings.keys()) == set(self.master_images.keys()): + self.master_embeddings = cached_embeddings + print(f"✓ Using cached embeddings for {len(cached_embeddings)} master images") + return self.master_embeddings + else: + print("Cache incomplete, regenerating embeddings...") + + print(f"Generating embeddings for {len(self.master_images)} master images...") + self.master_embeddings = {} + + for i, (master_id, image_path) in enumerate(self.master_images.items(), 1): + print(f" {i}/{len(self.master_images)}: Generating embedding for {master_id}") + + embedding = self.generate_image_embedding(image_path) + if embedding is not None: + self.master_embeddings[master_id] = embedding + + # Small delay to avoid rate limiting + if i < len(self.master_images): + time.sleep(0.1) + + # Cache the embeddings + if self.master_embeddings: + self.save_embedding_cache(self.master_embeddings, cache_filename) + + print(f"✓ Generated embeddings for {len(self.master_embeddings)} master images") + return self.master_embeddings + + def detect_images_in_layout_hybrid(self, layout_path: str, layout_index: int, total_layouts: int) -> Dict: + """ + Hybrid detection method that routes to appropriate detection based on panel count + """ + layout_name = Path(layout_path).name + print(f"Processing {layout_index}/{total_layouts}: {layout_name} (Hybrid mode)") + + try: + # Step 1: Count panels and detect censorship using OpenAI O3 (consolidated call) + print(f" Step 1: Analyzing panels and censorship using OpenAI O3...") + combined_result = self.count_panels_and_detect_censorship(layout_path) + panel_count = combined_result.get('panel_count', 1) + panel_confidence = combined_result.get('panel_confidence', 'unknown') + is_censored = combined_result.get('is_censored', True) + censorship_confidence = combined_result.get('censorship_confidence', 'unknown') + + print(f" Panel analysis: {panel_count} panels detected (confidence: {panel_confidence})") + print(f" Censorship analysis: {'CENSORED' if is_censored else 'UNCENSORED'} (confidence: {censorship_confidence})") + + # Step 2: Route to appropriate detection method + analysis_method = "vector similarity" if self.vector_mode else "local inlier analysis" + if panel_count <= self.panel_threshold: + print(f" Step 2: Using {analysis_method} (≤{self.panel_threshold} panels)") + detection_result = self.detect_with_local_inlier_analysis(layout_path, layout_name) + detection_result['detection_method'] = 'vector_similarity' if self.vector_mode else 'local_inlier_analysis' + detection_result['api_calls_used'] = 1 # Only consolidated panel counting + censorship + else: + print(f" Step 2: Using split method + {analysis_method} (≥{self.panel_threshold + 1} panels)") + detection_result = self.detect_with_split_and_inlier_analysis(layout_path, layout_name, panel_count) + detection_result['detection_method'] = 'split_and_vector_similarity' if self.vector_mode else 'split_and_inlier_analysis' + detection_result['api_calls_used'] = 1 # Only consolidated panel counting + censorship + + # Step 3: Apply CEN refinement if enabled and we have CEN matches + current_masters = detection_result.get('detected_masters', []) + if self.refinement_mode and current_masters: + cen_images = [mid for mid in current_masters if self.is_cen_image(mid)] + if cen_images: + print(f" Step 3: Applying CEN refinement using stored censorship analysis...") + cen_result = self.apply_cen_refinement_with_stored_analysis(detection_result, is_censored, censorship_confidence) + detection_result.update(cen_result) + print(f" CEN refinement completed") + + # Step 4: Deduplicate and truncate matches if more than detected panels (using inlier scores) + current_masters = detection_result.get('detected_masters', []) + inlier_scores = detection_result.get('inlier_scores', {}) + + # First, deduplicate the results to remove exact duplicates + original_count = len(current_masters) + current_masters = self.deduplicate_master_matches(current_masters) + if len(current_masters) != original_count: + duplicates_removed = original_count - len(current_masters) + print(f" Step 4a: Removed {duplicates_removed} duplicate master(s)") + # Update the detection result with deduplicated masters + detection_result['detected_masters'] = current_masters + detection_result['detected_master_ids'] = current_masters + detection_result['detected_master_filenames'] = [f"{mid}.jpg" for mid in current_masters] + + if len(current_masters) > panel_count and inlier_scores and not self.no_truncation: + print(f" Step 4b: Truncating {len(current_masters)} matches to top {panel_count} by inlier score...") + + # Sort masters by inlier score (descending) + sorted_masters = sorted(current_masters, + key=lambda mid: inlier_scores.get(mid, 0), + reverse=True) + + # Keep only top N matches + truncated_masters = sorted_masters[:panel_count] + removed_masters = sorted_masters[panel_count:] + + detection_result['detected_masters'] = truncated_masters + detection_result['detected_master_ids'] = truncated_masters + detection_result['detected_master_filenames'] = [f"{mid}.jpg" for mid in truncated_masters] + detection_result['truncation_applied'] = True + detection_result['original_match_count'] = len(current_masters) + detection_result['final_match_count'] = len(truncated_masters) + detection_result['removed_masters'] = removed_masters + detection_result['removed_count'] = len(removed_masters) + + print(f" Truncated to {len(truncated_masters)} matches (removed {len(removed_masters)} lower-scoring matches)") + elif len(current_masters) > panel_count and self.no_truncation: + print(f" Step 4b: Truncation disabled - keeping all {len(current_masters)} matches") + detection_result['truncation_applied'] = False + detection_result['original_match_count'] = len(current_masters) + detection_result['final_match_count'] = len(current_masters) + + # Step 5: Add panel analysis and censorship information + detection_result['panel_analysis'] = { + 'panel_count': panel_count, + 'confidence': panel_confidence, + 'analysis': combined_result.get('panel_analysis', ''), + 'panel_descriptions': combined_result.get('panel_descriptions', []) + } + detection_result['censorship_analysis'] = { + 'is_censored': is_censored, + 'confidence': censorship_confidence, + 'analysis': combined_result.get('censorship_analysis', ''), + 'coverage_details': combined_result.get('coverage_details', '') + } + detection_result['panel_count'] = panel_count + detection_result['panel_threshold'] = self.panel_threshold + detection_result['processing_mode'] = 'hybrid' + detection_result['layout_path'] = layout_path + + # Step 6: Calculate confidence score based on matches vs panels ratio + final_matches = len(detection_result.get('detected_masters', [])) + confidence_percentage = self.calculate_confidence_score(final_matches, panel_count) + detection_result['confidence_score'] = confidence_percentage + + detected_count = len(detection_result.get('detected_masters', [])) + base_method = "vector similarity" if self.vector_mode else "local analysis" + method_name = base_method if panel_count <= self.panel_threshold else f"split + {base_method}" + print(f" Step 6: Confidence score calculated: {confidence_percentage:.1f}% ({final_matches} matches / {panel_count} panels)") + + # Step 7: Apply fallback to OpenAI one-at-a-time if enabled and needed + if self.fallback_one_at_a_time and final_matches < panel_count: + print(f" Step 7: Fallback triggered - {final_matches} matched masters < {panel_count} detected panels") + print(f" Running OpenAI one-at-a-time method with {len(self.master_images)} workers...") + print(f" DEBUG: Available master images: {len(self.master_images)}") + + # Create a temporary OpenAI detector for one-at-a-time processing + try: + temp_detector = OpenAIImageDetector( + enable_greyscale=self.enable_greyscale, + enable_contrast_enhancement=self.enable_contrast_enhancement, + contrast_factor=self.contrast_factor, + refinement_mode=self.refinement_mode, + one_at_a_time_mode=True, + max_concurrent_workers=len(self.master_images) # Use number of masters as worker count + ) + + # Manually set the master images since the constructor might not load them + temp_detector.master_images = self.master_images.copy() + print(f" DEBUG: Copied {len(self.master_images)} master images to temp detector") + + except Exception as e: + print(f" ERROR: Failed to create temporary OpenAI detector: {e}") + print(f" Fallback cancelled due to detector creation failure") + print(f"✓ Completed {layout_name} - Found {detected_count} matches using {method_name}") + return detection_result + + print(f" DEBUG: Temporary detector created with {temp_detector.max_concurrent_workers} workers") + print(f" DEBUG: Temp detector has {len(temp_detector.master_images)} master images") + print(f" DEBUG: Temp detector API key present: {temp_detector.api_key is not None}") + print(f" DEBUG: Temp detector one-at-a-time mode: {temp_detector.one_at_a_time_mode}") + print(f" DEBUG: Calling detect_images_in_layout_one_at_a_time...") + + # Run OpenAI one-at-a-time detection with stored censorship data + try: + stored_censorship_data = { + 'is_censored': is_censored, + 'confidence': censorship_confidence + } + fallback_result = temp_detector.detect_images_in_layout_one_at_a_time(layout_path, layout_index, total_layouts, stored_censorship_data) + print(f" DEBUG: OpenAI one-at-a-time call completed successfully") + except Exception as e: + print(f" ERROR: OpenAI one-at-a-time call failed: {e}") + print(f" Fallback cancelled due to OpenAI call failure") + print(f"✓ Completed {layout_name} - Found {detected_count} matches using {method_name}") + return detection_result + + print(f" DEBUG: Fallback result type: {type(fallback_result)}") + print(f" DEBUG: Fallback result keys: {list(fallback_result.keys()) if fallback_result else 'None'}") + + if fallback_result: + detected_masters = fallback_result.get('detected_masters', []) + print(f" DEBUG: Raw detected masters: {detected_masters}") + print(f" DEBUG: Number of raw detected masters: {len(detected_masters)}") + + if detected_masters: + print(f" Fallback successful - Found {len(detected_masters)} matches") + + # Apply CEN refinement if enabled + if self.refinement_mode: + print(f" DEBUG: Applying CEN refinement...") + fallback_result = self.apply_cen_refinement_with_stored_analysis(fallback_result, is_censored, censorship_confidence) + print(f" DEBUG: After CEN refinement: {fallback_result.get('detected_masters', [])}") + + # Apply deduplication + print(f" DEBUG: Applying deduplication...") + original_count = len(fallback_result['detected_masters']) + fallback_result['detected_masters'] = self.deduplicate_master_matches(fallback_result['detected_masters']) + fallback_result['detected_master_ids'] = fallback_result['detected_masters'] + fallback_result['detected_master_filenames'] = [f"{mid}.jpg" for mid in fallback_result['detected_masters']] + deduplicated_count = len(fallback_result['detected_masters']) + print(f" DEBUG: Deduplication: {original_count} -> {deduplicated_count}") + + # Update metadata to reflect fallback usage + fallback_result['fallback_applied'] = True + fallback_result['original_detection_method'] = detection_result.get('detection_method', 'unknown') + fallback_result['original_match_count'] = final_matches + fallback_result['fallback_match_count'] = len(fallback_result['detected_masters']) + fallback_result['detection_method'] = 'openai_one_at_a_time_fallback' + fallback_result['processing_mode'] = 'hybrid_with_fallback' + + # Preserve original analysis data + fallback_result['panel_analysis'] = detection_result['panel_analysis'] + fallback_result['censorship_analysis'] = detection_result['censorship_analysis'] + fallback_result['panel_count'] = panel_count + fallback_result['panel_threshold'] = self.panel_threshold + fallback_result['layout_path'] = layout_path + + # Recalculate confidence score + fallback_matches = len(fallback_result['detected_masters']) + fallback_result['confidence_score'] = self.calculate_confidence_score(fallback_matches, panel_count) + + # Update API call count to include fallback calls + base_api_calls = 1 # Initial hybrid analysis call + fallback_api_calls = fallback_result.get('api_calls_made', 0) + fallback_result['api_calls_used'] = base_api_calls + fallback_api_calls + + print(f"✓ Fallback completed {layout_name} - Found {fallback_matches} matches using OpenAI one-at-a-time") + return fallback_result + else: + print(f" DEBUG: No detected masters in fallback result") + else: + print(f" DEBUG: Fallback result is None or empty") + + print(f" Fallback failed - No additional matches found, keeping original results") + + print(f"✓ Completed {layout_name} - Found {detected_count} matches using {method_name}") + return detection_result + + except Exception as e: + error_msg = f"Error in hybrid analysis for {layout_name}: {e}" + print(error_msg) + return { + 'detected_masters': [], + 'detected_master_ids': [], + 'detected_master_filenames': [], + 'analysis': f'Hybrid analysis failed: {error_msg}', + 'error': str(e), + 'processing_mode': 'hybrid_error', + 'detection_method': 'error', + 'api_calls_used': 0 + } + + def detect_with_local_inlier_analysis(self, layout_path: str, layout_name: str) -> Dict: + """ + Detect masters using local analysis (inlier analysis or vector similarity based on vector_mode) + """ + if self.vector_mode: + return self.detect_with_vector_similarity(layout_path, layout_name) + else: + return self.detect_with_inlier_analysis(layout_path, layout_name) + + def detect_with_vector_similarity(self, layout_path: str, layout_name: str) -> Dict: + """ + Detect masters using vector similarity search + """ + print(f" → Analyzing {layout_name} with vector similarity (cosine similarity on embeddings)...") + + try: + # Generate embedding for layout image + layout_embedding = self.generate_image_embedding(layout_path) + + if layout_embedding is None: + raise Exception("Failed to generate layout embedding") + + # Compare with all master embeddings + similarities = {} + detected_masters = [] + + print(f" → Comparing against {len(self.master_embeddings)} master embeddings...") + for master_id, master_embedding in self.master_embeddings.items(): + similarity = self.compute_cosine_similarity(layout_embedding, master_embedding) + similarities[master_id] = similarity + + if similarity >= self.similarity_threshold: + detected_masters.append(master_id) + + # Sort detected masters by similarity (highest first) + detected_masters.sort(key=lambda x: similarities[x], reverse=True) + + # Get top similarities for analysis + top_similarities = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:5] + + # Create analysis text + analysis_parts = [ + f"Vector similarity analysis using Google Vertex AI embeddings (1408 dimensions).", + f"Similarity threshold: {self.similarity_threshold}", + f"Found {len(detected_masters)} matches above threshold.", + f"Top 5 similarities: " + ", ".join([f"{mid}({sim:.3f})" for mid, sim in top_similarities]) + ] + + # Apply deduplication (for consistency with inlier analysis) + original_detected = detected_masters[:] + detected_masters = self.deduplicate_master_matches(detected_masters) + + if len(detected_masters) != len(original_detected): + duplicates_removed = len(original_detected) - len(detected_masters) + analysis_parts.append(f"Removed {duplicates_removed} duplicate(s).") + + analysis = " ".join(analysis_parts) + + print(f" → Vector similarity analysis completed: {len(detected_masters)} matches found") + + # Create similarity scores dict for potential truncation + similarity_scores = {mid: similarities[mid] for mid in detected_masters} + + return { + 'detected_masters': detected_masters, + 'detected_master_ids': detected_masters, + 'detected_master_filenames': [f"{mid}.jpg" for mid in detected_masters], + 'analysis': analysis, + 'processing_mode': 'vector_similarity', + 'total_masters_checked': len(self.master_embeddings), + 'confident_matches': len(detected_masters), + 'similarity_threshold': self.similarity_threshold, + 'deduplication_applied': len(detected_masters) != len(original_detected), + 'duplicates_removed': len(original_detected) - len(detected_masters) if len(detected_masters) != len(original_detected) else 0, + 'original_detected_masters': original_detected, + 'inlier_scores': similarity_scores, # Using similarity scores for truncation compatibility + 'top_similarities': dict(top_similarities) + } + + except Exception as e: + error_msg = f"Error in vector similarity analysis for {layout_name}: {e}" + print(f" → {error_msg}") + return { + 'detected_masters': [], + 'detected_master_ids': [], + 'detected_master_filenames': [], + 'analysis': f'Vector similarity analysis failed: {error_msg}', + 'error': str(e), + 'processing_mode': 'vector_similarity_error' + } + + def detect_with_inlier_analysis(self, layout_path: str, layout_name: str) -> Dict: + """ + Detect masters using local OpenCV-based inlier analysis with multiprocessing + Memory-safe version with dynamic worker adjustment + """ + print(f" → Analyzing {layout_name} with local inlier analysis using {self.local_workers} processes...") + + try: + # Check memory before starting + if not self.memory_manager.is_memory_safe(): + print(" → Memory usage high, waiting for safe levels...") + if not self.memory_manager.wait_for_memory_safe(): + raise Exception("Memory usage too high to safely process") + + # Load layout image in grayscale for feature detection (for initial check) + layout_img = cv2.imread(layout_path, cv2.IMREAD_GRAYSCALE) + if layout_img is None: + raise Exception(f"Could not load layout image: {layout_path}") + + # Detect features in layout image for reporting + layout_kp, layout_desc = self.akaze.detectAndCompute(layout_img, None) + if layout_desc is None: + raise Exception("No features detected in layout image") + + feature_count = len(layout_kp) + print(f" → Detected {feature_count} features in layout") + + # Dynamically adjust worker count based on feature count and memory + if feature_count > 50000: + # High feature count - reduce parallelism + safe_workers = max(1, self.local_workers // 2) + print(f" → High feature count ({feature_count}), reducing workers to {safe_workers}") + elif feature_count > 30000: + safe_workers = max(1, int(self.local_workers * 0.75)) + print(f" → Medium feature count ({feature_count}), reducing workers to {safe_workers}") + else: + safe_workers = self.local_workers + + # Further limit based on available memory + safe_workers = min(safe_workers, self.memory_manager.limit_concurrent_processes(safe_workers)) + + # Prepare tasks for multiprocessing + total_masters = len(self.master_images) + tasks = [] + + # Add max_features parameter to prevent memory explosion + max_features = min(10000, feature_count // 2) if feature_count > 20000 else 10000 + + for master_id, master_path in self.master_images.items(): + tasks.append((layout_path, master_id, master_path, self.min_good_matches, max_features)) + + print(f" → Processing {total_masters} masters using {safe_workers} concurrent processes...") + print(f" → Feature limit per master: {max_features}") + + # Process masters in parallel + master_results = [] + start_time = time.time() + + with concurrent.futures.ProcessPoolExecutor(max_workers=safe_workers) as executor: + # Submit all tasks + future_to_master = { + executor.submit(process_single_master_inlier_analysis, *task_args): task_args[1] + for task_args in tasks + } + + completed_count = 0 + # Collect results as they complete + for future in concurrent.futures.as_completed(future_to_master): + master_id = future_to_master[future] + completed_count += 1 + + # Progress update and memory monitoring every 10 completed masters + if completed_count % 10 == 0: + usage = self.memory_manager.get_memory_usage() + print(f" → Completed {completed_count}/{total_masters} masters... Memory: {usage['memory_percent']:.1f}%") + + # If memory is getting high, warn and potentially pause + if usage['memory_percent'] > 80: + print(f" → WARNING: High memory usage {usage['memory_percent']:.1f}%, monitoring closely...") + if usage['swap_percent'] > 20: + print(f" → WARNING: Swap usage {usage['swap_percent']:.1f}%, may slow down...") + + try: + result = future.result() + + # Add to results if above threshold + inliers = result.get('inliers', 0) + confidence = result.get('confidence', 'low') + + if inliers > 0 and confidence != 'low': + master_results.append({ + 'master_id': master_id, + 'inliers': inliers, + 'confidence': confidence, + 'details': result + }) + + except Exception as e: + print(f" → Error processing {master_id}: {e}") + + + # Sort by inlier count (descending) + master_results.sort(key=lambda x: x['inliers'], reverse=True) + + # Apply thresholds and select confident matches + detected_masters = [] + inlier_scores = {} + analysis_parts = [] + + if master_results: + # Get the best match for reference + best_match = master_results[0] + best_inliers = best_match['inliers'] + + # Apply absolute and relative thresholds + min_inliers = max(self.min_good_matches, best_inliers * self.inlier_ratio_threshold, int(best_inliers * self.inlier_threshold)) + + for result in master_results: + inliers = result['inliers'] + confidence = result['confidence'] + master_id = result['master_id'] + + # Store inlier score for all results (for potential truncation) + inlier_scores[master_id] = inliers + + # Check if this match meets our criteria + if (inliers >= min_inliers and + confidence in ['high', 'medium'] and + inliers >= self.min_good_matches): + detected_masters.append(master_id) + + # Create analysis text + analysis_parts = [ + f"Multiprocessing local inlier analysis using OpenCV AKAZE features ({self.local_workers} processes).", + f"Processed {total_masters} master images against {len(layout_kp)} layout features.", + f"Found {len(master_results)} potential matches, {len(detected_masters)} above threshold.", + f"Best match: {best_match['master_id']} ({best_inliers} inliers, {best_match['confidence']} confidence)" + ] + else: + analysis_parts = [ + f"Multiprocessing local inlier analysis using OpenCV AKAZE features ({self.local_workers} processes).", + f"Processed {total_masters} master images against {len(layout_kp)} layout features.", + f"No confident matches found above threshold." + ] + + # Apply deduplication (shouldn't be needed for local analysis, but for safety) + original_detected = detected_masters[:] + detected_masters = self.deduplicate_master_matches(detected_masters) + + if len(detected_masters) != len(original_detected): + duplicates_removed = len(original_detected) - len(detected_masters) + analysis_parts.append(f"Removed {duplicates_removed} duplicate(s).") + + analysis = " ".join(analysis_parts) + + print(f" → Local analysis completed: {len(detected_masters)} matches found using {safe_workers} processes") + + # Force garbage collection to free memory + import gc + gc.collect() + + return { + 'detected_masters': detected_masters, + 'detected_master_ids': detected_masters, + 'detected_master_filenames': [f"{mid}.jpg" for mid in detected_masters], + 'analysis': analysis, + 'processing_mode': 'local_inlier_analysis_multiprocess', + 'total_masters_checked': total_masters, + 'potential_matches_found': len(master_results), + 'confident_matches': len(detected_masters), + 'inlier_threshold': min_inliers if master_results else self.min_good_matches, + 'master_analysis_results': master_results[:10], # Top 10 for debugging + 'concurrent_workers': self.local_workers, + 'deduplication_applied': len(detected_masters) != len(original_detected), + 'duplicates_removed': len(original_detected) - len(detected_masters) if len(detected_masters) != len(original_detected) else 0, + 'original_detected_masters': original_detected, + 'inlier_scores': inlier_scores # Track inlier scores for truncation + } + + except Exception as e: + error_msg = f"Error in local inlier analysis for {layout_name}: {e}" + print(f" → {error_msg}") + return { + 'detected_masters': [], + 'detected_master_ids': [], + 'detected_master_filenames': [], + 'analysis': f'Local inlier analysis failed: {error_msg}', + 'error': str(e), + 'processing_mode': 'local_inlier_analysis_error' + } + + def detect_with_split_and_inlier_analysis(self, layout_path: str, layout_name: str, panel_count: int) -> Dict: + """ + Detect masters using split method followed by local analysis (inlier analysis or vector similarity based on vector_mode) + """ + if self.vector_mode: + return self.detect_with_split_and_vector_similarity(layout_path, layout_name, panel_count) + else: + return self.detect_with_split_and_inlier_analysis_opencv(layout_path, layout_name, panel_count) + + def detect_with_split_and_vector_similarity(self, layout_path: str, layout_name: str, panel_count: int) -> Dict: + """ + Detect masters using split method followed by vector similarity analysis on individual panels + """ + print(f" → Analyzing {layout_name} with split method + vector similarity...") + + try: + # Initialize panel splitter if not already done + if not hasattr(self, 'splitter'): + if self.split_simple: + self.splitter = SimplePanelSplitter(debug=False) + elif self.split_advanced: + self.splitter = AdvancedPanelSplitter( + percentile=self.percentile, + min_gap=self.min_gap, + debug=False + ) + else: + self.splitter = PanelSplitter() + + # Step 1: Split layout into individual panels using already-known panel count + print(f" → Splitting layout into {panel_count} panels (using pre-analyzed count)...") + split_panels = self.splitter.split_panels(layout_path, panel_count) + + if not split_panels: + raise Exception("Panel splitting failed: No panels generated") + + splits_generated = len(split_panels) + print(f" → Successfully split into {splits_generated} individual panel images") + + # Step 2: Apply vector similarity analysis to each split panel + print(f" → Analyzing {splits_generated} split panels using vector similarity...") + + all_results = [] + split_panel_paths = [] + + for i, split_info in enumerate(split_panels): + # Save split panel to temporary file for analysis + split_path = f"/tmp/split_panel_{i}_{os.path.basename(layout_path)}" + cv2.imwrite(split_path, split_info['image']) + split_panel_paths.append(split_path) + + print(f" → Processing split panel {i+1}/{splits_generated}...") + + # Generate embedding for this split panel + split_embedding = self.generate_image_embedding(split_path) + + if split_embedding is None: + print(f" → Failed to generate embedding for split panel {i+1}") + continue + + # Compare split panel against all master embeddings + for master_id, master_embedding in self.master_embeddings.items(): + similarity = self.compute_cosine_similarity(split_embedding, master_embedding) + + if similarity >= self.similarity_threshold: + all_results.append({ + 'master_id': master_id, + 'similarity': similarity, + 'confidence': 'high' if similarity >= 0.9 else 'medium', + 'split_panel': i, + 'split_path': split_path + }) + + # Clean up temporary split panel files + for split_path in split_panel_paths: + try: + os.remove(split_path) + except: + pass + + # Step 3: Process results and find best matches + detected_masters = [] + similarity_scores = {} + + if all_results: + # Sort by similarity (descending) + all_results.sort(key=lambda x: x['similarity'], reverse=True) + + # Track similarity scores for all results (for potential truncation) + for result in all_results: + master_id = result['master_id'] + similarity = result['similarity'] + + # Keep the highest similarity score for each master (in case of multiple panel matches) + if master_id not in similarity_scores or similarity > similarity_scores[master_id]: + similarity_scores[master_id] = similarity + + # Add to detected masters if not already present + if master_id not in detected_masters: + detected_masters.append(master_id) + + # Apply deduplication + original_detected = detected_masters[:] + detected_masters = self.deduplicate_master_matches(detected_masters) + + if self.split_simple: + splitter_type = "SimplePanelSplitter (even division)" + elif self.split_advanced: + splitter_type = "AdvancedPanelSplitter (edge detection)" + else: + splitter_type = "PanelSplitter (multiple CV methods)" + + analysis_parts = [ + f"Multi-panel layout processed using split method + vector similarity analysis.", + f"Layout split into {splits_generated} individual panels using {splitter_type} (no additional API calls).", + f"Each panel analyzed against {len(self.master_embeddings)} master embeddings using Google Vertex AI (1408 dimensions).", + f"Similarity threshold: {self.similarity_threshold}", + f"Found {len(detected_masters)} matches after deduplication." + ] + + if len(detected_masters) != len(original_detected): + duplicates_removed = len(original_detected) - len(detected_masters) + analysis_parts.append(f"Removed {duplicates_removed} duplicate(s).") + + analysis = " ".join(analysis_parts) + + print(f" → Split + vector similarity analysis completed: {len(detected_masters)} matches found") + + return { + 'detected_masters': detected_masters, + 'detected_master_ids': detected_masters, + 'detected_master_filenames': [f"{mid}.jpg" for mid in detected_masters], + 'analysis': analysis, + 'processing_mode': 'split_and_vector_similarity', + 'splits_generated': splits_generated, + 'panel_count': panel_count, + 'deduplication_applied': len(detected_masters) != len(original_detected), + 'duplicates_removed': len(original_detected) - len(detected_masters) if len(detected_masters) != len(original_detected) else 0, + 'original_detected_masters': original_detected, + 'total_combinations_processed': len(all_results), + 'potential_matches_found': len(all_results), + 'inlier_scores': similarity_scores, # Using similarity scores for truncation compatibility + 'similarity_threshold': self.similarity_threshold + } + + except Exception as e: + error_msg = f"Error in split + vector similarity analysis for {layout_name}: {e}" + print(f" → {error_msg}") + return { + 'detected_masters': [], + 'detected_master_ids': [], + 'detected_master_filenames': [], + 'analysis': f'Split + vector similarity analysis failed: {error_msg}', + 'error': str(e), + 'processing_mode': 'split_and_vector_similarity_error' + } + + def detect_with_split_and_inlier_analysis_opencv(self, layout_path: str, layout_name: str, panel_count: int) -> Dict: + """ + Detect masters using split method followed by local inlier analysis on individual panels + """ + print(f" → Analyzing {layout_name} with split method + inlier analysis...") + + try: + # Initialize panel splitter if not already done + if not hasattr(self, 'splitter'): + if self.split_simple: + self.splitter = SimplePanelSplitter(debug=False) + elif self.split_advanced: + self.splitter = AdvancedPanelSplitter( + percentile=self.percentile, + min_gap=self.min_gap, + debug=False + ) + else: + self.splitter = PanelSplitter() + + # Step 1: Split layout into individual panels using already-known panel count + print(f" → Splitting layout into {panel_count} panels (using pre-analyzed count)...") + split_panels = self.splitter.split_panels(layout_path, panel_count) + + if not split_panels: + raise Exception("Panel splitting failed: No panels generated") + + splits_generated = len(split_panels) + print(f" → Successfully split into {splits_generated} individual panel images") + + # Step 2: Apply multiprocessing inlier analysis to each split panel + print(f" → Analyzing {splits_generated} split panels using {self.local_workers} concurrent processes...") + + # Prepare tasks for multiprocessing - analyze each split panel against all masters + tasks = [] + split_panel_paths = [] + + for i, split_info in enumerate(split_panels): + # Save split panel to temporary file for analysis + split_path = f"/tmp/split_panel_{i}_{os.path.basename(layout_path)}" + cv2.imwrite(split_path, split_info['image']) + split_panel_paths.append(split_path) + + # Create tasks for this split panel against all masters + for master_id, master_path in self.master_images.items(): + tasks.append((split_path, master_id, master_path, self.min_good_matches)) + + # Process all split panels against all masters in parallel + all_results = [] + start_time = time.time() + + with concurrent.futures.ProcessPoolExecutor(max_workers=self.local_workers) as executor: + future_to_task = { + executor.submit(process_single_master_inlier_analysis, *task_args): task_args + for task_args in tasks + } + + completed_count = 0 + for future in concurrent.futures.as_completed(future_to_task): + task_args = future_to_task[future] + completed_count += 1 + + # Progress update + if completed_count % 50 == 0: + total_tasks = len(tasks) + print(f" → Completed {completed_count}/{total_tasks} split-master combinations...") + + try: + result = future.result() + if result.get('confidence') != 'low' and result.get('inliers', 0) > 0: + all_results.append(result) + except Exception as e: + print(f" → Error processing split task: {e}") + + # Clean up temporary split panel files + for split_path in split_panel_paths: + try: + os.remove(split_path) + except: + pass + + # Step 3: Process results and find best matches + detected_masters = [] + inlier_scores = {} + if all_results: + # Sort by inlier count (descending) + all_results.sort(key=lambda x: x['inliers'], reverse=True) + + # Apply thresholds + best_inliers = all_results[0]['inliers'] + min_inliers = max(self.min_good_matches, best_inliers * self.inlier_ratio_threshold, int(best_inliers * self.inlier_threshold)) + + # Track inlier scores for all results (for potential truncation) + for result in all_results: + master_id = result['master_id'] + inliers = result['inliers'] + + # Keep the highest inlier score for each master (in case of multiple panel matches) + if master_id not in inlier_scores or inliers > inlier_scores[master_id]: + inlier_scores[master_id] = inliers + + if (inliers >= min_inliers and + result['confidence'] in ['high', 'medium'] and + inliers >= self.min_good_matches): + detected_masters.append(master_id) + + # Apply deduplication + original_detected = detected_masters[:] + detected_masters = self.deduplicate_master_matches(detected_masters) + + if self.split_simple: + splitter_type = "SimplePanelSplitter (even division)" + elif self.split_advanced: + splitter_type = "AdvancedPanelSplitter (edge detection)" + else: + splitter_type = "PanelSplitter (multiple CV methods)" + analysis_parts = [ + f"Multi-panel layout processed using split method + local inlier analysis.", + f"Layout split into {splits_generated} individual panels using {splitter_type} (no additional API calls).", + f"Each panel analyzed against {len(self.master_images)} masters using OpenCV AKAZE features with multiprocessing ({self.local_workers} processes).", + f"Processed {len(tasks)} split-master combinations in parallel.", + f"Found {len(detected_masters)} matches after deduplication." + ] + + if len(detected_masters) != len(original_detected): + duplicates_removed = len(original_detected) - len(detected_masters) + analysis_parts.append(f"Removed {duplicates_removed} duplicate(s).") + + analysis = " ".join(analysis_parts) + + print(f" → Split + inlier analysis completed: {len(detected_masters)} matches found using {self.local_workers} processes") + + return { + 'detected_masters': detected_masters, + 'detected_master_ids': detected_masters, + 'detected_master_filenames': [f"{mid}.jpg" for mid in detected_masters], + 'analysis': analysis, + 'processing_mode': 'split_and_inlier_analysis', + 'splits_generated': splits_generated, + 'panel_count': panel_count, + 'concurrent_workers': self.local_workers, + 'deduplication_applied': len(detected_masters) != len(original_detected), + 'duplicates_removed': len(original_detected) - len(detected_masters) if len(detected_masters) != len(original_detected) else 0, + 'original_detected_masters': original_detected, + 'total_combinations_processed': len(tasks), + 'potential_matches_found': len(all_results), + 'inlier_scores': inlier_scores # Track inlier scores for truncation + } + + except Exception as e: + error_msg = f"Error in split + inlier analysis for {layout_name}: {e}" + print(f" → {error_msg}") + return { + 'detected_masters': [], + 'detected_master_ids': [], + 'detected_master_filenames': [], + 'analysis': f'Split + inlier analysis failed: {error_msg}', + 'error': str(e), + 'processing_mode': 'split_and_inlier_analysis_error' + } + + def process_all_layouts_hybrid(self, limit: Optional[int] = None, specific_file: Optional[str] = None) -> Dict: + """ + Process all layout images using hybrid detection + """ + print("Starting hybrid batch processing...") + analysis_method = "vector similarity" if self.vector_mode else "local analysis" + print(f"Panel threshold: ≤{self.panel_threshold} panels → {analysis_method}, ≥{self.panel_threshold + 1} panels → split + {analysis_method}") + + # Load master images + self.load_master_images() + + # Generate master embeddings if vector mode is enabled + if self.vector_mode: + self.generate_master_embeddings() + + if not self.master_embeddings: + raise Exception("No master embeddings available for vector mode") + + # Configure worker counts now that we know the number of masters + self._configure_worker_counts() + + # Get layout files + if specific_file: + # Process only the specific file + layout_files = [self.layouts_path / specific_file] + if not layout_files[0].exists(): + raise FileNotFoundError(f"Layout file {specific_file} not found in {self.layouts_path}") + print(f"Processing specific file: {specific_file}") + else: + layout_files = list(self.layouts_path.glob("*.jpg")) + + if limit: + layout_files = layout_files[:limit] + print(f"Processing first {limit} layouts only") + + total_layouts = len(layout_files) + print(f"Processing {total_layouts} layout images in hybrid mode") + print("=" * 60) + + results = {} + start_time = time.time() + + # Track hybrid statistics + local_analysis_count = 0 + split_analysis_count = 0 + total_api_calls = 0 + truncation_count = 0 + total_truncated_matches = 0 + + for i, layout_path in enumerate(layout_files, 1): + layout_id = layout_path.stem + + # Detect images using hybrid method + result = self.detect_images_in_layout_hybrid(str(layout_path), i, total_layouts) + + # Track statistics + detection_method = result.get('detection_method') + if detection_method in ['local_inlier_analysis', 'vector_similarity']: + local_analysis_count += 1 + elif detection_method in ['split_and_inlier_analysis', 'split_and_vector_similarity']: + split_analysis_count += 1 + + total_api_calls += result.get('api_calls_used', 0) + + if result.get('truncation_applied'): + truncation_count += 1 + total_truncated_matches += result.get('removed_count', 0) + + layout_result = { + 'layout_filename': layout_path.name, + 'detected_master_ids': result.get('detected_master_ids', []), + 'detected_master_filenames': result.get('detected_master_filenames', []), + 'analysis': result.get('analysis', ''), + 'detection_method': result.get('detection_method', 'unknown'), + 'panel_count': result.get('panel_count', 1), + 'panel_threshold': self.panel_threshold, + 'processing_mode': 'hybrid' + } + + # Add method-specific fields + if 'panel_analysis' in result: + layout_result['panel_analysis'] = result['panel_analysis'] + + if 'total_masters_checked' in result: + layout_result['total_masters_checked'] = result['total_masters_checked'] + + if 'splits_generated' in result: + layout_result['splits_generated'] = result['splits_generated'] + layout_result['split_results'] = result.get('split_results', []) + + # Add deduplication fields if applied + if result.get('deduplication_applied'): + layout_result['deduplication_applied'] = result['deduplication_applied'] + layout_result['duplicates_removed'] = result['duplicates_removed'] + layout_result['original_detected_masters'] = result['original_detected_masters'] + + # Add refinement fields if applied + if result.get('refinement_applied'): + layout_result['refinement_applied'] = result['refinement_applied'] + layout_result['refinement_details'] = result['refinement_details'] + layout_result['censorship_analysis'] = result['censorship_analysis'] + + # Add truncation fields if applied + if result.get('truncation_applied'): + layout_result['truncation_applied'] = result['truncation_applied'] + layout_result['original_match_count'] = result['original_match_count'] + layout_result['final_match_count'] = result['final_match_count'] + layout_result['removed_count'] = result['removed_count'] + + if 'error' in result: + layout_result['error'] = result['error'] + + # Add cost breakdown for this layout + cost_breakdown = cost_calculator.get_layout_cost_breakdown(layout_path.name) + if cost_breakdown: + layout_result['cost_breakdown'] = cost_breakdown + + results[layout_id] = layout_result + + # Progress update with time estimate + elapsed = time.time() - start_time + avg_time = elapsed / i + remaining = (total_layouts - i) * avg_time + + print(f"Progress: {i}/{total_layouts} ({i/total_layouts*100:.1f}%) - Est. remaining: {remaining/60:.1f} min") + + # Save progress periodically + if i % 20 == 0: + self.save_results(results, f"hybrid_progress_{i}") + + total_time = time.time() - start_time + + # Print hybrid statistics + print(f"\n{'='*60}") + print("HYBRID PROCESSING STATISTICS") + print(f"{'='*60}") + print(f"Total layouts processed: {total_layouts}") + print(f"Local analysis used: {local_analysis_count} ({local_analysis_count/total_layouts*100:.1f}%)") + print(f"Split + inlier analysis used: {split_analysis_count} ({split_analysis_count/total_layouts*100:.1f}%)") + print(f"Truncation applied: {truncation_count} layouts ({truncation_count/total_layouts*100:.1f}%)") + print(f"Total matches truncated: {total_truncated_matches}") + print(f"Total API calls made: {total_api_calls}") + print(f"Average API calls per layout: {total_api_calls/total_layouts:.1f}") + print(f"Estimated cost savings vs one-at-a-time: {(1 - total_api_calls/(total_layouts * (len(self.master_images) + 1)))*100:.1f}%") + print(f"Total processing time: {total_time/60:.1f} minutes") + print(f"Average time per layout: {total_time/total_layouts:.1f} seconds") + + return results + + def process_all_layouts_hybrid_parallel(self, limit: Optional[int] = None, specific_file: Optional[str] = None) -> Dict: + """ + Process all layout images using hybrid detection with parallel layout processing + """ + print("Starting hybrid batch processing with parallel layout workers...") + analysis_method = "vector similarity" if self.vector_mode else "local analysis" + print(f"Panel threshold: ≤{self.panel_threshold} panels → {analysis_method}, ≥{self.panel_threshold + 1} panels → split + {analysis_method}") + print(f"Parallel processing enabled: {self.layout_workers} layout workers, max {self.max_concurrent_layouts} concurrent layouts") + + # Load master images + self.load_master_images() + + # Generate master embeddings if vector mode is enabled + if self.vector_mode: + self.generate_master_embeddings() + + if not self.master_embeddings: + raise Exception("No master embeddings available for vector mode") + + # Configure worker counts now that we know the number of masters + self._configure_worker_counts() + + # Initialize inlier analysis coordinator + self.inlier_coordinator = InlierAnalysisCoordinator( + self.local_workers, + self.memory_manager, + self.min_good_matches + ) + self.inlier_coordinator.start() + + # Get layout files + layout_files = self._prepare_layout_files(limit, specific_file) + total_layouts = len(layout_files) + + # Initialize progress tracker + self.progress_tracker = ProgressTracker(total_layouts) + + print(f"Processing {total_layouts} layout images in parallel hybrid mode") + print("=" * 60) + + # Process layouts in parallel + results = {} + start_time = time.time() + + # Track statistics + local_analysis_count = 0 + split_analysis_count = 0 + total_api_calls = 0 + truncation_count = 0 + total_truncated_matches = 0 + + try: + with concurrent.futures.ThreadPoolExecutor(max_workers=self.layout_workers) as executor: + # Submit all layout processing tasks + future_to_layout = { + executor.submit(self._process_single_layout_parallel, layout_path, i, total_layouts): layout_path + for i, layout_path in enumerate(layout_files, 1) + } + + # Progress monitoring thread + progress_thread = threading.Thread(target=self._monitor_progress, daemon=True) + progress_thread.start() + + # Collect results as they complete + for future in concurrent.futures.as_completed(future_to_layout): + layout_path = future_to_layout[future] + layout_id = layout_path.stem + + try: + result = future.result() + + # Track statistics + detection_method = result.get('detection_method', 'unknown') + if detection_method in ['local_inlier_analysis', 'vector_similarity']: + local_analysis_count += 1 + elif detection_method in ['split_and_inlier_analysis', 'split_and_vector_similarity']: + split_analysis_count += 1 + + total_api_calls += result.get('api_calls_used', 0) + + if result.get('truncation_applied'): + truncation_count += 1 + total_truncated_matches += result.get('removed_count', 0) + + # Create layout result + layout_result = self._create_layout_result(result, layout_path) + results[layout_id] = layout_result + + # Update progress + self.progress_tracker.complete_layout(success=True) + + except Exception as e: + print(f"Error processing layout {layout_path.name}: {e}") + + # Handle worker failure with potential adjustments + error_result = self._handle_worker_failure(layout_path.name, e) + results[layout_id] = error_result + self.progress_tracker.complete_layout(success=False) + + # Monitor memory and adjust workers if needed + self._monitor_memory_and_adjust_workers() + + # Save progress periodically + if len(results) % 20 == 0: + self.save_results(results, f"hybrid_parallel_progress_{len(results)}") + + finally: + # Stop coordinator + if self.inlier_coordinator: + self.inlier_coordinator.stop() + + total_time = time.time() - start_time + + # Print statistics + self._print_parallel_statistics( + total_layouts, local_analysis_count, split_analysis_count, + truncation_count, total_truncated_matches, total_api_calls, total_time + ) + + return results + + def _prepare_layout_files(self, limit: Optional[int], specific_file: Optional[str]) -> List: + """Prepare list of layout files to process""" + if specific_file: + # Process only the specific file + layout_files = [self.layouts_path / specific_file] + if not layout_files[0].exists(): + raise FileNotFoundError(f"Layout file {specific_file} not found in {self.layouts_path}") + print(f"Processing specific file: {specific_file}") + else: + layout_files = list(self.layouts_path.glob("*.jpg")) + + if limit: + layout_files = layout_files[:limit] + print(f"Processing first {limit} layouts only") + + return layout_files + + def _process_single_layout_parallel(self, layout_path, layout_index, total_layouts): + """Process a single layout in parallel mode""" + layout_name = layout_path.name + layout_id = layout_path.stem + + # Mark layout as started + self.progress_tracker.start_layout() + + print(f"Processing {layout_index}/{total_layouts}: {layout_name} (Parallel hybrid mode)") + + try: + # Phase 1: OpenAI API calls (can be parallel across layouts) + print(f" Step 1: Analyzing panels and censorship using OpenAI O3...") + combined_result = self.count_panels_and_detect_censorship(str(layout_path)) + panel_count = combined_result.get('panel_count', 1) + panel_confidence = combined_result.get('panel_confidence', 'unknown') + is_censored = combined_result.get('is_censored', True) + censorship_confidence = combined_result.get('censorship_confidence', 'unknown') + + print(f" Panel analysis: {panel_count} panels detected (confidence: {panel_confidence})") + print(f" Censorship analysis: {'CENSORED' if is_censored else 'UNCENSORED'} (confidence: {censorship_confidence})") + + # Phase 2: Determine analysis method and coordinate with serial inlier analysis + analysis_method = "vector similarity" if self.vector_mode else "local inlier analysis" + inlier_future = concurrent.futures.Future() + + if panel_count <= self.panel_threshold: + print(f" Step 2: Submitting {analysis_method} (≤{self.panel_threshold} panels) to inlier queue...") + + # Prepare analysis parameters + analysis_params = { + 'layout_path': str(layout_path), + 'master_images': self.master_images, + 'vector_mode': self.vector_mode, + 'inlier_threshold': self.inlier_threshold, + 'inlier_ratio_threshold': self.inlier_ratio_threshold + } + + # Add vector mode specific parameters + if self.vector_mode: + analysis_params['embedding_model'] = self.embedding_model + analysis_params['master_embeddings'] = self.master_embeddings + analysis_params['similarity_threshold'] = self.similarity_threshold + + # Submit to inlier analysis queue + self.inlier_coordinator.submit_analysis( + layout_id, 'direct', analysis_params, inlier_future + ) + + detection_method = 'vector_similarity' if self.vector_mode else 'local_inlier_analysis' + + else: + print(f" Step 2: Splitting layout and submitting {analysis_method} (≥{self.panel_threshold + 1} panels) to inlier queue...") + + # Split the layout first + split_panels = self._split_layout(layout_path, panel_count) + + # Prepare analysis parameters + analysis_params = { + 'split_panels': split_panels, + 'master_images': self.master_images, + 'vector_mode': self.vector_mode, + 'inlier_threshold': self.inlier_threshold, + 'inlier_ratio_threshold': self.inlier_ratio_threshold + } + + # Add vector mode specific parameters + if self.vector_mode: + analysis_params['embedding_model'] = self.embedding_model + analysis_params['master_embeddings'] = self.master_embeddings + analysis_params['similarity_threshold'] = self.similarity_threshold + + # Submit to inlier analysis queue + self.inlier_coordinator.submit_analysis( + layout_id, 'split', analysis_params, inlier_future + ) + + detection_method = 'split_and_vector_similarity' if self.vector_mode else 'split_and_inlier_analysis' + + # Wait for inlier analysis to complete + print(f" Step 3: Waiting for inlier analysis completion...") + inlier_result = inlier_future.result() # No timeout - wait indefinitely for completion + + # Phase 3: Post-process results (can be parallel across layouts) + detection_result = self._post_process_parallel_results( + inlier_result, combined_result, panel_count, detection_method, layout_path + ) + + detected_count = len(detection_result.get('detected_masters', [])) + base_method = "vector similarity" if self.vector_mode else "local analysis" + method_name = base_method if panel_count <= self.panel_threshold else f"split + {base_method}" + + print(f"✓ Completed {layout_name} - Found {detected_count} matches using {method_name}") + return detection_result + + except Exception as e: + error_msg = f"Error in parallel hybrid analysis for {layout_name}: {e}" + print(error_msg) + return self._create_error_result(layout_path, e) + + def _split_layout(self, layout_path, panel_count): + """Split layout into panels""" + # Initialize panel splitter if not already done + if not hasattr(self, 'splitter'): + if self.split_simple: + self.splitter = SimplePanelSplitter(debug=False) + elif self.split_advanced: + self.splitter = AdvancedPanelSplitter( + percentile=self.percentile, + min_gap=self.min_gap, + debug=False + ) + else: + self.splitter = PanelSplitter() + + # Split layout into panels + return self.splitter.split_panels(str(layout_path), panel_count) + + def _post_process_parallel_results(self, inlier_result, combined_result, panel_count, detection_method, layout_path): + """Post-process results from parallel inlier analysis""" + # Extract basic results + detected_masters = inlier_result.get('detected_masters', []) + inlier_scores = inlier_result.get('inlier_scores', {}) + + # Apply CEN refinement if enabled + if self.refinement_mode and detected_masters: + cen_images = [mid for mid in detected_masters if self.is_cen_image(mid)] + if cen_images: + print(f" Step 4: Applying CEN refinement...") + is_censored = combined_result.get('is_censored', True) + censorship_confidence = combined_result.get('censorship_confidence', 'unknown') + + refined_result = self.apply_cen_refinement_with_stored_analysis( + {'detected_masters': detected_masters, 'layout_path': str(layout_path)}, + is_censored, censorship_confidence + ) + detected_masters = refined_result.get('detected_masters', []) + + # Apply deduplication + original_count = len(detected_masters) + detected_masters = self.deduplicate_master_matches(detected_masters) + duplicates_removed = original_count - len(detected_masters) + + if duplicates_removed > 0: + print(f" Step 5: Removed {duplicates_removed} duplicate master(s)") + + # Apply truncation if needed + if len(detected_masters) > panel_count and inlier_scores and not self.no_truncation: + print(f" Step 6: Truncating {len(detected_masters)} matches to top {panel_count} by score...") + + # Sort masters by score (descending) + sorted_masters = sorted(detected_masters, + key=lambda mid: inlier_scores.get(mid, 0), + reverse=True) + + # Keep only top N matches + truncated_masters = sorted_masters[:panel_count] + removed_masters = sorted_masters[panel_count:] + + detected_masters = truncated_masters + truncation_applied = True + original_match_count = len(sorted_masters) + removed_count = len(removed_masters) + elif len(detected_masters) > panel_count and self.no_truncation: + print(f" Step 6: Truncation disabled - keeping all {len(detected_masters)} matches") + truncation_applied = False + original_match_count = len(detected_masters) + removed_count = 0 + else: + truncation_applied = False + original_match_count = len(detected_masters) + removed_count = 0 + + # Calculate confidence score + final_matches = len(detected_masters) + confidence_score = self.calculate_confidence_score(final_matches, panel_count) + + # Apply fallback to OpenAI one-at-a-time if enabled and needed + if self.fallback_one_at_a_time and final_matches < panel_count: + print(f" Step 7: Fallback triggered - {final_matches} matched masters < {panel_count} detected panels") + print(f" Running OpenAI one-at-a-time method with {len(self.master_images)} workers...") + + # Check resource usage before starting fallback + self._check_resource_usage() + + # Create a temporary OpenAI detector for one-at-a-time processing + try: + # Force garbage collection before creating detector + import gc + gc.collect() + + # Limit concurrent workers to prevent file descriptor exhaustion + max_workers = min(len(self.master_images), 20) # Cap at 20 to prevent resource exhaustion + + temp_detector = OpenAIImageDetector( + enable_greyscale=self.enable_greyscale, + enable_contrast_enhancement=self.enable_contrast_enhancement, + contrast_factor=self.contrast_factor, + refinement_mode=self.refinement_mode, + one_at_a_time_mode=True, + max_concurrent_workers=max_workers + ) + + # Set the master images + temp_detector.master_images = self.master_images.copy() + + print(f" Using {max_workers} workers for fallback (limited to prevent resource exhaustion)") + + # Run OpenAI one-at-a-time detection with stored censorship data + stored_censorship_data = { + 'is_censored': combined_result.get('is_censored', True), + 'confidence': combined_result.get('censorship_confidence', 'unknown') + } + + fallback_result = temp_detector.detect_images_in_layout_one_at_a_time( + str(layout_path), 0, 1, stored_censorship_data + ) + + # Clean up the temporary detector + if hasattr(temp_detector, 'cleanup_temp_files'): + temp_detector.cleanup_temp_files() + + # Force garbage collection after fallback + del temp_detector + gc.collect() + + if fallback_result and fallback_result.get('detected_masters'): + fallback_masters = fallback_result['detected_masters'] + print(f" Fallback successful - Found {len(fallback_masters)} matches") + + # Apply CEN refinement if enabled + if self.refinement_mode: + fallback_result = self.apply_cen_refinement_with_stored_analysis( + fallback_result, + combined_result.get('is_censored', True), + combined_result.get('censorship_confidence', 'unknown') + ) + + # Apply deduplication + original_count = len(fallback_result['detected_masters']) + fallback_result['detected_masters'] = self.deduplicate_master_matches(fallback_result['detected_masters']) + + # Use fallback results + detected_masters = fallback_result['detected_masters'] + final_matches = len(detected_masters) + confidence_score = self.calculate_confidence_score(final_matches, panel_count) + + print(f" Fallback completed - Using {final_matches} matches from OpenAI one-at-a-time") + + # Mark that fallback was used + fallback_applied = True + original_detection_method = detection_method + detection_method = 'openai_one_at_a_time_fallback' + fallback_api_calls = fallback_result.get('api_calls_made', 0) + else: + print(f" Fallback failed - No additional matches found, keeping original results") + fallback_applied = False + original_detection_method = detection_method + fallback_api_calls = 0 + + except Exception as e: + print(f" ERROR: Fallback failed with error: {e}") + + # Handle specific "Too many open files" error + if "Too many open files" in str(e) or "Errno 24" in str(e): + print(f" → This is a resource exhaustion issue - forcing cleanup...") + import gc + gc.collect() + + # Try to clean up any temp detector that might exist + if 'temp_detector' in locals(): + try: + if hasattr(temp_detector, 'cleanup_temp_files'): + temp_detector.cleanup_temp_files() + del temp_detector + except: + pass + + print(f" → Cleanup completed, continuing with original results...") + + fallback_applied = False + original_detection_method = detection_method + fallback_api_calls = 0 + else: + fallback_applied = False + original_detection_method = detection_method + fallback_api_calls = 0 + + # Build result dictionary + result = { + 'detected_masters': detected_masters, + 'detected_master_ids': detected_masters, + 'detected_master_filenames': [f"{mid}.jpg" for mid in detected_masters], + 'analysis': f"Parallel hybrid processing: {detection_method}", + 'detection_method': detection_method, + 'panel_count': panel_count, + 'panel_threshold': self.panel_threshold, + 'processing_mode': 'hybrid_parallel', + 'layout_path': str(layout_path), + 'confidence_score': confidence_score, + 'api_calls_used': 1 + fallback_api_calls, # Consolidated panel counting + censorship + fallback calls + 'truncation_applied': truncation_applied, + 'original_match_count': original_match_count, + 'final_match_count': final_matches, + 'removed_count': removed_count, + 'deduplication_applied': duplicates_removed > 0, + 'duplicates_removed': duplicates_removed, + 'fallback_applied': fallback_applied + } + + # Add fallback-specific fields if fallback was used + if fallback_applied: + result['original_detection_method'] = original_detection_method + result['processing_mode'] = 'hybrid_parallel_with_fallback' + + # Add panel and censorship analysis + result['panel_analysis'] = { + 'panel_count': panel_count, + 'confidence': combined_result.get('panel_confidence', 'unknown'), + 'analysis': combined_result.get('panel_analysis', ''), + 'panel_descriptions': combined_result.get('panel_descriptions', []) + } + result['censorship_analysis'] = { + 'is_censored': combined_result.get('is_censored', True), + 'confidence': combined_result.get('censorship_confidence', 'unknown'), + 'analysis': combined_result.get('censorship_analysis', ''), + 'coverage_details': combined_result.get('coverage_details', '') + } + + # Add inlier analysis specific results + if 'analysis_mode' in inlier_result: + result['inlier_analysis_mode'] = inlier_result['analysis_mode'] + if 'total_masters_checked' in inlier_result: + result['total_masters_checked'] = inlier_result['total_masters_checked'] + if 'potential_matches_found' in inlier_result: + result['potential_matches_found'] = inlier_result['potential_matches_found'] + if 'workers_used' in inlier_result: + result['inlier_workers_used'] = inlier_result['workers_used'] + + return result + + def _create_layout_result(self, result, layout_path): + """Create standardized layout result dictionary""" + layout_result = { + 'layout_filename': layout_path.name, + 'detected_master_ids': result.get('detected_master_ids', []), + 'detected_master_filenames': result.get('detected_master_filenames', []), + 'analysis': result.get('analysis', ''), + 'detection_method': result.get('detection_method', 'unknown'), + 'panel_count': result.get('panel_count', 1), + 'panel_threshold': self.panel_threshold, + 'processing_mode': result.get('processing_mode', 'hybrid_parallel'), + 'confidence_score': result.get('confidence_score', 0.0) + } + + # Add optional fields if present + optional_fields = [ + 'panel_analysis', 'censorship_analysis', 'total_masters_checked', + 'truncation_applied', 'original_match_count', 'final_match_count', + 'removed_count', 'deduplication_applied', 'duplicates_removed', + 'inlier_analysis_mode', 'potential_matches_found', 'inlier_workers_used' + ] + + for field in optional_fields: + if field in result: + layout_result[field] = result[field] + + # Add cost breakdown if available + cost_breakdown = cost_calculator.get_layout_cost_breakdown(layout_path.name) + if cost_breakdown: + layout_result['cost_breakdown'] = cost_breakdown + + return layout_result + + def _create_error_result(self, layout_path, error): + """Create error result dictionary""" + return { + 'layout_filename': layout_path.name, + 'detected_master_ids': [], + 'detected_master_filenames': [], + 'analysis': f'Error in parallel hybrid processing: {error}', + 'detection_method': 'error', + 'panel_count': 0, + 'panel_threshold': self.panel_threshold, + 'processing_mode': 'hybrid_parallel_error', + 'confidence_score': 0.0, + 'error': str(error) + } + + def _monitor_progress(self): + """Monitor and display progress periodically""" + monitor_cycles = 0 + last_completed = 0 + stall_count = 0 + + while True: + time.sleep(10) # Update every 10 seconds + monitor_cycles += 1 + + # Update queue size + if self.inlier_coordinator: + queue_size = self.inlier_coordinator.get_queue_size() + self.progress_tracker.update_queue_size(queue_size) + + # Check for potential stalls + info = self.progress_tracker.get_progress_info() + current_completed = info['completed'] + + # Detect stall condition + if current_completed == last_completed and queue_size > 0: + stall_count += 1 + if stall_count >= 6: # 60 seconds of no progress + print(f" → STALL DETECTED: No progress for 60s with {queue_size} items in queue") + current_task = self.inlier_coordinator.get_current_task_info() + if current_task: + processing_time = time.time() - current_task['started_at'] + print(f" → Current task: {current_task['layout_id']} ({current_task['analysis_type']}) - {processing_time:.1f}s") + + # If a task is taking too long, provide information but don't timeout + if current_task and processing_time > 300: # 5 minutes + print(f" → INFO: Long-running task detected ({processing_time:.1f}s) - continuing to wait for completion") + + # Automatic queue pressure relief + if queue_size >= 3 and stall_count >= 6: + print(f" → QUEUE PRESSURE RELIEF: Reducing layout workers to help with bottleneck") + if hasattr(self, 'layout_workers') and isinstance(self.layout_workers, int) and self.layout_workers > 1: + original_workers = self.layout_workers + self.layout_workers = max(1, self.layout_workers - 1) + print(f" → Reduced layout workers: {original_workers} → {self.layout_workers}") + else: + print(f" → Cannot reduce layout workers further (current: {getattr(self, 'layout_workers', 'unknown')})") + + stall_count = 0 # Reset after taking action + else: + stall_count = 0 + last_completed = current_completed + + # Print progress + self.progress_tracker.print_progress() + + # Monitor memory and adjust workers every 30 seconds (3 cycles) + if monitor_cycles % 3 == 0: + self._monitor_memory_and_adjust_workers() + + # Check if we're done + info = self.progress_tracker.get_progress_info() + if info['completed'] + info['failed'] >= info['total']: + break + + def _print_parallel_statistics(self, total_layouts, local_analysis_count, split_analysis_count, + truncation_count, total_truncated_matches, total_api_calls, total_time): + """Print parallel processing statistics""" + print(f"\n{'='*60}") + print("PARALLEL HYBRID PROCESSING STATISTICS") + print(f"{'='*60}") + print(f"Total layouts processed: {total_layouts}") + print(f"Layout workers used: {self.layout_workers}") + print(f"Local analysis used: {local_analysis_count} ({local_analysis_count/total_layouts*100:.1f}%)") + print(f"Split + analysis used: {split_analysis_count} ({split_analysis_count/total_layouts*100:.1f}%)") + print(f"Truncation applied: {truncation_count} layouts ({truncation_count/total_layouts*100:.1f}%)") + print(f"Total matches truncated: {total_truncated_matches}") + print(f"Total API calls made: {total_api_calls}") + print(f"Average API calls per layout: {total_api_calls/total_layouts:.1f}") + print(f"Estimated cost savings vs one-at-a-time: {(1 - total_api_calls/(total_layouts * (len(self.master_images) + 1)))*100:.1f}%") + print(f"Total processing time: {total_time/60:.1f} minutes") + print(f"Average time per layout: {total_time/total_layouts:.1f} seconds") + + # Estimate speedup + if hasattr(self, 'layout_workers') and self.layout_workers > 1: + estimated_sequential_time = total_time * self.layout_workers + print(f"Estimated sequential time: {estimated_sequential_time/60:.1f} minutes") + print(f"Parallel speedup: {estimated_sequential_time/total_time:.1f}x") + + def _monitor_memory_and_adjust_workers(self): + """Monitor memory usage and dynamically adjust worker counts""" + try: + memory_usage = psutil.virtual_memory().percent + swap_usage = psutil.swap_memory().percent + + # Get queue information for bottleneck detection + queue_size = 0 + if hasattr(self, 'inlier_coordinator') and self.inlier_coordinator: + queue_size = self.inlier_coordinator.get_queue_size() + + # Check if we need to reduce workers due to memory pressure + # Very lenient swap usage threshold since swap being full is acceptable + # Also consider queue pressure as a factor + memory_pressure = memory_usage > 85 or (swap_usage > 95 and memory_usage > 80) + queue_pressure = queue_size >= 3 + + if memory_pressure or queue_pressure: + adjustments_made = False + + if memory_pressure: + print(f" → Memory pressure detected: {memory_usage:.1f}% RAM, {swap_usage:.1f}% swap") + + # For memory pressure, reduce both types of workers + if hasattr(self, 'layout_workers') and isinstance(self.layout_workers, int) and self.layout_workers > 1: + original_workers = self.layout_workers + self.layout_workers = max(1, self.layout_workers - 1) + print(f" → Reduced layout workers: {original_workers} → {self.layout_workers}") + adjustments_made = True + + if hasattr(self, 'local_workers') and isinstance(self.local_workers, int) and self.local_workers > 1: + original_local = self.local_workers + self.local_workers = max(1, self.local_workers - 1) + print(f" → Reduced local workers: {original_local} → {self.local_workers}") + + # Update inlier coordinator if it exists + if hasattr(self, 'inlier_coordinator') and self.inlier_coordinator: + self.inlier_coordinator.local_workers = self.local_workers + adjustments_made = True + + if queue_pressure: + print(f" → Queue pressure detected: {queue_size} items in inlier queue") + + # For queue pressure, ONLY reduce layout workers (producers) + # DO NOT reduce local workers (consumers) - that makes the problem worse! + if hasattr(self, 'layout_workers') and isinstance(self.layout_workers, int) and self.layout_workers > 1: + original_workers = self.layout_workers + self.layout_workers = max(1, self.layout_workers - 1) + print(f" → Reduced layout workers to reduce queue pressure: {original_workers} → {self.layout_workers}") + adjustments_made = True + else: + print(f" → Cannot reduce layout workers further (current: {self.layout_workers})") + + return adjustments_made + + # Check if we can safely increase workers + # More lenient conditions for increasing workers + elif memory_usage < 75 and swap_usage < 80: + cpu_count = os.cpu_count() + + # Can we increase layout workers? + if (hasattr(self, 'layout_workers') and + hasattr(self, '_layout_workers_config') and + self._layout_workers_config is None and # Only if auto-detected + isinstance(self.layout_workers, int) and + self.layout_workers < min(4, cpu_count // 2)): + + original_workers = self.layout_workers + self.layout_workers = min(4, self.layout_workers + 1) + print(f" → Increased layout workers: {original_workers} → {self.layout_workers}") + + return True # Adjustments made + + # Can we increase local workers? + if (hasattr(self, 'local_workers') and + hasattr(self, '_local_workers_config') and + self._local_workers_config is None and # Only if auto-detected + isinstance(self.local_workers, int) and + self.local_workers < max(1, cpu_count - 2)): + + original_local = self.local_workers + self.local_workers = min(cpu_count - 2, self.local_workers + 1) + print(f" → Increased local workers: {original_local} → {self.local_workers}") + + # Update inlier coordinator if it exists + if hasattr(self, 'inlier_coordinator') and self.inlier_coordinator: + self.inlier_coordinator.local_workers = self.local_workers + + return True # Adjustments made + + return False # No adjustments needed + + except Exception as e: + print(f" → Error monitoring memory: {e}") + return False + + def _handle_worker_failure(self, failed_layout, exception): + """Handle individual layout worker failures gracefully""" + print(f" → Worker failure detected for {failed_layout}: {exception}") + + # Check if it's a memory-related error + error_str = str(exception).lower() + if any(keyword in error_str for keyword in ['memory', 'out of memory', 'memoryerror', 'killed']): + print(f" → Memory-related failure detected, reducing worker counts") + + # Emergency reduction of all workers + if hasattr(self, 'layout_workers') and isinstance(self.layout_workers, int) and self.layout_workers > 1: + self.layout_workers = max(1, self.layout_workers // 2) + print(f" → Emergency: Reduced layout workers to {self.layout_workers}") + + if hasattr(self, 'local_workers') and isinstance(self.local_workers, int) and self.local_workers > 1: + self.local_workers = max(1, self.local_workers // 2) + print(f" → Emergency: Reduced local workers to {self.local_workers}") + + # Update inlier coordinator + if hasattr(self, 'inlier_coordinator') and self.inlier_coordinator: + self.inlier_coordinator.local_workers = self.local_workers + + # Return an error result + return { + 'layout_filename': failed_layout, + 'detected_master_ids': [], + 'detected_master_filenames': [], + 'analysis': f'Worker failure: {exception}', + 'detection_method': 'worker_failure', + 'panel_count': 0, + 'panel_threshold': self.panel_threshold, + 'processing_mode': 'hybrid_parallel_worker_failure', + 'confidence_score': 0.0, + 'error': str(exception), + 'worker_failure': True + } + + def _check_resource_usage(self): + """Check and log current resource usage""" + try: + import resource + import gc + + # Get current file descriptor usage + soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE) + + # Get current process info + import os + pid = os.getpid() + + # Count open file descriptors (macOS specific) + try: + import subprocess + result = subprocess.run(['lsof', '-p', str(pid)], capture_output=True, text=True) + open_files = len(result.stdout.strip().split('\n')) - 1 # -1 for header + except: + open_files = "unknown" + + # Force garbage collection + gc.collect() + + print(f" → Resource check: {open_files} open files, limit: {soft_limit}/{hard_limit}") + + # Warn if getting close to limit + if isinstance(open_files, int) and open_files > soft_limit * 0.8: + print(f" → WARNING: High file descriptor usage ({open_files}/{soft_limit})") + print(f" → Consider reducing concurrent workers or increasing system limits") + + except Exception as e: + print(f" → Could not check resource usage: {e}") + + def save_results(self, results: Dict, filename: str = "hybrid_detection_results") -> str: + """Save hybrid detection results to JSON file""" + output_path = self.results_path / f"{filename}.json" + + # Add metadata + model_description = 'openai_o3_plus_vector_similarity' if self.vector_mode else 'openai_o3_plus_local_analysis' + processing_mode = 'hybrid_vector' if self.vector_mode else 'hybrid' + + output_data = { + 'metadata': { + 'total_layouts_processed': len(results), + 'total_master_images': len(self.master_images), + 'master_images_available': list(self.master_files.keys()), + 'provider': 'hybrid', + 'model': model_description, + 'panel_threshold': self.panel_threshold, + 'inlier_threshold': self.inlier_threshold, + 'greyscale_enabled': self.enable_greyscale, + 'contrast_enhancement_enabled': self.enable_contrast_enhancement, + 'processing_mode': processing_mode, + 'vector_mode': self.vector_mode + }, + 'results': results + } + + # Add vector mode specific metadata + if self.vector_mode: + output_data['metadata']['similarity_threshold'] = self.similarity_threshold + output_data['metadata']['embedding_model'] = 'Google Vertex AI multimodalembedding@001' + output_data['metadata']['embedding_dimensions'] = 1408 + + # Add cost tracking metadata if enabled + if cost_calculator.enable_tracking: + session_summary = cost_calculator.get_session_summary() + if session_summary['tracking_enabled']: + output_data['metadata']['cost_tracking'] = session_summary + + with open(output_path, 'w') as f: + json.dump(output_data, f, indent=2) + + print(f"Results saved to: {output_path}") + return str(output_path) + + def generate_summary(self, results: Dict) -> Dict: + """Generate summary statistics for hybrid detection""" + total_layouts = len(results) + layouts_with_matches = sum(1 for r in results.values() if r['detected_master_ids']) + + # Count detection methods used + local_analysis_count = sum(1 for r in results.values() if r.get('detection_method') in ['local_inlier_analysis', 'vector_similarity']) + split_analysis_count = sum(1 for r in results.values() if r.get('detection_method') in ['split_and_inlier_analysis', 'split_and_vector_similarity']) + + # Count master image occurrences + master_counts = {} + for result in results.values(): + for master_id in result['detected_master_ids']: + master_counts[master_id] = master_counts.get(master_id, 0) + 1 + + # Deduplication statistics + layouts_with_deduplication = sum(1 for r in results.values() if r.get('deduplication_applied', False)) + total_duplicates_removed = sum(r.get('duplicates_removed', 0) for r in results.values()) + + # Truncation statistics + layouts_with_truncation = sum(1 for r in results.values() if r.get('truncation_applied', False)) + total_matches_removed = sum(r.get('removed_count', 0) for r in results.values()) + + summary = { + 'total_layouts_processed': total_layouts, + 'layouts_with_matches': layouts_with_matches, + 'layouts_without_matches': total_layouts - layouts_with_matches, + 'local_analysis_used': local_analysis_count, + 'split_analysis_used': split_analysis_count, + 'local_analysis_percentage': round(local_analysis_count / total_layouts * 100, 1) if total_layouts > 0 else 0, + 'split_analysis_percentage': round(split_analysis_count / total_layouts * 100, 1) if total_layouts > 0 else 0, + 'master_image_usage': master_counts, + 'most_used_masters': sorted(master_counts.items(), key=lambda x: x[1], reverse=True)[:10], + # Deduplication stats + 'layouts_with_deduplication': layouts_with_deduplication, + 'total_duplicates_removed': total_duplicates_removed, + 'deduplication_rate': round(layouts_with_deduplication / total_layouts * 100, 1) if total_layouts > 0 else 0, + # Truncation stats + 'layouts_with_truncation': layouts_with_truncation, + 'total_matches_removed_by_truncation': total_matches_removed, + 'truncation_rate': round(layouts_with_truncation / total_layouts * 100, 1) if total_layouts > 0 else 0, + 'provider': 'hybrid', + 'model': 'openai_o3_plus_vector_similarity' if self.vector_mode else 'openai_o3_plus_local_analysis_plus_split', + 'panel_threshold': self.panel_threshold, + 'inlier_threshold': self.inlier_threshold, + 'vector_mode': self.vector_mode + } + + # Add vector mode specific fields + if self.vector_mode: + summary['similarity_threshold'] = self.similarity_threshold + summary['embedding_model'] = 'Google Vertex AI multimodalembedding@001' + summary['embedding_dimensions'] = 1408 + + return summary + + def apply_cen_refinement_with_stored_analysis(self, initial_results: Dict, is_layout_censored: bool, censorship_confidence: str) -> Dict: + """ + Apply CEN refinement using stored censorship analysis from the consolidated API call + """ + layout_name = Path(initial_results.get('layout_path', 'unknown')).name + detected_masters = initial_results.get('detected_masters', []) + + # Find CEN images in the results + cen_images = [mid for mid in detected_masters if self.is_cen_image(mid)] + + if not cen_images: + # No CEN images found, return original results + return initial_results + + print(f" Refining {len(cen_images)} CEN matches for {layout_name}") + print(f" Using stored censorship analysis: {'CENSORED' if is_layout_censored else 'UNCENSORED'} (confidence: {censorship_confidence})") + + refined_masters = [] + refinement_details = [] + changes_made = 0 + + # Process each detected image + for master_id in detected_masters: + if self.is_cen_image(master_id): + # This is a CEN image + non_cen_id = self.find_corresponding_non_cen_image(master_id) + + if not is_layout_censored and non_cen_id: + # Layout is uncensored, switch to non-CEN version + refined_masters.append(non_cen_id) + refinement_details.append({ + 'original_cen_match': master_id, + 'non_cen_alternative': non_cen_id, + 'final_choice': non_cen_id, + 'confidence': censorship_confidence, + 'analysis': f"Layout determined to be uncensored, switched from {master_id} to {non_cen_id}", + 'changed': True, + 'reason': 'layout_uncensored' + }) + changes_made += 1 + print(f" → Changed {master_id} to {non_cen_id} (layout is uncensored)") + else: + # Layout is censored or no non-CEN alternative, keep CEN version + refined_masters.append(master_id) + reason = 'layout_censored' if is_layout_censored else 'no_non_cen_alternative' + refinement_details.append({ + 'original_cen_match': master_id, + 'non_cen_alternative': non_cen_id, + 'final_choice': master_id, + 'confidence': censorship_confidence, + 'analysis': f"Layout is censored, keeping CEN version: {master_id}", + 'changed': False, + 'reason': reason + }) + print(f" → Kept {master_id} (layout is censored)") + else: + # Not a CEN image, keep as is + refined_masters.append(master_id) + + # Update the results + refined_results = initial_results.copy() + refined_results['detected_masters'] = refined_masters + refined_results['detected_master_ids'] = refined_masters + refined_results['detected_master_filenames'] = [f"{mid}.jpg" for mid in refined_masters] + + # Store refinement information + refined_results['cen_refinement_applied'] = True + refined_results['cen_refinement_details'] = refinement_details + refined_results['cen_refinement_changes'] = changes_made + refined_results['censorship_used_stored_analysis'] = True + + print(f" → CEN refinement completed: {changes_made} changes made") + + return refined_results + + def calculate_confidence_score(self, final_matches: int, panel_count: int) -> float: + """ + Calculate confidence score as percentage ratio of final matches to detected panels + + Args: + final_matches: Number of final detected matches after deduplication and refinement + panel_count: Number of panels detected by OpenAI O3 + + Returns: + float: Confidence percentage (0.0 to 100.0) + """ + if panel_count == 0: + return 0.0 + + # Calculate raw percentage + raw_percentage = (final_matches / panel_count) * 100 + + # Cap at 100% (can't have more confidence than 100%) + confidence_percentage = min(raw_percentage, 100.0) + + return confidence_percentage \ No newline at end of file diff --git a/image_detector.py b/image_detector.py new file mode 100644 index 0000000..10e878c --- /dev/null +++ b/image_detector.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +""" +Master Image Detection in Layout Images - Modular Version +Uses Google Gemini 2.5 Pro API to detect which master images appear in layout images + +This is the main entry point that imports from the refactored modules: +- process_detection.py: Standalone process function for detection +- gemini_detector.py: ImageDetector class using Gemini LLM +- vector_detector.py: VectorImageDetector class using embeddings +- cli.py: Command line interface + +For backward compatibility, this module re-exports the main classes and functions. +""" + +# Import all components from the refactored modules +from process_detection import process_single_master_detection +from gemini_detector import ImageDetector +from vector_detector import VectorImageDetector +from cli import main, parse_arguments + +# Re-export for backward compatibility +__all__ = [ + 'process_single_master_detection', + 'ImageDetector', + 'VectorImageDetector', + 'main', + 'parse_arguments' +] + +if __name__ == "__main__": + # Import the main execution from cli module + import multiprocessing + multiprocessing.set_start_method('spawn', force=True) + exit(main()) \ No newline at end of file diff --git a/improved_splitting.py b/improved_splitting.py new file mode 100644 index 0000000..37d4064 --- /dev/null +++ b/improved_splitting.py @@ -0,0 +1,264 @@ +#!/usr/bin/env python3 +""" +Improved horizontal splitting algorithm for fashion layout panels +""" +import cv2 +import numpy as np +from pathlib import Path +import os +from scipy.ndimage import gaussian_filter1d +from scipy.signal import find_peaks + +def improved_horizontal_splitting(image_path: str, debug=False): + """ + Improved algorithm for horizontal panel detection + Focuses on major structural separators, not text/content details + """ + print(f"\nTesting improved algorithm on: {Path(image_path).name}") + + # Load image + img = cv2.imread(image_path) + height, width = img.shape[:2] + print(f"Image dimensions: {width}x{height}") + + # Only process wide images + if width <= height * 1.2: + print("Not a wide layout, treating as single panel") + return [{ + 'bbox': (0, 0, width, height), + 'width': width, + 'height': height, + 'crop_id': "single" + }] + + # Convert to grayscale + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + # Method 1: Structural edge detection + # Focus on strong vertical edges that span most of the height + edges = cv2.Canny(gray, 30, 100) + + # Create a tall vertical kernel to detect full-height separators + vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, height // 3)) + vertical_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, vertical_kernel) + + # Get vertical projection of strong edges + edge_projection = np.sum(vertical_edges, axis=0) + + # Method 2: Intensity histogram analysis + # Look for consistent dark/light vertical bands + horizontal_hist = np.sum(gray, axis=0) + + # Smooth both signals + smoothed_edges = gaussian_filter1d(edge_projection, sigma=15) + smoothed_hist = gaussian_filter1d(horizontal_hist, sigma=15) + + # Invert histogram to find valleys (potential separators) + inverted_hist = np.max(smoothed_hist) - smoothed_hist + + # Adaptive parameters based on image size + if width < 2000: + # Small images: likely 1-2 panels + min_panel_width = width // 4 # At least 25% of image width per panel + max_panels = 3 + elif width < 5000: + # Medium images: likely 2-4 panels + min_panel_width = width // 6 # At least 16% of image width per panel + max_panels = 6 + else: + # Large images: multi-panel layouts + min_panel_width = width // 12 # At least 8% of image width per panel + max_panels = 15 + + print(f"Min panel width: {min_panel_width}px, Max panels: {max_panels}") + + # Find separator candidates using both methods + edge_threshold = np.max(smoothed_edges) * 0.4 # Strong edges only + hist_threshold = np.max(inverted_hist) * 0.3 # Significant valleys only + + # Edge-based separators + edge_peaks, _ = find_peaks(smoothed_edges, + distance=min_panel_width, + height=edge_threshold, + prominence=np.max(smoothed_edges) * 0.2) + + # Histogram-based separators + hist_peaks, _ = find_peaks(inverted_hist, + distance=min_panel_width, + height=hist_threshold, + prominence=np.max(inverted_hist) * 0.15) + + print(f"Edge peaks: {len(edge_peaks)}, Histogram peaks: {len(hist_peaks)}") + + # Combine and validate separators + all_separators = set(edge_peaks) | set(hist_peaks) + + # Filter separators that are too close to image boundaries + boundary_margin = width * 0.05 # 5% margin from edges + valid_separators = [s for s in all_separators + if boundary_margin < s < width - boundary_margin] + + # Sort separators + valid_separators = sorted(valid_separators) + + # Remove separators that are too close to each other + final_separators = [] + for sep in valid_separators: + if not final_separators or sep - final_separators[-1] >= min_panel_width: + final_separators.append(sep) + + # Limit to reasonable number of panels + if len(final_separators) >= max_panels: + # Keep only the strongest separators + separator_scores = [] + for sep in final_separators: + edge_score = smoothed_edges[sep] if sep < len(smoothed_edges) else 0 + hist_score = inverted_hist[sep] if sep < len(inverted_hist) else 0 + combined_score = edge_score + hist_score + separator_scores.append((sep, combined_score)) + + # Sort by score and take top ones + separator_scores.sort(key=lambda x: x[1], reverse=True) + final_separators = [s[0] for s in separator_scores[:max_panels-1]] + final_separators.sort() + + print(f"Final separators: {final_separators}") + + # Create crops + x_boundaries = [0] + final_separators + [width] + crops = [] + + for i in range(len(x_boundaries) - 1): + x1, x2 = x_boundaries[i], x_boundaries[i + 1] + + # Ensure minimum panel width + if x2 - x1 >= min_panel_width: + crops.append({ + 'bbox': (x1, 0, x2, height), + 'width': x2 - x1, + 'height': height, + 'crop_id': f"panel_{i}" + }) + + print(f"Generated {len(crops)} panels") + + # Debug visualization + if debug: + debug_dir = Path("debug_improved") + debug_dir.mkdir(exist_ok=True) + + import matplotlib.pyplot as plt + + fig, axes = plt.subplots(4, 1, figsize=(15, 12)) + + # Original image + axes[0].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) + axes[0].set_title("Original Image") + for sep in final_separators: + axes[0].axvline(x=sep, color='red', linewidth=2) + + # Edge projection + axes[1].plot(smoothed_edges) + axes[1].set_title("Edge Projection (Smoothed)") + axes[1].axhline(y=edge_threshold, color='red', linestyle='--', alpha=0.7) + for sep in edge_peaks: + axes[1].axvline(x=sep, color='red', alpha=0.7) + + # Histogram analysis + axes[2].plot(inverted_hist) + axes[2].set_title("Inverted Histogram (Smoothed)") + axes[2].axhline(y=hist_threshold, color='red', linestyle='--', alpha=0.7) + for sep in hist_peaks: + axes[2].axvline(x=sep, color='blue', alpha=0.7) + + # Final result + axes[3].plot(smoothed_edges, label='Edges', alpha=0.7) + axes[3].plot(inverted_hist, label='Histogram', alpha=0.7) + axes[3].set_title("Combined Analysis with Final Separators") + for sep in final_separators: + axes[3].axvline(x=sep, color='red', linewidth=2, label='Final Separator') + axes[3].legend() + + plt.tight_layout() + debug_file = debug_dir / f"{Path(image_path).stem}_analysis.png" + plt.savefig(debug_file, dpi=150, bbox_inches='tight') + plt.close() + + print(f"Debug visualization saved: {debug_file}") + + return crops + +def test_improved_algorithm(): + """Test the improved algorithm on various layouts""" + + test_cases = [ + # Single panels + {"path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6785934.jpg", "expected": 1, "type": "Single"}, + {"path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6813573.jpg", "expected": 1, "type": "Single"}, + + # Double panels + {"path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6785852.jpg", "expected": 2, "type": "Double"}, + + # 4-panel layouts + {"path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6799150.jpg", "expected": 4, "type": "4-Panel"}, + {"path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6813643.jpg", "expected": 4, "type": "4-Panel"}, + + # Multi-panel layouts + {"path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6791144.jpg", "expected": 8, "type": "Multi-Panel"}, + {"path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6786505.jpg", "expected": 10, "type": "Multi-Panel"}, + ] + + print("TESTING IMPROVED HORIZONTAL SPLITTING ALGORITHM") + print("="*60) + + results = [] + crops_dir = Path("improved_crops") + crops_dir.mkdir(exist_ok=True) + + for test_case in test_cases: + if not os.path.exists(test_case["path"]): + print(f"⚠️ File not found: {test_case['path']}") + continue + + crops = improved_horizontal_splitting(test_case["path"], debug=True) + + # Save crop previews + img = cv2.imread(test_case["path"]) + base_name = Path(test_case["path"]).stem + + for i, crop in enumerate(crops): + x1, y1, x2, y2 = crop['bbox'] + cropped = img[y1:y2, x1:x2] + crop_filename = f"{base_name}_improved_crop{i+1:02d}.jpg" + cv2.imwrite(str(crops_dir / crop_filename), cropped) + + # Analyze result + detected = len(crops) + expected = test_case["expected"] + accurate = abs(detected - expected) <= 1 + + status = "✅" if accurate else "❌" + print(f"{status} {base_name}: {detected}/{expected} panels ({test_case['type']})") + + results.append({ + "file": base_name, + "type": test_case["type"], + "expected": expected, + "detected": detected, + "accurate": accurate + }) + + # Summary + print(f"\n{'='*60}") + print("IMPROVED ALGORITHM SUMMARY") + print(f"{'='*60}") + + accurate_count = sum(1 for r in results if r["accurate"]) + total_count = len(results) + + print(f"Accurate results: {accurate_count}/{total_count} ({accurate_count/total_count*100:.1f}%)") + print(f"Crop previews saved to: {crops_dir}/") + print(f"Debug visualizations saved to: debug_improved/") + +if __name__ == "__main__": + test_improved_algorithm() \ No newline at end of file diff --git a/logging_config.py b/logging_config.py new file mode 100644 index 0000000..61c6b61 --- /dev/null +++ b/logging_config.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +""" +Logging Configuration - Dual output to terminal and file for crash tracking +""" + +import logging +import sys +import os +from datetime import datetime + +def setup_dual_logging(log_level=logging.INFO): + """ + Configure logging to output to both terminal and file + + Args: + log_level: Logging level (default: logging.INFO) + + Returns: + logger: Configured logger instance + """ + + # Create logger + logger = logging.getLogger('master_adapt_detect') + logger.setLevel(log_level) + + # Prevent duplicate handlers if called multiple times + if logger.handlers: + return logger + + # Create formatter + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + + # Console handler (terminal output) + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(log_level) + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) + + # File handler (file output) + log_filename = f"master_adapt_detect_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log" + file_handler = logging.FileHandler(log_filename, mode='w') + file_handler.setLevel(log_level) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + # Log the setup + logger.info(f"Dual logging initialized - Console: {log_level}, File: {log_filename}") + + return logger + +def log_system_info(logger): + """Log system information for debugging""" + import platform + import psutil + + logger.info("="*60) + logger.info("SYSTEM INFORMATION") + logger.info("="*60) + logger.info(f"Platform: {platform.platform()}") + logger.info(f"Python version: {platform.python_version()}") + logger.info(f"CPU count: {psutil.cpu_count()}") + logger.info(f"Memory: {psutil.virtual_memory().total / (1024**3):.2f} GB total") + logger.info(f"Available memory: {psutil.virtual_memory().available / (1024**3):.2f} GB") + logger.info(f"Process ID: {os.getpid()}") + logger.info("="*60) + +def log_exception(logger, exc_info=None): + """Log exception with full traceback""" + if exc_info is None: + exc_info = sys.exc_info() + + logger.error("EXCEPTION OCCURRED", exc_info=exc_info) + + # Also log memory usage at time of crash + try: + import psutil + process = psutil.Process() + memory_info = process.memory_info() + system_memory = psutil.virtual_memory() + swap_memory = psutil.swap_memory() + + logger.error(f"System memory at crash: {system_memory.percent:.1f}% used, " + f"{system_memory.available / (1024**3):.2f} GB available") + logger.error(f"Swap memory at crash: {swap_memory.percent:.1f}% used, " + f"{swap_memory.used / (1024**3):.2f} GB used") + logger.error(f"Process memory at crash: RSS={memory_info.rss / (1024**3):.2f} GB, " + f"VMS={memory_info.vms / (1024**3):.2f} GB") + logger.error(f"CPU usage at crash: {process.cpu_percent()}%") + except Exception as e: + logger.error(f"Could not get resource usage: {e}") + +def log_memory_warning(logger, usage): + """Log memory warning with details""" + logger.warning(f"MEMORY WARNING: {usage['memory_percent']:.1f}% memory used, " + f"{usage['memory_available_gb']:.1f} GB available") + if usage['swap_percent'] > 0: + logger.warning(f"SWAP WARNING: {usage['swap_percent']:.1f}% swap used, " + f"{usage['swap_used_gb']:.1f} GB used") + +class DualLogger: + """Wrapper class to provide print-like interface with dual logging""" + + def __init__(self, logger): + self.logger = logger + + def print(self, *args, **kwargs): + """Print to both terminal and file""" + # Convert args to string like print() would + message = ' '.join(str(arg) for arg in args) + self.logger.info(message) + + def error(self, *args, **kwargs): + """Log error message""" + message = ' '.join(str(arg) for arg in args) + self.logger.error(message) + + def warning(self, *args, **kwargs): + """Log warning message""" + message = ' '.join(str(arg) for arg in args) + self.logger.warning(message) + + def debug(self, *args, **kwargs): + """Log debug message""" + message = ' '.join(str(arg) for arg in args) + self.logger.debug(message) \ No newline at end of file diff --git a/master_adapt_detector_diagram.md b/master_adapt_detector_diagram.md new file mode 100644 index 0000000..50e2d86 --- /dev/null +++ b/master_adapt_detector_diagram.md @@ -0,0 +1,444 @@ +# Master Adapt Detector Architecture Diagram + +This diagram illustrates the architecture and flow of the Master Adapt Detector application, which uses multiple AI models and computer vision techniques to detect master images within layout images. + +## High-Level Architecture + +```mermaid +graph TB + subgraph "Entry Point" + CLI[cli.py - Command Line Interface] + end + + subgraph "Core Detection Engines" + GD[Gemini Detector
gemini_detector.py] + OD[OpenAI Detector
openai_detector.py] + VD[Vector Detector
vector_detector.py] + HD[Hybrid Detector
hybrid_detector.py] + end + + subgraph "Panel Splitting System" + PS[Panel Splitter
panel_splitter.py] + AS[Advanced Splitter
advanced_splitter.py] + SS[Simple Splitter
simple_splitter.py] + end + + subgraph "Support Systems" + MM[Memory Manager
memory_manager.py] + LC[Logging Config
logging_config.py] + PD[Process Detection
process_detection.py] + end + + subgraph "AI APIs" + GEMINI[Google Gemini 2.5 Pro] + OPENAI[OpenAI o3] + VERTEX[Google Vertex AI
Vector Embeddings] + end + + subgraph "Computer Vision" + OPENCV[OpenCV
Feature Detection] + AKAZE[AKAZE Features] + RANSAC[RANSAC Homography] + end + + subgraph "Data Storage" + MI[Master Images
master_images/] + LI[Layout Images
layouts/] + RES[Results
results/] + EMB[Embeddings Cache
embeddings_cache/] + end + + CLI --> GD + CLI --> OD + CLI --> VD + CLI --> HD + + HD --> OD + HD --> VD + HD --> PS + HD --> AS + HD --> SS + + GD --> GEMINI + OD --> OPENAI + VD --> VERTEX + + HD --> OPENCV + HD --> AKAZE + HD --> RANSAC + + PS --> OPENCV + AS --> OPENCV + SS --> OPENCV + + GD --> MM + OD --> MM + VD --> MM + HD --> MM + + CLI --> LC + PD --> LC + + GD --> MI + OD --> MI + VD --> MI + HD --> MI + + GD --> LI + OD --> LI + VD --> LI + HD --> LI + + GD --> RES + OD --> RES + VD --> RES + HD --> RES + + VD --> EMB + HD --> EMB +``` + +## Detailed Application Flow + +```mermaid +flowchart TD + START([Application Start]) --> PARSE[Parse CLI Arguments] + PARSE --> MODE{Select Mode} + + MODE -->|--hybrid| HYBRID[Hybrid Mode] + MODE -->|--openai| OPENAI_MODE[OpenAI Mode] + MODE -->|--vector-mode| VECTOR_MODE[Vector Mode] + MODE -->|default| GEMINI_MODE[Gemini Mode] + + subgraph "Hybrid Mode Processing" + HYBRID --> LOAD_MASTERS[Load Master Images] + LOAD_MASTERS --> INIT_EMBED{Vector Mode?} + INIT_EMBED -->|Yes| GEN_EMBED[Generate Master Embeddings] + INIT_EMBED -->|No| INIT_CV[Initialize OpenCV Components] + GEN_EMBED --> PROCESS_LAYOUT[Process Layout] + INIT_CV --> PROCESS_LAYOUT + + PROCESS_LAYOUT --> COUNT_PANELS[Count Panels with OpenAI o3] + COUNT_PANELS --> DETECT_CENSOR[Detect Censorship with OpenAI o3] + DETECT_CENSOR --> PANEL_CHECK{Panel Count ≤ Threshold?} + + PANEL_CHECK -->|Yes| LOCAL_ANALYSIS[Local Analysis] + PANEL_CHECK -->|No| SPLIT_ANALYSIS[Split + Analysis] + + LOCAL_ANALYSIS --> VECTOR_CHECK{Vector Mode?} + VECTOR_CHECK -->|Yes| VECTOR_SIM[Vector Similarity] + VECTOR_CHECK -->|No| INLIER_ANALYSIS[Inlier Analysis] + + SPLIT_ANALYSIS --> SPLIT_PANELS[Split Panels] + SPLIT_PANELS --> SPLIT_VECTOR_CHECK{Vector Mode?} + SPLIT_VECTOR_CHECK -->|Yes| SPLIT_VECTOR[Split + Vector Similarity] + SPLIT_VECTOR_CHECK -->|No| SPLIT_INLIER[Split + Inlier Analysis] + + VECTOR_SIM --> APPLY_REFINEMENT + INLIER_ANALYSIS --> APPLY_REFINEMENT + SPLIT_VECTOR --> APPLY_REFINEMENT + SPLIT_INLIER --> APPLY_REFINEMENT + + APPLY_REFINEMENT[Apply CEN Refinement] --> DEDUP[Deduplication] + DEDUP --> TRUNCATE[Truncate to Panel Count] + TRUNCATE --> FALLBACK_CHECK{Fallback Enabled?} + + FALLBACK_CHECK -->|Yes & Needed| FALLBACK[OpenAI One-at-a-Time Fallback] + FALLBACK_CHECK -->|No| SAVE_RESULTS + FALLBACK --> SAVE_RESULTS[Save Results] + end + + subgraph "OpenAI Mode Processing" + OPENAI_MODE --> LOAD_MASTERS_O[Load Master Images] + LOAD_MASTERS_O --> ONE_AT_TIME{One-at-a-Time?} + ONE_AT_TIME -->|Yes| PARALLEL_MASTERS[Parallel Master Processing] + ONE_AT_TIME -->|No| BATCH_PROCESS[Batch Processing] + + PARALLEL_MASTERS --> PANEL_AWARE{Panel-Aware Refinement?} + PANEL_AWARE -->|Yes| COUNT_PANELS_O[Count Panels] --> INLIER_REFINE[Inlier Refinement] + PANEL_AWARE -->|No| APPLY_CEN_O[Apply CEN Refinement] + + INLIER_REFINE --> APPLY_CEN_O + BATCH_PROCESS --> APPLY_CEN_O + APPLY_CEN_O --> SAVE_RESULTS_O[Save Results] + end + + subgraph "Vector Mode Processing" + VECTOR_MODE --> LOAD_MASTERS_V[Load Master Images] + LOAD_MASTERS_V --> GEN_EMBED_V[Generate Master Embeddings] + GEN_EMBED_V --> SPLITTING_CHECK{Splitting Enabled?} + SPLITTING_CHECK -->|Yes| SPLIT_LAYOUT[Split Layout] + SPLITTING_CHECK -->|No| COMPARE_EMBED[Compare Embeddings] + + SPLIT_LAYOUT --> COMPARE_SPLITS[Compare Split Embeddings] + COMPARE_SPLITS --> SAVE_RESULTS_V[Save Results] + COMPARE_EMBED --> SAVE_RESULTS_V + end + + subgraph "Gemini Mode Processing" + GEMINI_MODE --> LOAD_MASTERS_G[Load Master Images] + LOAD_MASTERS_G --> GEMINI_ONE_AT_TIME{One-at-a-Time?} + GEMINI_ONE_AT_TIME -->|Yes| PARALLEL_MASTERS_G[Parallel Master Processing] + GEMINI_ONE_AT_TIME -->|No| BATCH_PROCESS_G[Batch Processing] + + PARALLEL_MASTERS_G --> APPLY_CEN_G[Apply CEN Refinement] + BATCH_PROCESS_G --> APPLY_CEN_G + APPLY_CEN_G --> SAVE_RESULTS_G[Save Results] + end + + SAVE_RESULTS --> END([End]) + SAVE_RESULTS_O --> END + SAVE_RESULTS_V --> END + SAVE_RESULTS_G --> END +``` + +## Panel Splitting Architecture + +```mermaid +graph TB + subgraph "Panel Splitting System" + INPUT[Layout Image] --> DETECTOR{Splitter Type} + + DETECTOR -->|Basic| PANEL_SPLITTER[PanelSplitter] + DETECTOR -->|Advanced| ADVANCED_SPLITTER[AdvancedPanelSplitter] + DETECTOR -->|Simple| SIMPLE_SPLITTER[SimplePanelSplitter] + + subgraph "PanelSplitter Methods" + PANEL_SPLITTER --> EDGE_DETECT[Edge Detection] + PANEL_SPLITTER --> CONTOUR_FIND[Contour Finding] + PANEL_SPLITTER --> HIST_ANALYSIS[Histogram Analysis] + PANEL_SPLITTER --> KMEANS[K-Means Clustering] + end + + subgraph "AdvancedPanelSplitter Methods" + ADVANCED_SPLITTER --> SOBEL[Sobel Edge Detection] + ADVANCED_SPLITTER --> GUTTER_DETECT[Gutter Detection] + ADVANCED_SPLITTER --> ENERGY_ANALYSIS[Energy Analysis] + ADVANCED_SPLITTER --> PERCENTILE_THRESH[Percentile Thresholding] + end + + subgraph "SimplePanelSplitter Methods" + SIMPLE_SPLITTER --> EVEN_SPLIT[Even Division] + SIMPLE_SPLITTER --> PANEL_COUNT[Use Panel Count] + end + + EDGE_DETECT --> SPLIT_RESULTS[Split Results] + CONTOUR_FIND --> SPLIT_RESULTS + HIST_ANALYSIS --> SPLIT_RESULTS + KMEANS --> SPLIT_RESULTS + + SOBEL --> SPLIT_RESULTS + GUTTER_DETECT --> SPLIT_RESULTS + ENERGY_ANALYSIS --> SPLIT_RESULTS + PERCENTILE_THRESH --> SPLIT_RESULTS + + EVEN_SPLIT --> SPLIT_RESULTS + PANEL_COUNT --> SPLIT_RESULTS + end + + SPLIT_RESULTS --> INDIVIDUAL_PANELS[Individual Panel Images] + INDIVIDUAL_PANELS --> MATCH_PROCESS[Match Each Panel to Masters] +``` + +## Memory Management and Multiprocessing + +```mermaid +graph TB + subgraph "Memory Management System" + MEMORY_MANAGER[Memory Manager] --> MONITOR[Monitor Usage] + MONITOR --> THRESH_CHECK{Usage > Threshold?} + THRESH_CHECK -->|Yes| THROTTLE[Throttle Processes] + THRESH_CHECK -->|No| CONTINUE[Continue Processing] + + THROTTLE --> WAIT[Wait for Memory] + WAIT --> REDUCE_WORKERS[Reduce Worker Count] + REDUCE_WORKERS --> CONTINUE + + CONTINUE --> PROCESS_POOL[Process Pool Executor] + PROCESS_POOL --> WORKER1[Worker Process 1] + PROCESS_POOL --> WORKER2[Worker Process 2] + PROCESS_POOL --> WORKERN[Worker Process N] + + subgraph "Worker Process" + WORKER1 --> ISOLATED_ENV[Isolated Environment] + ISOLATED_ENV --> LOAD_MODELS[Load Models] + LOAD_MODELS --> PROCESS_TASK[Process Task] + PROCESS_TASK --> CLEANUP[Cleanup] + end + + WORKER2 --> ISOLATED_ENV + WORKERN --> ISOLATED_ENV + end + + subgraph "Feature Limiting" + PROCESS_TASK --> FEATURE_COUNT[Count Features] + FEATURE_COUNT --> FEATURE_CHECK{Features > Limit?} + FEATURE_CHECK -->|Yes| LIMIT_FEATURES[Limit Features] + FEATURE_CHECK -->|No| PROCEED[Proceed] + LIMIT_FEATURES --> PROCEED + end +``` + +## Data Flow and Storage + +```mermaid +graph LR + subgraph "Input Data" + MI[Master Images
41 images] + LI[Layout Images
299+ images] + end + + subgraph "Processing Cache" + TEMP[Temp Processed Images] + EMB_CACHE[Embeddings Cache] + SPLITS[Split Panel Images] + end + + subgraph "Output Data" + JSON[JSON Results] + LOGS[Log Files] + DEBUG[Debug Images] + CROPS[Crop Images] + end + + MI --> TEMP + LI --> TEMP + + TEMP --> EMB_CACHE + TEMP --> SPLITS + + EMB_CACHE --> JSON + SPLITS --> JSON + + JSON --> LOGS + JSON --> DEBUG + JSON --> CROPS + + subgraph "Result Structure" + JSON --> METADATA[Metadata] + JSON --> LAYOUT_RESULTS[Layout Results] + + METADATA --> TOTAL_LAYOUTS[Total Layouts] + METADATA --> MASTER_COUNT[Master Count] + METADATA --> PROVIDER[Provider Info] + METADATA --> PROCESSING_MODE[Processing Mode] + + LAYOUT_RESULTS --> DETECTED_MASTERS[Detected Masters] + LAYOUT_RESULTS --> ANALYSIS[Analysis Text] + LAYOUT_RESULTS --> CONFIDENCE[Confidence Score] + LAYOUT_RESULTS --> PANEL_INFO[Panel Information] + end +``` + +## Key Components and Their Roles + +### 1. **CLI Interface (`cli.py`)** +- **Purpose**: Command-line interface for the application +- **Features**: Argument parsing, mode selection, batch processing options +- **Modes**: Gemini, OpenAI, Vector, Hybrid +- **Options**: Test mode, batch processing, custom outputs, splitting options + +### 2. **Detection Engines** + +#### **Hybrid Detector (`hybrid_detector.py`)** +- **Purpose**: Cost-efficient detection combining OpenAI panel counting with local analysis +- **Features**: + - Panel threshold-based routing + - Vector similarity or inlier analysis + - Automatic fallback to OpenAI one-at-a-time + - CEN refinement and deduplication +- **Workflow**: Panel count → Route to local/split analysis → Apply refinements + +#### **OpenAI Detector (`openai_detector.py`)** +- **Purpose**: Uses OpenAI o3 model for image matching +- **Features**: + - One-at-a-time processing with multiprocessing + - Panel-aware refinement + - Image preprocessing (greyscale, contrast) +- **API**: OpenAI o3 vision model + +#### **Vector Detector (`vector_detector.py`)** +- **Purpose**: Uses Google Vertex AI embeddings for similarity matching +- **Features**: + - 1408-dimensional embeddings + - Cosine similarity matching + - Embedding caching +- **API**: Google Vertex AI Multimodal Embeddings + +#### **Gemini Detector (`gemini_detector.py`)** +- **Purpose**: Uses Google Gemini 2.5 Pro for image analysis +- **Features**: + - Batch processing + - Safety settings handling + - Image preprocessing +- **API**: Google Gemini 2.5 Pro + +### 3. **Panel Splitting System** + +#### **Panel Splitter (`panel_splitter.py`)** +- **Purpose**: Basic multi-method panel splitting +- **Methods**: Edge detection, contour finding, histogram analysis, K-means clustering + +#### **Advanced Splitter (`advanced_splitter.py`)** +- **Purpose**: Advanced edge detection and gutter analysis +- **Methods**: Sobel edge detection, energy analysis, percentile thresholding + +#### **Simple Splitter (`simple_splitter.py`)** +- **Purpose**: Simple even division based on panel count +- **Methods**: Even division, panel count-based splitting + +### 4. **Support Systems** + +#### **Memory Manager (`memory_manager.py`)** +- **Purpose**: Prevents memory exhaustion during processing +- **Features**: Memory monitoring, worker throttling, safe execution decorators + +#### **Logging Config (`logging_config.py`)** +- **Purpose**: Dual logging to terminal and file +- **Features**: System info logging, exception tracking, memory usage logging + +#### **Process Detection (`process_detection.py`)** +- **Purpose**: Standalone functions for multiprocessing +- **Features**: Process isolation, error handling, resource cleanup + +### 5. **Key Algorithms** + +#### **Inlier Analysis (OpenCV)** +- **Purpose**: Local feature matching using computer vision +- **Algorithm**: AKAZE features → RANSAC homography → Inlier counting +- **Advantage**: No API costs, fast processing + +#### **Vector Similarity (Vertex AI)** +- **Purpose**: Semantic similarity using embeddings +- **Algorithm**: Image embeddings → Cosine similarity → Threshold matching +- **Advantage**: Semantic understanding, good for transformed images + +#### **Panel Detection (OpenAI o3)** +- **Purpose**: Intelligent panel counting and censorship detection +- **Algorithm**: Vision model analysis → Panel count + censorship status +- **Advantage**: Accurate panel analysis, handles complex layouts + +### 6. **Processing Modes** + +#### **Hybrid Mode** (Recommended) +- **Strategy**: OpenAI panel counting + local analysis for efficiency +- **Routing**: ≤2 panels → local analysis, ≥3 panels → split + analysis +- **Fallback**: OpenAI one-at-a-time if insufficient matches +- **Cost**: ~1 API call per layout vs ~41 for pure OpenAI + +#### **OpenAI Mode** +- **Strategy**: Pure OpenAI o3 processing +- **Options**: Batch or one-at-a-time with panel-aware refinement +- **Cost**: High API usage but highest accuracy + +#### **Vector Mode** +- **Strategy**: Pure vector embedding similarity +- **Options**: Splitting modes for multi-panel layouts +- **Cost**: No API costs after embedding generation + +#### **Gemini Mode** +- **Strategy**: Google Gemini 2.5 Pro processing +- **Options**: Batch or one-at-a-time processing +- **Cost**: Lower than OpenAI but higher than vector + +This architecture provides a flexible, scalable system for master image detection with multiple processing strategies optimized for different use cases and cost requirements. \ No newline at end of file diff --git a/memory_manager.py b/memory_manager.py new file mode 100644 index 0000000..1b3aad1 --- /dev/null +++ b/memory_manager.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 +""" +Memory Manager - Prevents memory exhaustion during concurrent processing +""" + +import psutil +import os +import time +import logging +from functools import wraps + +class MemoryManager: + """Memory management utility to prevent system crashes""" + + def __init__(self, max_memory_percent=80, max_swap_percent=80): + """ + Initialize memory manager + + Args: + max_memory_percent: Maximum memory usage percentage before throttling + max_swap_percent: Maximum swap usage percentage before warning (does not throttle) + """ + self.max_memory_percent = max_memory_percent + self.max_swap_percent = max_swap_percent + self.logger = logging.getLogger('master_adapt_detect') + + def get_memory_usage(self): + """Get current memory and swap usage""" + memory = psutil.virtual_memory() + swap = psutil.swap_memory() + + return { + 'memory_percent': memory.percent, + 'memory_available_gb': memory.available / (1024**3), + 'swap_percent': swap.percent, + 'swap_used_gb': swap.used / (1024**3) + } + + def is_memory_safe(self): + """Check if memory usage is within safe limits (only RAM, not swap)""" + usage = self.get_memory_usage() + + # Warn about swap usage but don't block processing + if usage['swap_percent'] > self.max_swap_percent: + self.logger.warning(f"High swap usage: {usage['swap_percent']:.1f}% - Performance may be degraded") + + # Only block processing for high RAM usage + if usage['memory_percent'] > self.max_memory_percent: + self.logger.warning(f"High memory usage: {usage['memory_percent']:.1f}%") + return False + + return True + + def wait_for_memory_safe(self, timeout=30): + """Wait for memory to return to safe levels""" + start_time = time.time() + + while not self.is_memory_safe(): + if time.time() - start_time > timeout: + self.logger.error("Memory did not return to safe levels within timeout") + return False + + self.logger.info("Waiting for memory to return to safe levels...") + time.sleep(1) + + return True + + def limit_concurrent_processes(self, max_processes=None): + """Calculate safe number of concurrent processes based on memory""" + # Always get available memory for logging + available_gb = psutil.virtual_memory().available / (1024**3) + + if max_processes is None: + # Conservative estimate based on available memory + # Assume each process needs ~2GB for feature processing + max_processes = max(1, int(available_gb / 2)) + + cpu_count = psutil.cpu_count() + + # Don't exceed CPU count or memory-based limit + safe_processes = min(max_processes, cpu_count) + + self.logger.info(f"Limiting concurrent processes to {safe_processes} (Memory: {available_gb:.1f}GB available)") + return safe_processes + +def memory_safe_execution(memory_manager): + """Decorator to ensure memory-safe execution of functions""" + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + # Check memory before execution + if not memory_manager.is_memory_safe(): + memory_manager.logger.warning("Memory usage high, waiting before execution...") + if not memory_manager.wait_for_memory_safe(): + raise MemoryError("Memory usage too high to safely execute function") + + try: + return func(*args, **kwargs) + except MemoryError as e: + memory_manager.logger.error(f"Memory error in {func.__name__}: {e}") + raise + finally: + # Force garbage collection + import gc + gc.collect() + + return wrapper + return decorator + +def reduce_feature_count(features, max_features=10000): + """Reduce feature count to prevent memory explosion""" + if len(features) > max_features: + # Keep best features based on response strength + import numpy as np + responses = [f.response for f in features] + indices = np.argsort(responses)[-max_features:] + reduced_features = [features[i] for i in indices] + return reduced_features + return features \ No newline at end of file diff --git a/openai_detector.py b/openai_detector.py new file mode 100644 index 0000000..db3dc36 --- /dev/null +++ b/openai_detector.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python3 +""" +OpenAI Image Detector - Alternative to Gemini detector +Uses OpenAI o3 model to detect which master images appear in layout images +""" + +import os +import json +import time +import base64 +from pathlib import Path +from typing import List, Dict, Optional +from openai import OpenAI +from dotenv import load_dotenv +from PIL import Image, ImageEnhance +import tempfile +import numpy as np +import pickle +import cv2 +import concurrent.futures +import threading +import uuid +import multiprocessing +from functools import partial +from panel_splitter import PanelSplitter +from cost_calculator import cost_calculator, extract_token_usage_from_response + + +def process_single_master_detection_openai(layout_path, master_id, master_path, enable_greyscale, enable_contrast_enhancement, contrast_factor, api_key): + """ + Standalone function for processing a single master detection using OpenAI in a separate process. + """ + try: + # Import and configure in each process to avoid shared state + import os + import json + import time + import base64 + from pathlib import Path + from PIL import Image, ImageEnhance + from openai import OpenAI + from dotenv import load_dotenv + import uuid + import threading + # Note: cost_calculator import removed from multiprocessing function + + + # Initialize OpenAI client in this process + client = OpenAI(api_key=api_key) + + # Create temp directory for this process + temp_path = Path("temp_processed") + temp_path.mkdir(exist_ok=True) + + def preprocess_image_local(image_path: str) -> str: + """Local preprocessing function for this process""" + if not enable_greyscale and not enable_contrast_enhancement: + return image_path + + try: + with Image.open(image_path) as img: + processed_img = img.copy() + + if enable_greyscale: + processed_img = processed_img.convert('L') + processed_img = processed_img.convert('RGB') + + if enable_contrast_enhancement: + contrast_enhancer = ImageEnhance.Contrast(processed_img) + processed_img = contrast_enhancer.enhance(contrast_factor) + + sharpness_enhancer = ImageEnhance.Sharpness(processed_img) + processed_img = sharpness_enhancer.enhance(1.3) + + # Thread-safe filename + thread_id = threading.current_thread().ident + unique_id = str(uuid.uuid4())[:8] + original_name = Path(image_path).stem + processed_path = temp_path / f"{original_name}_processed_{thread_id}_{unique_id}.jpg" + processed_img.save(processed_path, 'JPEG', quality=95) + + return str(processed_path) + + except Exception as e: + return image_path + + def encode_image_to_base64(image_path: str) -> str: + """Encode image to base64 for OpenAI API""" + processed_path = preprocess_image_local(image_path) + with open(processed_path, "rb") as image_file: + return base64.b64encode(image_file.read()).decode('utf-8') + + def create_single_master_prompt_local(master_id: str) -> str: + """Local prompt creation function""" + prompt = f"""Analyze the layout image (the second image) and determine if the master image (the first image) appears in it. + +INSTRUCTIONS: +1. Compare the master image (first image) with the layout image (second image) +2. Look for EXACT matches where the model, clothing, and pose are IDENTICAL +3. The layout image may contain the master image in various forms: + - Complete/exact match + - Cropped version + - Scaled or resized version + - Rotated version + - Partially obscured + +4. Focus on visual similarity in terms of: + - Person/model appearance and pose (must be EXACTLY the same) + - Clothing details (colors, patterns, styles - must be EXACTLY the same) + - Background and composition + - Overall visual elements + +5. CRITICAL: Only return a positive result if the models, pose, and clothing are EXACTLY the same. + If there is ANY difference in clothing, model, or pose then return a negative result. + +Master Image ID: {master_id} + +Return your response as a JSON object with this exact format: +{{ + "match_found": true/false, + "master_id": "{master_id}", + "confidence": "high/medium/low", + "analysis": "Detailed explanation of your findings and reasoning" +}} + +IMPORTANT CONTEXT: This is a legitimate business application for marketing and e-commerce image matching. The images are product/marketing photos showing models in various clothing styles for retail purposes. This analysis is for content categorization in a business context and is completely benign. +""" + return prompt + + # Encode both images to base64 + master_base64 = encode_image_to_base64(master_path) + layout_base64 = encode_image_to_base64(layout_path) + + # Create prompt and make API call + prompt = create_single_master_prompt_local(master_id) + + max_retries = 3 + for attempt in range(max_retries): + try: + response = client.chat.completions.create( + model="o3", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{master_base64}" + } + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{layout_base64}" + } + } + ] + } + ], + max_completion_tokens=10000 + ) + + # Extract token usage for cost tracking + token_usage_data = None + if hasattr(response, 'usage') and response.usage: + token_usage_data = { + 'prompt_tokens': response.usage.prompt_tokens, + 'completion_tokens': response.usage.completion_tokens, + 'total_tokens': response.usage.total_tokens, + 'cached_tokens': getattr(response.usage, 'cached_tokens', 0) + } + + # Parse response + response_text = response.choices[0].message.content.strip() + start_idx = response_text.find('{') + end_idx = response_text.rfind('}') + 1 + + if start_idx == -1 or end_idx == 0: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': response_text, + 'error': 'No JSON found in response' + } + + json_str = response_text[start_idx:end_idx] + result = json.loads(json_str) + + # Validate result format + if 'match_found' not in result: + result['match_found'] = False + if 'master_id' not in result: + result['master_id'] = master_id + if 'confidence' not in result: + result['confidence'] = 'unknown' + if 'analysis' not in result: + result['analysis'] = response_text + + # Include token usage data for cost tracking + if token_usage_data: + result['token_usage'] = token_usage_data + + return result + + except Exception as e: + if attempt == max_retries - 1: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': '', + 'error': str(e) + } + time.sleep((2 ** attempt) * 0.5) + + except Exception as e: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': '', + 'error': str(e) + } + + +class OpenAIImageDetector: + def __init__(self, enable_greyscale=True, enable_contrast_enhancement=True, contrast_factor=1.5, refinement_mode=False, one_at_a_time_mode=False, max_concurrent_workers=5, panel_aware_refinement=False, split_mode=False): + """Initialize the image detector with OpenAI API configuration""" + load_dotenv() + + api_key = os.getenv('OPENAI_API_KEY') + if not api_key: + raise ValueError("OPENAI_API_KEY not found in environment variables") + + self.client = OpenAI(api_key=api_key) + self.api_key = api_key + + # Concurrency settings + self.max_concurrent_workers = max_concurrent_workers + self._progress_lock = threading.Lock() + + print("Initialized OpenAI detector with o3 model.") + + # Image processing settings + self.enable_greyscale = enable_greyscale + self.enable_contrast_enhancement = enable_contrast_enhancement + self.contrast_factor = contrast_factor + self.refinement_mode = refinement_mode + self.one_at_a_time_mode = one_at_a_time_mode + self.panel_aware_refinement = panel_aware_refinement + + # Split mode configuration + self.split_mode = split_mode + if self.split_mode: + self.splitter = PanelSplitter(debug=True) + print("Split mode enabled: Will split multi-panel layouts before matching") + + # Paths + self.master_images_path = Path("master_images") + self.layouts_path = Path("layouts") + self.results_path = Path("results") + self.temp_path = Path("temp_processed") + + # Create directories + self.results_path.mkdir(exist_ok=True) + self.temp_path.mkdir(exist_ok=True) + + # Master images cache + self.master_images = {} + self.master_files = {} + + def load_master_images(self) -> Dict[str, str]: + """Load all master images and create ID mapping using filenames""" + print("Loading master images...") + + master_files = list(self.master_images_path.glob("*.jpg")) + print(f"Found {len(master_files)} master images") + + for file_path in master_files: + # Use filename (without extension) as the master ID + master_id = file_path.stem + self.master_images[master_id] = str(file_path) + self.master_files[master_id] = file_path.name + + return self.master_images + + def match_split_to_masters(self, split_path: str, master_images: List[str]) -> List[Dict]: + """Match a split image to master images using inlier analysis""" + matches = [] + + for master_id in master_images: + if master_id in self.master_images: + master_path = self.master_images[master_id] + + # Use existing inlier analysis + inlier_result = self.calculate_inliers_for_match(split_path, master_path, master_id) + + # Only include matches with reasonable confidence + if inlier_result.get('confidence') in ['high', 'medium']: + matches.append({ + 'master_id': master_id, + 'confidence': inlier_result.get('confidence', 'unknown'), + 'inliers': inlier_result.get('inliers', 0), + 'match_details': inlier_result + }) + + return matches + + def preprocess_image(self, image_path: str) -> str: + """Preprocess image: convert to greyscale and enhance contrast - THREAD-SAFE VERSION""" + if not self.enable_greyscale and not self.enable_contrast_enhancement: + return image_path + + try: + # Open the image + with Image.open(image_path) as img: + processed_img = img.copy() + + # Convert to greyscale if enabled + if self.enable_greyscale: + processed_img = processed_img.convert('L') + # Convert back to RGB for consistency + processed_img = processed_img.convert('RGB') + + # Enhance contrast if enabled + if self.enable_contrast_enhancement: + # Global contrast enhancement + contrast_enhancer = ImageEnhance.Contrast(processed_img) + processed_img = contrast_enhancer.enhance(self.contrast_factor) + + # Edge contrast enhancement using sharpness + sharpness_enhancer = ImageEnhance.Sharpness(processed_img) + processed_img = sharpness_enhancer.enhance(1.3) + + # Save processed image with thread-safe filename + import threading + import uuid + thread_id = threading.current_thread().ident + unique_id = str(uuid.uuid4())[:8] + original_name = Path(image_path).stem + processed_path = self.temp_path / f"{original_name}_processed_{thread_id}_{unique_id}.jpg" + processed_img.save(processed_path, 'JPEG', quality=95) + + return str(processed_path) + + except Exception as e: + print(f"Warning: Failed to preprocess {Path(image_path).name}: {e}") + print(f"Using original image instead") + return image_path + + def encode_image_to_base64(self, image_path: str) -> str: + """Encode image to base64 for OpenAI API""" + processed_path = self.preprocess_image(image_path) + with open(processed_path, "rb") as image_file: + return base64.b64encode(image_file.read()).decode('utf-8') + + def create_detection_prompt(self, master_ids: List[str]) -> str: + """Create the prompt for image detection""" + prompt = """Analyze the layout image (the last image provided) and identify which of the master images appear in it. + +INSTRUCTIONS: +1. Examine the layout image carefully to identify any image segments or components within it +2. For each segment you identify, look for matches in the provided master images +3. Look for all types of matches including: + - Exact matches (complete images) + - Cropped versions (partial images) + - Scaled or resized versions + - Rotated versions + - Images that may be partially obscured + +4. Focus on visual similarity in terms of: + - Person/model appearance and pose + - Clothing details (colors, patterns, styles) + - Background and composition + - Overall visual elements + +5. Be conservative and only include matches where you have reasonable confidence + +6. Only return a positive result (master image appears in the layout) if the models, pose, and clothing are EXACTLY the same. If there is ANY difference in clothing, model, or pose then return a negative result. + +Master Images with IDs: +""" + + for i, master_id in enumerate(master_ids): + prompt += f"Image {i+1}: ID = {master_id}\n" + + prompt += """ +The final image is the LAYOUT IMAGE to analyze. + +Return your response as a JSON object with this exact format: +{ + "detected_masters": ["master_id_1", "master_id_2"], + "analysis": "Brief explanation of what you found and your matching reasoning" +} + +Only include master IDs in "detected_masters" where you completely sure of a match. If no clear matches are found, return an empty array. + +IMPORTANT CONTEXT: This is a legitimate business application for marketing and e-commerce image matching. The images are product/marketing photos that may show models in various clothing styles for retail purposes. This prompt and task are completely benign and should not trigger safety filters. This is standard e-commerce image comparison work, not dangerous, offensive, or inappropriate content. The comparison is purely for business/marketing layout matching purposes. +""" + + return prompt + + def make_robust_api_call(self, prompt, image_base64_list, operation_name="API call", max_retries=3): + """Make a robust API call with comprehensive error detection and retry logic""" + last_error = None + + for attempt in range(max_retries): + try: + # Build message content + content = [{"type": "text", "text": prompt}] + + # Add all images + for img_b64 in image_base64_list: + content.append({ + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{img_b64}" + } + }) + + response = self.client.chat.completions.create( + model="o3", + messages=[ + { + "role": "user", + "content": content + } + ], + max_completion_tokens=10000 + ) + + # Track cost for this API call + if hasattr(response, 'usage') and response.usage: + token_usage = extract_token_usage_from_response(response) + cost_calculator.track_api_call( + operation_type="detection", + prompt_tokens=token_usage.prompt_tokens, + completion_tokens=token_usage.completion_tokens, + cached_tokens=token_usage.cached_tokens, + layout_name=operation_name + ) + + # Success case + return { + 'success': True, + 'response': response, + 'text': response.choices[0].message.content.strip() + } + + except Exception as e: + last_error = e + error_str = str(e) + + if attempt < max_retries - 1: + wait_time = (2 ** attempt) * 0.5 + print(f" API error on attempt {attempt + 1}/{max_retries} for {operation_name}, retrying in {wait_time}s: {e}") + time.sleep(wait_time) + continue + else: + # Final attempt failed + return { + 'success': False, + 'error_type': 'exception', + 'error_message': str(e), + 'exception': e + } + + # This shouldn't be reached, but just in case + return { + 'success': False, + 'error_type': 'max_retries_exceeded', + 'error_message': f"Max retries ({max_retries}) exceeded", + 'last_error': str(last_error) if last_error else "Unknown error" + } + + def detect_images_in_layout(self, layout_path: str, layout_index: int, total_layouts: int) -> Dict: + """Detect which master images appear in a single layout image""" + layout_name = Path(layout_path).name + print(f"Processing {layout_index}/{total_layouts}: {layout_name}") + + try: + # Encode all master images and the layout image + master_ids = list(self.master_images.keys()) + image_base64_list = [] + + # Add master images + for master_id in master_ids: + master_path = self.master_images[master_id] + master_b64 = self.encode_image_to_base64(master_path) + image_base64_list.append(master_b64) + + # Add layout image + layout_b64 = self.encode_image_to_base64(layout_path) + image_base64_list.append(layout_b64) + + # Create prompt + prompt = self.create_detection_prompt(master_ids) + + # Make API call + api_result = self.make_robust_api_call(prompt, image_base64_list, f"detection for {layout_name}") + + # Handle API call failure + if not api_result['success']: + error_msg = api_result['error_message'] + print(f"API call failed for {layout_name}: {error_msg}") + return { + 'detected_masters': [], + 'analysis': f'API call failed: {error_msg}', + 'error': f"{api_result['error_type']}: {error_msg}", + 'retry_count': 3 # Max retries were attempted + } + + # Parse response + response_text = api_result['text'] + + # Extract JSON from response + try: + start_idx = response_text.find('{') + end_idx = response_text.rfind('}') + 1 + + if start_idx == -1 or end_idx == 0: + raise ValueError("No JSON found in response") + + json_str = response_text[start_idx:end_idx] + result = json.loads(json_str) + + # Validate result format + if 'detected_masters' not in result: + result['detected_masters'] = [] + if 'analysis' not in result: + result['analysis'] = response_text + + # Deduplicate detected masters + original_detected = result['detected_masters'][:] + result['detected_masters'] = self.deduplicate_master_matches(result['detected_masters']) + + # Track deduplication if any duplicates were removed + if len(result['detected_masters']) != len(original_detected): + duplicates_removed = len(original_detected) - len(result['detected_masters']) + result['deduplication_applied'] = True + result['duplicates_removed'] = duplicates_removed + result['original_detected_masters'] = original_detected + print(f" Deduplication: Removed {duplicates_removed} duplicate master(s) from {layout_name}") + + # Log completion + detected_count = len(result['detected_masters']) + print(f"✓ Completed {layout_name} - Found {detected_count} matches") + + return result + + except json.JSONDecodeError as e: + print(f"JSON decode error for {layout_name}: {e}") + return { + 'detected_masters': [], + 'analysis': response_text, + 'error': f'JSON decode error: {e}' + } + + except Exception as e: + error_msg = f"Error analyzing {layout_name}: {e}" + print(error_msg) + return { + 'detected_masters': [], + 'analysis': '', + 'error': str(e) + } + + def detect_images_in_layout_one_at_a_time(self, layout_path: str, layout_index: int, total_layouts: int, stored_censorship_data=None) -> Dict: + """Detect which master images appear in a layout by checking each master individually using process-based concurrency""" + layout_name = Path(layout_path).name + print(f"Processing {layout_index}/{total_layouts}: {layout_name} (Process-based one-at-a-time mode)") + + master_ids = list(self.master_images.keys()) + total_masters = len(master_ids) + detected_masters = [] + detailed_results = [] + + print(f" Checking {total_masters} masters using {self.max_concurrent_workers} concurrent processes...") + + # Prepare arguments for process pool + tasks = [] + for master_id in master_ids: + master_path = self.master_images[master_id] + task_args = ( + layout_path, + master_id, + master_path, + self.enable_greyscale, + self.enable_contrast_enhancement, + self.contrast_factor, + self.api_key + ) + tasks.append(task_args) + + # Use ProcessPoolExecutor for true isolation + with concurrent.futures.ProcessPoolExecutor(max_workers=self.max_concurrent_workers) as executor: + # Submit all tasks + future_to_master = { + executor.submit(process_single_master_detection_openai, *task_args): task_args[1] + for task_args in tasks + } + + completed_count = 0 + # Collect results as they complete + for future in concurrent.futures.as_completed(future_to_master): + master_id = future_to_master[future] + completed_count += 1 + + try: + result = future.result() + detailed_results.append(result) + + # Track cost for this API call if token usage data is available + if 'token_usage' in result and result['token_usage']: + token_data = result['token_usage'] + api_call_cost = cost_calculator.track_api_call( + operation_type="one_at_a_time_detection", + prompt_tokens=token_data['prompt_tokens'], + completion_tokens=token_data['completion_tokens'], + cached_tokens=token_data['cached_tokens'], + layout_name=layout_name, + master_id=master_id + ) + + # Show cost tracking progress every 10 completed masters + if cost_calculator.enable_tracking and completed_count % 10 == 0: + print(f" → API call cost: ${api_call_cost.total_cost:.4f} (Running total: ${cost_calculator.total_cost:.4f})") + elif cost_calculator.enable_tracking: + print(f" → Warning: No token usage data available for {master_id}") + + # If match found, add to detected masters + if result.get('match_found', False): + detected_masters.append(master_id) + confidence = result.get('confidence', 'unknown') + print(f" {completed_count}/{total_masters}: ✓ MATCH found for {master_id} (confidence: {confidence})") + else: + if 'error' in result: + print(f" {completed_count}/{total_masters}: Error checking {master_id}: {result['error']}") + else: + print(f" {completed_count}/{total_masters}: No match for {master_id}") + + except Exception as e: + print(f" {completed_count}/{total_masters}: Process error checking {master_id}: {e}") + # Add error result to maintain consistency + error_result = { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': '', + 'error': str(e) + } + detailed_results.append(error_result) + + # Sort detailed_results by master_id to maintain consistent ordering + detailed_results.sort(key=lambda x: x.get('master_id', '')) + + # Deduplicate detected masters (shouldn't be needed in one-at-a-time mode, but for safety) + original_detected = detected_masters[:] + detected_masters = self.deduplicate_master_matches(detected_masters) + + if len(detected_masters) != len(original_detected): + duplicates_removed = len(original_detected) - len(detected_masters) + print(f" Deduplication: Removed {duplicates_removed} duplicate master(s)") + + detected_count = len(detected_masters) + print(f"✓ Completed {layout_name} - Found {detected_count} matches using {self.max_concurrent_workers} concurrent processes") + + final_result = { + 'detected_masters': detected_masters, + 'detected_master_ids': detected_masters, + 'detected_master_filenames': [f"{mid}.jpg" for mid in detected_masters], + 'analysis': f'Process-based one-at-a-time analysis completed. Made {total_masters} separate API calls (one per master). Found {detected_count} exact matches out of {total_masters} masters checked using {self.max_concurrent_workers} concurrent processes.', + 'detailed_results': detailed_results, + 'processing_mode': 'process_based_one_at_a_time', + 'total_masters_checked': total_masters, + 'concurrent_workers': self.max_concurrent_workers, + 'api_calls_made': total_masters, # One API call per master + 'deduplication_applied': len(detected_masters) != len(original_detected), + 'duplicates_removed': len(original_detected) - len(detected_masters) if len(detected_masters) != len(original_detected) else 0, + 'original_detected_masters': original_detected + } + + # STEP 1: Apply CEN refinement first if enabled and we have CEN matches + current_masters = detected_masters + if self.refinement_mode and current_masters: + cen_images = [mid for mid in current_masters if self.is_cen_image(mid)] + if cen_images: + print(f" Applying CEN refinement for {layout_name} (Step 1/2)...") + cen_result = self.apply_cen_refinement_to_results(layout_path, final_result, stored_censorship_data) + current_masters = cen_result.get('detected_masters', current_masters) + + # Update final result with CEN refinement information + final_result.update(cen_result) + + cen_count = len(current_masters) + print(f"✓ CEN refinement completed for {layout_name} - Result: {cen_count} masters") + + # STEP 2: Apply panel-aware refinement if enabled and we have detected masters + if self.panel_aware_refinement and current_masters: + step_label = "Step 2/2" if self.refinement_mode else "Step 1/1" + print(f" Applying panel-aware refinement for {layout_name} ({step_label})...") + + # Count panels in the layout + panel_result = self.count_panels_in_layout(layout_path) + panel_count = panel_result.get('panel_count', 1) + panel_confidence = panel_result.get('confidence', 'unknown') + + print(f" Panel analysis: {panel_count} panels detected (confidence: {panel_confidence})") + + # Refine matches based on panel count using current masters (after CEN refinement) + refinement_result = self.refine_matches_by_panel_count(layout_path, current_masters, panel_count) + + # Update final result with panel-aware refinement information + final_result['detected_masters'] = refinement_result['refined_masters'] + final_result['detected_master_ids'] = refinement_result['refined_masters'] + final_result['detected_master_filenames'] = [f"{mid}.jpg" for mid in refinement_result['refined_masters']] + final_result['panel_aware_refinement_applied'] = True + final_result['panel_count_analysis'] = panel_result + final_result['panel_refinement_details'] = refinement_result + + # Update analysis text + if refinement_result['refinement_applied']: + panel_desc = f"Panel-aware refinement applied: reduced from {refinement_result['original_count']} to {refinement_result['final_count']} masters based on {panel_count} detected panels." + else: + panel_desc = f"Panel-aware refinement skipped: {refinement_result['reason']}." + + final_result['analysis'] += f" {panel_desc}" + + final_detected_count = len(refinement_result['refined_masters']) + print(f"✓ Panel-aware refinement completed for {layout_name} - Final result: {final_detected_count} masters") + + return final_result + + def process_all_layouts(self, limit: Optional[int] = None, specific_file: Optional[str] = None) -> Dict: + """Process all layout images sequentially""" + if self.one_at_a_time_mode: + mode_desc = "OpenAI One-at-a-time Mode" + else: + mode_desc = "OpenAI Multi Master Mode" + + print(f"Starting sequential batch processing ({mode_desc})...") + + # Load master images + self.load_master_images() + + # Get layout files + if specific_file: + # Process only the specific file + layout_files = [self.layouts_path / specific_file] + if not layout_files[0].exists(): + raise FileNotFoundError(f"Layout file {specific_file} not found in {self.layouts_path}") + print(f"Processing specific file: {specific_file}") + else: + layout_files = list(self.layouts_path.glob("*.jpg")) + + if limit: + layout_files = layout_files[:limit] + print(f"Processing first {limit} layouts only") + + total_layouts = len(layout_files) + print(f"Processing {total_layouts} layout images in {mode_desc}") + print("=" * 60) + + results = {} + start_time = time.time() + + for i, layout_path in enumerate(layout_files, 1): + layout_id = layout_path.stem + + # Detect images in layout using the appropriate method + if self.split_mode: + # Split mode: split layout into panels and match each panel + master_ids = list(self.master_images.keys()) + result = self.splitter.split_layout_and_match(str(layout_path), master_ids, self) + + # Apply CEN refinement if enabled and there are matches + if self.refinement_mode and result.get('detected_masters'): + result = self.apply_cen_refinement_to_results(str(layout_path), result) + elif self.one_at_a_time_mode: + # One-at-a-time mode handles both CEN and panel-aware refinement internally + result = self.detect_images_in_layout_one_at_a_time(str(layout_path), i, total_layouts) + else: + # Multi-master mode only supports CEN refinement (not panel-aware) + result = self.detect_images_in_layout(str(layout_path), i, total_layouts) + + # Apply CEN refinement if enabled and there are CEN matches + if self.refinement_mode and result.get('detected_masters'): + result = self.apply_cen_refinement_to_results(str(layout_path), result) + + layout_result = { + 'layout_filename': layout_path.name, + 'detected_master_ids': result['detected_masters'], + 'detected_master_filenames': [f"{mid}.jpg" for mid in result['detected_masters']], + 'analysis': result.get('analysis', 'Split mode analysis'), + 'detection_mode': mode_desc.lower().replace(' ', '_').replace('with_', '') + } + + # Add split mode specific fields + if self.split_mode: + layout_result['split_mode'] = True + layout_result['splits_generated'] = result.get('splits_generated', 0) + layout_result['panel_count'] = result.get('panel_count', 1) + layout_result['panel_confidence'] = result.get('panel_confidence', 'unknown') + if 'split_results' in result: + layout_result['split_results'] = result['split_results'] + + # Add deduplication fields if applied + if 'deduplication_applied' in result: + layout_result['deduplication_applied'] = result['deduplication_applied'] + layout_result['duplicates_removed'] = result['duplicates_removed'] + layout_result['original_detected_masters'] = result['original_detected_masters'] + + if 'error' in result: + layout_result['error'] = result['error'] + + # Add refinement mode specific fields + if self.refinement_mode and result.get('refinement_applied'): + layout_result['refinement_applied'] = result['refinement_applied'] + layout_result['refinement_details'] = result['refinement_details'] + layout_result['censorship_analysis'] = result['censorship_analysis'] + layout_result['original_detection_count'] = result['original_detection_count'] + layout_result['refined_detection_count'] = result['refined_detection_count'] + layout_result['changes_made'] = result.get('changes_made', 0) + + results[layout_id] = layout_result + + # Progress update with time estimate + elapsed = time.time() - start_time + avg_time = elapsed / i + remaining = (total_layouts - i) * avg_time + + print(f"Progress: {i}/{total_layouts} ({i/total_layouts*100:.1f}%) - Est. remaining: {remaining/60:.1f} min") + + # Save progress periodically + if i % 20 == 0: + self.save_results(results, f"openai_progress_{i}") + + total_time = time.time() - start_time + print(f"\n✓ Completed processing all {total_layouts} layouts in {total_time/60:.1f} minutes") + print(f"Average time per layout: {total_time/total_layouts:.1f} seconds") + return results + + def save_results(self, results: Dict, filename: str = "openai_detection_results") -> str: + """Save results to JSON file""" + output_path = self.results_path / f"{filename}.json" + + # Add metadata + output_data = { + 'metadata': { + 'total_layouts_processed': len(results), + 'total_master_images': len(self.master_images), + 'master_images_available': list(self.master_files.keys()), + 'provider': 'openai', + 'model': 'o3' + }, + 'results': results + } + + with open(output_path, 'w') as f: + json.dump(output_data, f, indent=2) + + print(f"Results saved to: {output_path}") + return str(output_path) + + def generate_summary(self, results: Dict) -> Dict: + """Generate summary statistics""" + total_layouts = len(results) + layouts_with_matches = sum(1 for r in results.values() if r['detected_master_ids']) + + # Count master image occurrences + master_counts = {} + for result in results.values(): + for master_id in result['detected_master_ids']: + master_counts[master_id] = master_counts.get(master_id, 0) + 1 + + # Deduplication statistics + layouts_with_deduplication = sum(1 for r in results.values() if r.get('deduplication_applied', False)) + total_duplicates_removed = sum(r.get('duplicates_removed', 0) for r in results.values()) + + summary = { + 'total_layouts_processed': total_layouts, + 'layouts_with_matches': layouts_with_matches, + 'layouts_without_matches': total_layouts - layouts_with_matches, + 'master_image_usage': master_counts, + 'most_used_masters': sorted(master_counts.items(), key=lambda x: x[1], reverse=True)[:10], + # Deduplication stats + 'layouts_with_deduplication': layouts_with_deduplication, + 'total_duplicates_removed': total_duplicates_removed, + 'deduplication_rate': round(layouts_with_deduplication / total_layouts * 100, 1) if total_layouts > 0 else 0, + 'provider': 'openai', + 'model': 'o3' + } + + return summary + + def deduplicate_master_matches(self, detected_masters: List[str]) -> List[str]: + """Remove duplicate master matches from a list while preserving order""" + if not detected_masters: + return detected_masters + + # Simple deduplication - remove exact duplicates while preserving order + seen = set() + deduplicated = [] + + for master_id in detected_masters: + if master_id not in seen: + seen.add(master_id) + deduplicated.append(master_id) + + return deduplicated + + def cleanup_temp_files(self): + """Clean up temporary processed image files - handles thread-safe filenames""" + try: + if self.temp_path.exists(): + # Clean up both old and new thread-safe naming patterns + for temp_file in self.temp_path.glob("*_processed*.jpg"): + temp_file.unlink() + # Remove temp directory if empty + if not any(self.temp_path.iterdir()): + self.temp_path.rmdir() + except Exception as e: + print(f"Warning: Failed to cleanup temp files: {e}") + + def is_cen_image(self, master_id: str) -> bool: + """Check if a master image ID represents a CEN (censored) image""" + return '_CEN' in master_id + + def find_corresponding_non_cen_image(self, cen_master_id: str) -> Optional[str]: + """Find the corresponding non-CEN image for a given CEN master ID""" + if not self.is_cen_image(cen_master_id): + return None + + # Transform CEN filename to non-CEN filename + # Example: "1011A_1011A_1011_01_CEN" -> "1011A_1011_01" + parts = cen_master_id.split('_') + if len(parts) >= 4 and parts[-1] == 'CEN': + # Remove the middle duplicate part and _CEN suffix + # Pattern: prefix_prefix_middle_suffix_CEN -> prefix_middle_suffix + if len(parts) >= 5: + non_cen_id = f"{parts[0]}_{parts[2]}_{parts[3]}" + else: + # Fallback: just remove _CEN + non_cen_id = '_'.join(parts[:-1]) + + # Check if this non-CEN image exists in our master images + if non_cen_id in self.master_images: + return non_cen_id + + return None + + def create_censorship_detection_prompt(self) -> str: + """Create prompt for detecting if a layout image contains censored content""" + prompt = """Analyze this layout image to determine if it contains censored or uncensored content. + +TASK: Determine whether the images in this layout are censored (covered) or uncensored (more exposed). + +CENSORSHIP INDICATORS TO LOOK FOR: +1. **Clothing Coverage**: + - Long sleeves vs. sleeveless/short sleeves + - Full-length pants/skirts vs. shorts or shorter garments + - High necklines vs. lower necklines + +2. **Skin Coverage**: + - Arms: Fully covered vs. bare arms + - Legs: Fully covered vs. exposed legs/thighs + - Torso: Additional covering vs. more exposed areas + +3. **Added Elements**: + - Opaque or semi-transparent overlay layers covering skin + - Additional fabric or clothing elements that appear to cover exposed areas + - Digital modifications that add coverage + +CLASSIFICATION: +- **CENSORED**: If models show significant additional clothing coverage, long sleeves, full pants/skirts, or digital overlays covering skin +- **UNCENSORED**: If models show more exposed skin, shorter garments, bare arms/legs, or natural clothing without added coverage + +Return your response as a JSON object with this exact format: +{{ + "is_censored": true/false, + "confidence": "high/medium/low", + "analysis": "Detailed explanation of the coverage patterns observed and reasoning for the classification", + "coverage_details": "Specific description of clothing and skin coverage in the layout" +}} + +Be precise and focus on the actual clothing and coverage patterns visible in the image. + +IMPORTANT CONTEXT: This is a legitimate business application for marketing and e-commerce image classification. The images are product/marketing photos showing models in various clothing styles for retail purposes. This analysis is for content categorization in a business context and is completely benign. +""" + return prompt + + def detect_layout_censorship(self, layout_path: str) -> Dict: + """Detect if a layout image contains censored or uncensored content""" + try: + print(f" → Analyzing layout image with OpenAI o3 model...") + + # Process the layout image + processed_layout_path = self.preprocess_image(layout_path) + + # Encode image to base64 + with open(processed_layout_path, "rb") as image_file: + base64_image = base64.b64encode(image_file.read()).decode('utf-8') + + # Create censorship detection prompt + prompt = self.create_censorship_detection_prompt() + + # Make API call to OpenAI + print(f" → Making API call to OpenAI o3 for censorship analysis...") + response = self.client.chat.completions.create( + model="o3", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}", + "detail": "high" + } + } + ] + } + ], + max_completion_tokens=10000 + ) + + # Track cost for this API call + if hasattr(response, 'usage') and response.usage: + token_usage = extract_token_usage_from_response(response) + cost_calculator.track_api_call( + operation_type="censorship_detection", + prompt_tokens=token_usage.prompt_tokens, + completion_tokens=token_usage.completion_tokens, + cached_tokens=token_usage.cached_tokens, + layout_name=Path(layout_path).name + ) + + response_text = response.choices[0].message.content + print(f" → Received response from OpenAI o3") + + # Extract JSON from response + try: + start_idx = response_text.find('{') + end_idx = response_text.rfind('}') + 1 + + if start_idx == -1 or end_idx == 0: + raise ValueError("No JSON found in response") + + json_str = response_text[start_idx:end_idx] + result = json.loads(json_str) + + # Validate result format + if 'is_censored' not in result: + result['is_censored'] = True # Default to censored if unclear + if 'confidence' not in result: + result['confidence'] = 'unknown' + if 'analysis' not in result: + result['analysis'] = response_text + + print(f" → OpenAI analysis successful: {result.get('is_censored')} (confidence: {result.get('confidence')})") + return result + + except json.JSONDecodeError as e: + print(f" → JSON parsing failed: {e}") + print(f" → Raw response: {response_text[:200]}...") + return { + 'is_censored': True, # Default to censored if parsing fails + 'confidence': 'unknown', + 'analysis': response_text, + 'error': f'JSON decode error: {e}' + } + + except Exception as e: + print(f" → Error in censorship detection: {e}") + return { + 'is_censored': True, # Default to censored if error + 'confidence': 'unknown', + 'analysis': '', + 'error': str(e) + } + + def apply_cen_refinement_to_results(self, layout_path: str, initial_results: Dict, stored_censorship_data=None) -> Dict: + """Apply CEN refinement to initial detection results""" + layout_name = Path(layout_path).name + detected_masters = initial_results.get('detected_masters', []) + + # First, deduplicate the detected masters to avoid processing duplicates + original_count = len(detected_masters) + detected_masters = self.deduplicate_master_matches(detected_masters) + if len(detected_masters) != original_count: + duplicates_removed = original_count - len(detected_masters) + print(f" Removed {duplicates_removed} duplicate master(s) before CEN refinement") + + # Find CEN images in the results + cen_images = [mid for mid in detected_masters if self.is_cen_image(mid)] + + if not cen_images: + # No CEN images found, return original results + return initial_results + + print(f" Refining {len(cen_images)} CEN matches for {layout_name}") + + # Use stored censorship data if provided, otherwise make API call + if stored_censorship_data: + is_layout_censored = stored_censorship_data.get('is_censored', True) + confidence = stored_censorship_data.get('confidence', 'unknown') + print(f" Using stored censorship analysis: {'CENSORED' if is_layout_censored else 'UNCENSORED'} (confidence: {confidence})") + # Create censorship_result from stored data for consistency + censorship_result = { + 'is_censored': is_layout_censored, + 'confidence': confidence, + 'analysis': stored_censorship_data.get('analysis', ''), + 'coverage_details': stored_censorship_data.get('coverage_details', '') + } + else: + print(f" Analyzing layout to determine censorship level...") + # Detect if the layout is censored or uncensored + censorship_result = self.detect_layout_censorship(layout_path) + is_layout_censored = censorship_result.get('is_censored', True) + confidence = censorship_result.get('confidence', 'unknown') + print(f" Layout analysis: {'CENSORED' if is_layout_censored else 'UNCENSORED'} (confidence: {confidence})") + + refined_masters = [] + refinement_details = [] + changes_made = 0 + + # Process each detected image + for master_id in detected_masters: + if self.is_cen_image(master_id): + # This is a CEN image + non_cen_id = self.find_corresponding_non_cen_image(master_id) + + if not is_layout_censored and non_cen_id: + # Layout is uncensored, switch to non-CEN version + refined_masters.append(non_cen_id) + refinement_details.append({ + 'original_cen_match': master_id, + 'non_cen_alternative': non_cen_id, + 'final_choice': non_cen_id, + 'confidence': confidence, + 'analysis': f"Layout determined to be uncensored, switched from {master_id} to {non_cen_id}", + 'changed': True, + 'reason': 'layout_uncensored' + }) + changes_made += 1 + print(f" → Changed {master_id} to {non_cen_id} (layout is uncensored)") + else: + # Layout is censored or no non-CEN alternative, keep CEN version + refined_masters.append(master_id) + reason = 'layout_censored' if is_layout_censored else 'no_non_cen_alternative' + refinement_details.append({ + 'original_cen_match': master_id, + 'non_cen_alternative': non_cen_id, + 'final_choice': master_id, + 'confidence': confidence, + 'analysis': f"Kept {master_id} - layout is censored or no non-CEN alternative available", + 'changed': False, + 'reason': reason + }) + print(f" → Kept {master_id} ({'layout is censored' if is_layout_censored else 'no non-CEN alternative'})") + else: + # This is not a CEN image, keep it as-is + refined_masters.append(master_id) + + print(f" Summary: {changes_made} CEN images changed to non-CEN versions") + + # Apply deduplication to refined masters in case refinement introduced duplicates + original_refined = refined_masters[:] + refined_masters = self.deduplicate_master_matches(refined_masters) + + if len(refined_masters) != len(original_refined): + post_refinement_duplicates = len(original_refined) - len(refined_masters) + print(f" Post-refinement deduplication: Removed {post_refinement_duplicates} duplicate(s)") + + # Update results with refinement information + refined_results = initial_results.copy() + refined_results['detected_masters'] = refined_masters + refined_results['detected_master_ids'] = refined_masters # Update both fields for consistency + refined_results['detected_master_filenames'] = [f"{mid}.jpg" for mid in refined_masters] + refined_results['refinement_applied'] = True + refined_results['refinement_details'] = refinement_details + refined_results['censorship_analysis'] = censorship_result + refined_results['original_detection_count'] = len(detected_masters) + refined_results['refined_detection_count'] = len(refined_masters) + refined_results['changes_made'] = changes_made + + return refined_results + + def count_panels_and_detect_censorship(self, layout_path: str) -> Dict: + """Count panels and detect censorship in a layout image using OpenAI o3 in a single call""" + layout_name = Path(layout_path).name + + try: + print(f" → Analyzing panels and censorship in {layout_name} using OpenAI o3...") + + # Encode layout image to base64 + layout_b64 = self.encode_image_to_base64(layout_path) + + # Create combined prompt for panel counting and censorship detection + prompt = """SYSTEM +You are a vision‑language expert hired to (a) count discrete image panels in fashion‑layout collages and +(b) flag any content requiring censorship review. +Follow every instruction exactly. Think first, then answer. + +──────────────────────────────────────── +TASK 1 – PANEL COUNTING +──────────────────────────────────────── +❶ INTERNAL THINKING (keep private – do NOT reveal in final JSON) + • Load the entire image at native resolution. + • Scan left→right looking for vertical "gutters": ≥2 px columns whose pixel variance ≈ background (usually white/grey). Treat each continuous non‑gutter block as a candidate panel. + • Merge blocks if they depict the same photo merely split by design elements (logo strip, overlay text, drop shadow) – panels must contain *distinct* photographic content. + • If a wide candidate clearly contains multiple, non‑overlapping photos with no visual gutter (e.g., triptych glued together) **count each sub‑photo**; otherwise treat the whole block as one panel. + • Ignore duplicate imagery: identical crop, mirror, slight colour shift, size change ⇒ count once. + • Keep a running list: ⟨panel # , x‑start , x‑end , short human description⟩. + +❷ AFTER thinking, produce: + { + "panel_count": , + "panel_confidence": "high" | "medium" | "low", + "panel_analysis": "", + "panel_descriptions": ["", … ""] + } + +──────────────────────────────────────── +TASK 2 – CENSORSHIP SCREEN +──────────────────────────────────────── +For every panel, decide whether it might violate standard fashion‑industry ad rules +(fully nude, explicit sexuality, hate imagery, illegal acts). +Add a sibling field: + + "censorship_flags": ["clean", "clean", …] # length == panel_count + • "clean" – appears compliant + • "review‑nudity", "review‑sexual", "review‑violence", "review‑other" + +──────────────────────────────────────── +OUTPUT FORMAT (exactly, no extra keys, no Markdown) +──────────────────────────────────────── +{ "panel_count": , "panel_confidence": "high/medium/low", "panel_analysis": "…", + "panel_descriptions": […], "censorship_flags": […], "is_censored": true/false, + "censorship_confidence": "high/medium/low", "censorship_analysis": "…" } + +──────────────────────────────────────── +💡 WORKED EXAMPLE — image: "H&M Spring campaign collage" +(This is for your reference; remove in production runs.) + +INTERNAL THINK (abridged) +• Detected 17 vertical low‑variance gutters ⇒ 16 content blocks. +• Verified no duplicate crops; two blocks are composites but count as 1 each because photos overlap with no gutter. +• No NSFW elements (fashion poses, fully clothed). + +PUBLIC OUTPUT +{ + "panel_count": 16, + "panel_confidence": "high", + "panel_analysis": "Identified 16 distinct image tiles separated by visible white gutters; two wide tiles are multi‑photo composites but have no gutters so each treated as one panel. All panels show fully clothed fashion models.", + "panel_descriptions": [ + "Two female models in brown gown & cream slip, 'SPRING' text", + "Solo model in black oversized coat + brown skirt, red H&M logo", + "Full‑body shot: peach maxi dress with tote bag", + "Full‑body shot: brown coat, black boots", + "Composite: three models in brown/peach plus two in cream suits, 'SPRING' overlay", + "Two female models leaning, matching cream flared suits, red H&M", + "Close‑up portrait of two women, heads touching", + "Two women embracing, neutral slip & cream jacket", + "Model in black leather jacket & white shorts, 'SPRING' text", + "Model in black bomber jacket & white shorts", + "Model in cream embellished cardigan & flared trousers, red H&M", + "Seated model in oversized white shirt", + "Two models in white outfits, playful pose", + "Wide triptych: (a) two models white/yellow mini + 'SPRING', (b) B&W shirt pose, red H&M, (c) close‑up couple", + "Composite: left pair in cream tunics, right pair trench + black mini, 'SPRING'", + "Two models – tan trench & black dress – red H&M logo" + ], + "censorship_flags": [ + "clean","clean","clean","clean", + "clean","clean","clean","clean", + "clean","clean","clean","clean", + "clean","clean","clean","clean" + ], + "is_censored": false, + "censorship_confidence": "high", + "censorship_analysis": "All panels show fully clothed fashion models with appropriate coverage for retail advertising" +} +END OF EXAMPLE""" + + # Make API call + max_retries = 3 + for attempt in range(max_retries): + try: + response = self.client.chat.completions.create( + model="o3", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{layout_b64}", + "detail": "high" + } + } + ] + } + ], + max_completion_tokens=10000 + ) + + # Track cost for this API call + if hasattr(response, 'usage') and response.usage: + token_usage = extract_token_usage_from_response(response) + cost_calculator.track_api_call( + operation_type="panel_counting_censorship", + prompt_tokens=token_usage.prompt_tokens, + completion_tokens=token_usage.completion_tokens, + cached_tokens=token_usage.cached_tokens, + layout_name=layout_name + ) + + response_text = response.choices[0].message.content.strip() + print(f" → Received combined analysis response from OpenAI o3") + + # Extract JSON from response + try: + start_idx = response_text.find('{') + end_idx = response_text.rfind('}') + 1 + + if start_idx == -1 or end_idx == 0: + raise ValueError("No JSON found in response") + + json_str = response_text[start_idx:end_idx] + result = json.loads(json_str) + + # Validate and normalize panel count fields + if 'panel_count' not in result: + result['panel_count'] = 1 # Default to single panel + if 'panel_confidence' not in result: + result['panel_confidence'] = 'unknown' + if 'panel_analysis' not in result: + result['panel_analysis'] = response_text + + # Validate and normalize censorship fields + if 'is_censored' not in result: + result['is_censored'] = True # Default to censored if unclear + if 'censorship_confidence' not in result: + result['censorship_confidence'] = 'unknown' + if 'censorship_analysis' not in result: + result['censorship_analysis'] = response_text + + # Ensure panel_count is a positive integer + try: + result['panel_count'] = max(1, int(result['panel_count'])) + except (ValueError, TypeError): + result['panel_count'] = 1 + + # Ensure is_censored is a boolean + if isinstance(result['is_censored'], str): + result['is_censored'] = result['is_censored'].lower() in ['true', '1', 'yes'] + + print(f" → Combined analysis successful: {result['panel_count']} panels (confidence: {result.get('panel_confidence')}), censored: {result['is_censored']} (confidence: {result.get('censorship_confidence')})") + return result + + except json.JSONDecodeError as e: + print(f" → JSON parsing failed: {e}") + if attempt == max_retries - 1: + return { + 'panel_count': 1, # Default to single panel + 'panel_confidence': 'unknown', + 'panel_analysis': response_text, + 'is_censored': True, # Default to censored + 'censorship_confidence': 'unknown', + 'censorship_analysis': response_text, + 'error': f'JSON decode error: {e}' + } + + except Exception as e: + if attempt == max_retries - 1: + print(f" → Error in combined analysis: {e}") + return { + 'panel_count': 1, # Default to single panel + 'panel_confidence': 'unknown', + 'panel_analysis': '', + 'is_censored': True, # Default to censored + 'censorship_confidence': 'unknown', + 'censorship_analysis': '', + 'error': str(e) + } + time.sleep((2 ** attempt) * 0.5) + + except Exception as e: + print(f" → Error in combined analysis: {e}") + return { + 'panel_count': 1, # Default to single panel + 'panel_confidence': 'unknown', + 'panel_analysis': '', + 'is_censored': True, # Default to censored + 'censorship_confidence': 'unknown', + 'censorship_analysis': '', + 'error': str(e) + } + + def count_panels_in_layout(self, layout_path: str) -> Dict: + """Legacy compatibility method for panel counting only""" + combined_result = self.count_panels_and_detect_censorship(layout_path) + + # Convert to old format for backward compatibility + return { + 'panel_count': combined_result.get('panel_count', 1), + 'confidence': combined_result.get('panel_confidence', 'unknown'), + 'analysis': combined_result.get('panel_analysis', ''), + 'panel_descriptions': combined_result.get('panel_descriptions', []), + 'error': combined_result.get('error', None) + } + + def calculate_inliers_for_match(self, layout_path: str, master_path: str, master_id: str) -> Dict: + """Calculate inlier count for a master image match using OpenCV feature matching""" + try: + # Read images in grayscale for feature detection + layout_img = cv2.imread(layout_path, cv2.IMREAD_GRAYSCALE) + master_img = cv2.imread(master_path, cv2.IMREAD_GRAYSCALE) + + if layout_img is None or master_img is None: + return { + 'master_id': master_id, + 'inliers': 0, + 'confidence': 'low', + 'error': 'Could not read one or both images' + } + + # Initialize feature detector and matcher (using same approach as example code) + akaze = cv2.AKAZE_create() + bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False) + + # Detect keypoints and descriptors + kp1, des1 = akaze.detectAndCompute(layout_img, None) + kp2, des2 = akaze.detectAndCompute(master_img, None) + + if des1 is None or des2 is None: + return { + 'master_id': master_id, + 'inliers': 0, + 'confidence': 'low', + 'error': 'No features detected in one or both images' + } + + # Match features using k-nearest neighbors + matches = bf.knnMatch(des1, des2, k=2) + + # Apply Lowe's ratio test to filter good matches + good_matches = [] + for match_pair in matches: + if len(match_pair) == 2: + m, n = match_pair + if m.distance < 0.75 * n.distance: + good_matches.append(m) + + min_good_matches = 10 # Same threshold as example code + if len(good_matches) < min_good_matches: + return { + 'master_id': master_id, + 'inliers': 0, + 'confidence': 'low', + 'good_matches': len(good_matches), + 'reason': f'Insufficient good matches: {len(good_matches)} < {min_good_matches}' + } + + # Extract matched points + src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2) + dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2) + + # Find homography using RANSAC + M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0) + + if mask is None: + return { + 'master_id': master_id, + 'inliers': 0, + 'confidence': 'low', + 'good_matches': len(good_matches), + 'error': 'Homography estimation failed' + } + + # Count inliers + inliers = int(np.sum(mask)) + + # Determine confidence based on inlier count and ratio + inlier_ratio = inliers / len(good_matches) + if inliers >= 50 and inlier_ratio >= 0.6: + confidence = 'high' + elif inliers >= 20 and inlier_ratio >= 0.4: + confidence = 'medium' + else: + confidence = 'low' + + return { + 'master_id': master_id, + 'inliers': inliers, + 'confidence': confidence, + 'good_matches': len(good_matches), + 'inlier_ratio': round(inlier_ratio, 3), + 'total_features_layout': len(kp1), + 'total_features_master': len(kp2) + } + + except Exception as e: + return { + 'master_id': master_id, + 'inliers': 0, + 'confidence': 'low', + 'error': str(e) + } + + def refine_matches_by_panel_count(self, layout_path: str, detected_masters: List[str], panel_count: int) -> Dict: + """Refine detected masters based on panel count using inlier analysis""" + layout_name = Path(layout_path).name + + # First, deduplicate the detected masters to avoid processing the same master multiple times + original_count = len(detected_masters) + detected_masters = self.deduplicate_master_matches(detected_masters) + if len(detected_masters) != original_count: + duplicates_removed = original_count - len(detected_masters) + print(f" Removed {duplicates_removed} duplicate master(s) before panel-aware refinement") + + # Optimization: If panel count equals detected masters count, skip refinement + if panel_count == len(detected_masters): + print(f" Panel count ({panel_count}) matches detected masters count ({len(detected_masters)}) - skipping refinement") + return { + 'refined_masters': detected_masters, + 'refinement_applied': False, + 'reason': 'panel_count_matches_detected_count', + 'panel_count': panel_count, + 'original_count': len(detected_masters), + 'final_count': len(detected_masters) + } + + # Only refine if we have more detected masters than panels + if len(detected_masters) <= panel_count: + print(f" Detected masters ({len(detected_masters)}) <= panel count ({panel_count}) - no refinement needed") + return { + 'refined_masters': detected_masters, + 'refinement_applied': False, + 'reason': 'detected_count_within_panel_limit', + 'panel_count': panel_count, + 'original_count': len(detected_masters), + 'final_count': len(detected_masters) + } + + print(f" Refining {len(detected_masters)} masters to best {panel_count} using inlier analysis...") + + # Calculate inliers for each detected master + inlier_results = [] + for i, master_id in enumerate(detected_masters): + master_path = self.master_images[master_id] + print(f" → Analyzing {i+1}/{len(detected_masters)}: {master_id}") + + inlier_result = self.calculate_inliers_for_match(layout_path, master_path, master_id) + inlier_results.append(inlier_result) + + inliers = inlier_result.get('inliers', 0) + confidence = inlier_result.get('confidence', 'unknown') + print(f" ✓ {master_id}: {inliers} inliers (confidence: {confidence})") + + # Sort by inlier count (descending) to get best matches + inlier_results.sort(key=lambda x: x.get('inliers', 0), reverse=True) + + # Select top N matches where N = panel_count + refined_masters = [result['master_id'] for result in inlier_results[:panel_count]] + + print(f" Refinement complete: Selected top {len(refined_masters)} masters based on inlier analysis") + + # Log the selection details + for i, result in enumerate(inlier_results[:panel_count]): + rank = i + 1 + master_id = result['master_id'] + inliers = result.get('inliers', 0) + confidence = result.get('confidence', 'unknown') + print(f" → Rank {rank}: {master_id} ({inliers} inliers, {confidence} confidence)") + + return { + 'refined_masters': refined_masters, + 'refinement_applied': True, + 'reason': 'inlier_based_selection', + 'panel_count': panel_count, + 'original_count': len(detected_masters), + 'final_count': len(refined_masters), + 'inlier_analysis': inlier_results, + 'selection_details': { + 'method': 'highest_inlier_count', + 'selected_masters': [ + { + 'rank': i+1, + 'master_id': result['master_id'], + 'inliers': result.get('inliers', 0), + 'confidence': result.get('confidence', 'unknown') + } + for i, result in enumerate(inlier_results[:panel_count]) + ] + } + } \ No newline at end of file diff --git a/optimize_split_parameters.py b/optimize_split_parameters.py new file mode 100644 index 0000000..88d9b30 --- /dev/null +++ b/optimize_split_parameters.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python3 +""" +Parameter Optimization Script for Advanced Panel Splitting + +This script iteratively tests different percentile and min_gap parameters +to find the optimal settings that result in exactly 8 panels for layout 6791346.jpg. +""" + +import os +import sys +import cv2 +import numpy as np +from pathlib import Path +from PIL import Image +from advanced_splitter import AdvancedPanelSplitter +import itertools + + +def test_parameters(image_path: str, percentile: float, min_gap: int, target_panels: int = 8, verbose: bool = False) -> dict: + """ + Test a specific combination of parameters and return results + + Args: + image_path: Path to the layout image + percentile: Percentile threshold for gutter detection + min_gap: Minimum gap size for gutter detection + target_panels: Target number of panels (default: 8) + verbose: Print detailed information + + Returns: + Dict with test results + """ + try: + # Create splitter with test parameters + splitter = AdvancedPanelSplitter(percentile=percentile, min_gap=min_gap, debug=False) + + # Load image + img = Image.open(image_path).convert("RGB") + img_gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY) + + # Find boundaries + boundaries = splitter.find_boundaries_auto(img_gray) + panel_count = len(boundaries) - 1 # Number of panels is boundaries - 1 + + if verbose: + print(f" Percentile: {percentile:5.1f}, Min Gap: {min_gap:2d} → {panel_count:2d} panels (boundaries: {boundaries})") + + return { + 'percentile': percentile, + 'min_gap': min_gap, + 'panel_count': panel_count, + 'boundaries': boundaries, + 'exact_match': panel_count == target_panels, + 'error': abs(panel_count - target_panels) + } + + except Exception as e: + if verbose: + print(f" ERROR with percentile={percentile}, min_gap={min_gap}: {e}") + return { + 'percentile': percentile, + 'min_gap': min_gap, + 'panel_count': 0, + 'boundaries': [], + 'exact_match': False, + 'error': float('inf'), + 'exception': str(e) + } + + +def optimize_parameters(image_path: str, target_panels: int = 8) -> dict: + """ + Optimize parameters to achieve the target number of panels + + Args: + image_path: Path to the layout image + target_panels: Target number of panels + + Returns: + Dict with optimization results + """ + if not os.path.exists(image_path): + raise FileNotFoundError(f"Image not found: {image_path}") + + print(f"Optimizing parameters for {os.path.basename(image_path)}") + print(f"Target panel count: {target_panels}") + print("-" * 60) + + # Define parameter ranges to test + percentile_range = list(range(1, 51)) # 1% to 50% + min_gap_range = list(range(1, 21)) # 1 to 20 pixels + + # Store all results + all_results = [] + exact_matches = [] + + print("Testing parameter combinations...") + total_combinations = len(percentile_range) * len(min_gap_range) + tested = 0 + + # Test all combinations + for percentile in percentile_range: + for min_gap in min_gap_range: + tested += 1 + if tested % 50 == 0: + print(f" Progress: {tested}/{total_combinations} ({tested/total_combinations*100:.1f}%)") + + result = test_parameters(image_path, percentile, min_gap, target_panels) + all_results.append(result) + + if result['exact_match']: + exact_matches.append(result) + + print(f"\nCompleted testing {total_combinations} parameter combinations") + print(f"Found {len(exact_matches)} exact matches for {target_panels} panels") + + # Analysis + if exact_matches: + print("\n" + "="*60) + print("EXACT MATCHES FOUND!") + print("="*60) + + print(f"\nAll parameter combinations that produce exactly {target_panels} panels:") + print("Percentile | Min Gap | Boundaries") + print("-" * 40) + + for match in exact_matches: + boundaries_str = str(match['boundaries']) + if len(boundaries_str) > 40: + boundaries_str = boundaries_str[:37] + "..." + print(f"{match['percentile']:9.1f} | {match['min_gap']:7d} | {boundaries_str}") + + # Find the "best" match (middle percentile value for stability) + exact_matches.sort(key=lambda x: x['percentile']) + middle_idx = len(exact_matches) // 2 + best_match = exact_matches[middle_idx] + + print(f"\nRECOMMENDED PARAMETERS:") + print(f" Percentile: {best_match['percentile']}") + print(f" Min Gap: {best_match['min_gap']}") + print(f" Result: {best_match['panel_count']} panels") + print(f" Boundaries: {best_match['boundaries']}") + + return best_match + + else: + print("\n" + "="*60) + print("NO EXACT MATCHES FOUND") + print("="*60) + + # Find closest matches + all_results.sort(key=lambda x: x['error']) + closest_matches = [r for r in all_results[:10] if r['error'] == all_results[0]['error']] + + print(f"\nClosest matches (error = {all_results[0]['error']}):") + print("Percentile | Min Gap | Panels | Error | Boundaries") + print("-" * 60) + + for match in closest_matches: + boundaries_str = str(match['boundaries']) + if len(boundaries_str) > 30: + boundaries_str = boundaries_str[:27] + "..." + print(f"{match['percentile']:9.1f} | {match['min_gap']:7d} | {match['panel_count']:6d} | {match['error']:5.1f} | {boundaries_str}") + + best_match = closest_matches[0] + print(f"\nBEST AVAILABLE PARAMETERS:") + print(f" Percentile: {best_match['percentile']}") + print(f" Min Gap: {best_match['min_gap']}") + print(f" Result: {best_match['panel_count']} panels (target: {target_panels})") + print(f" Boundaries: {best_match['boundaries']}") + + return best_match + + +def test_optimized_parameters(image_path: str, percentile: float, min_gap: int): + """ + Test the optimized parameters and show detailed results + """ + print("\n" + "="*60) + print("TESTING OPTIMIZED PARAMETERS") + print("="*60) + + # Create splitter with optimized parameters + splitter = AdvancedPanelSplitter(percentile=percentile, min_gap=min_gap, debug=True) + + # Test the parameters + result = test_parameters(image_path, percentile, min_gap, target_panels=8, verbose=True) + + # Also create the actual splits to verify + print(f"\nCreating actual splits with optimized parameters...") + splits = splitter.split_panels(image_path, target_panel_count=8) + + print(f"Successfully created {len(splits)} splits:") + for i, split in enumerate(splits): + bounds = split['bounds'] + print(f" Panel {i+1}: bounds=({bounds[0]}, {bounds[1]}, {bounds[2]}, {bounds[3]}), " + f"confidence={split['confidence']:.3f}, method={split['method']}") + + return result + + +def main(): + """Main execution function""" + # Check if layout image exists + layout_path = "layouts/6791346.jpg" + + if not os.path.exists(layout_path): + print(f"Error: Layout image not found at {layout_path}") + print("Please ensure the image exists in the layouts directory") + return 1 + + try: + # Run optimization + best_result = optimize_parameters(layout_path, target_panels=8) + + # Test the optimized parameters + test_result = test_optimized_parameters( + layout_path, + best_result['percentile'], + best_result['min_gap'] + ) + + print("\n" + "="*60) + print("FINAL RECOMMENDATION") + print("="*60) + print(f"For layout 6791346.jpg to achieve 8 panels:") + print(f" --percentile {best_result['percentile']}") + print(f" --min-gap {best_result['min_gap']}") + print(f"\nCommand line usage:") + print(f" python cli.py --test --hybrid --split-advanced --percentile {best_result['percentile']} --min-gap {best_result['min_gap']}") + + return 0 + + except Exception as e: + print(f"Error during optimization: {e}") + return 1 + + +if __name__ == "__main__": + exit(main()) \ No newline at end of file diff --git a/panel_splitter.py b/panel_splitter.py new file mode 100644 index 0000000..aab9b5a --- /dev/null +++ b/panel_splitter.py @@ -0,0 +1,857 @@ +#!/usr/bin/env python3 +""" +Panel Splitter Module - Multi-method panel splitting for comic/manga layouts +""" + +import os +import cv2 +import numpy as np +from typing import List, Dict, Tuple, Optional +import json +from pathlib import Path +from scipy import ndimage +from scipy.signal import find_peaks +from sklearn.cluster import KMeans +from skimage.feature import local_binary_pattern +import matplotlib.pyplot as plt + + +class PanelSplitter: + """ + Multi-method panel splitting class that uses various computer vision techniques + to split multi-panel layouts into individual images, then matches each split + to master images using inlier analysis. + """ + + def __init__(self, debug=False): + """ + Initialize the PanelSplitter + + Args: + debug (bool): Enable debug mode for visualization + """ + self.debug = debug + self.debug_dir = "debug_splitting" + if self.debug and not os.path.exists(self.debug_dir): + os.makedirs(self.debug_dir) + + def split_layout_and_match(self, layout_path: str, master_images: List[str], + detector_instance=None) -> Dict: + """ + Main method to split a layout and match splits to master images + + Args: + layout_path (str): Path to the layout image + master_images (List[str]): List of master image paths + detector_instance: The detector instance to use for matching + + Returns: + Dict: Detection results with matches from all splits + """ + # Step 1: Get panel count from OpenAI (if detector supports it) + target_panel_count = 1 + panel_confidence = "unknown" + + if hasattr(detector_instance, 'count_panels_in_layout'): + print(f"Getting panel count for {os.path.basename(layout_path)}...") + panel_result = detector_instance.count_panels_in_layout(layout_path) + target_panel_count = panel_result.get('panel_count', 1) + panel_confidence = panel_result.get('confidence', 'unknown') + print(f"OpenAI detected {target_panel_count} panels (confidence: {panel_confidence})") + + # Step 2: Split the layout into individual panels + print(f"Splitting layout with target count: {target_panel_count}") + splits = self.split_panels(layout_path, target_panel_count) + + if not splits: + print("No splits detected, returning empty results") + return { + 'layout_path': layout_path, + 'detected_masters': [], + 'panel_count': target_panel_count, + 'panel_confidence': panel_confidence, + 'split_mode': 'enabled', + 'splits_generated': 0 + } + + print(f"Generated {len(splits)} splits") + + # Step 3: Match each split to master images + all_matches = [] + split_results = [] + + for i, split_info in enumerate(splits): + print(f"Processing split {i+1}/{len(splits)}") + + # Save split image temporarily for matching + split_image = split_info['image'] + temp_split_path = f"/tmp/split_{i}.jpg" + cv2.imwrite(temp_split_path, split_image) + + # Match this split to master images using existing inlier analysis + if hasattr(detector_instance, 'match_split_to_masters'): + split_matches = detector_instance.match_split_to_masters( + temp_split_path, master_images + ) + else: + # Use basic inlier analysis if method doesn't exist + split_matches = self._match_split_basic(temp_split_path, master_images) + + # Add split metadata to matches + for match in split_matches: + match['split_index'] = i + match['split_bounds'] = split_info['bounds'] + match['split_confidence'] = split_info['confidence'] + all_matches.append(match) + + split_results.append({ + 'split_index': i, + 'bounds': split_info['bounds'], + 'confidence': split_info['confidence'], + 'matches': split_matches + }) + + # Clean up temporary file + if os.path.exists(temp_split_path): + os.remove(temp_split_path) + + # Step 4: Aggregate results + result = { + 'layout_path': layout_path, + 'detected_masters': [match['master_id'] for match in all_matches], + 'panel_count': target_panel_count, + 'panel_confidence': panel_confidence, + 'split_mode': 'enabled', + 'splits_generated': len(splits), + 'split_results': split_results, + 'all_matches': all_matches + } + + # Remove duplicates while preserving highest confidence matches + result = self._deduplicate_matches(result) + + return result + + def split_panels(self, image_path: str, target_panel_count: int) -> List[Dict]: + """ + Split a layout image into individual panels using multiple methods + + Args: + image_path (str): Path to the layout image + target_panel_count (int): Target number of panels to split into + + Returns: + List[Dict]: List of split information with image data and metadata + """ + # Load image + image = cv2.imread(image_path) + if image is None: + print(f"Error: Could not load image {image_path}") + return [] + + height, width = image.shape[:2] + print(f"Image dimensions: {width}x{height}") + + # Use only optimized Canny detection method + print("Using optimized Canny detection for panel splitting") + + try: + method_results = self._optimized_canny_detection(image, target_panel_count) + if not method_results: + print("Optimized Canny detection failed, falling back to simple division") + return self._fallback_simple_division(image, target_panel_count) + except Exception as e: + print(f"Optimized Canny detection failed: {e}") + return self._fallback_simple_division(image, target_panel_count) + + # Use results directly (no consensus needed for single method) + consensus_splits = method_results + + # Create split images + splits = [] + for i, split_bounds in enumerate(consensus_splits): + x, y, w, h = split_bounds['bounds'] + split_image = image[y:y+h, x:x+w] + + # Skip extremely small splits (reduced threshold for 14-panel detection) + if w < 20 or h < 20: + continue + + splits.append({ + 'image': split_image, + 'bounds': (x, y, w, h), + 'confidence': split_bounds['confidence'], + 'method_votes': split_bounds.get('method_votes', []) + }) + + if self.debug: + self._save_debug_visualization(image_path, image, splits) + + return splits + + def _enhanced_gradient_analysis(self, image: np.ndarray, target_count: int) -> List[Dict]: + """Enhanced gradient peak analysis for panel detection""" + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + height, width = gray.shape + + # Multi-scale gradient analysis + separators = [] + scales = [5, 10, 20] + + for sigma in scales: + # Smooth the image + smoothed = cv2.GaussianBlur(gray, (0, 0), sigma) + + # Calculate vertical gradient (for horizontal separators) + grad_y = cv2.Sobel(smoothed, cv2.CV_64F, 0, 1, ksize=3) + + # Project to get horizontal profile + profile = np.mean(np.abs(grad_y), axis=1) + + # Find peaks + prominence = np.std(profile) * 0.5 + peaks, properties = find_peaks(profile, prominence=prominence, distance=height//target_count//2) + + # Add to separators with confidence based on prominence + for peak in peaks: + confidence = properties['prominences'][list(peaks).index(peak)] / np.max(properties['prominences']) + separators.append({ + 'position': peak, + 'confidence': confidence, + 'method': 'gradient_analysis', + 'scale': sigma + }) + + # Convert separator positions to bounding boxes + separators.sort(key=lambda x: x['position']) + + # Create bounds from separators + bounds = [] + prev_y = 0 + + for sep in separators: + if sep['position'] > prev_y + height // (target_count * 2): # Minimum panel height + bounds.append({ + 'bounds': (0, prev_y, width, sep['position'] - prev_y), + 'confidence': sep['confidence'], + 'method': 'gradient_analysis' + }) + prev_y = sep['position'] + + # Add final panel + if prev_y < height - height // (target_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, height - prev_y), + 'confidence': 0.8, + 'method': 'gradient_analysis' + }) + + return bounds + + def _optimized_canny_detection(self, image: np.ndarray, target_count: int) -> List[Dict]: + """Optimized Canny edge detection for panel separators with tuned parameters""" + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + height, width = gray.shape + + # Exact configuration from tuning results that produced 14 panels + threshold_set = [(50, 150), (100, 200), (150, 250)] + morphology_kernel = (3, 1) + hough_threshold = 1324 + min_line_length = 3530 + max_line_gap = 1059 + + # Multi-threshold Canny detection + all_edges = [] + for low, high in threshold_set: + edges = cv2.Canny(gray, low, high) + + # Morphological operations + kernel = np.ones(morphology_kernel, np.uint8) + edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel) + all_edges.append(edges) + + # Combine edge maps + combined_edges = np.maximum.reduce(all_edges) + + # Find horizontal lines using Hough transform + lines = cv2.HoughLinesP( + combined_edges, + 1, + np.pi/180, + threshold=hough_threshold, + minLineLength=min_line_length, + maxLineGap=max_line_gap + ) + + # Filter for horizontal lines + horizontal_lines = [] + if lines is not None: + for line in lines: + x1, y1, x2, y2 = line[0] + if abs(y2 - y1) < height // 20: # Nearly horizontal + horizontal_lines.append({ + 'y_position': (y1 + y2) // 2, + 'length': abs(x2 - x1), + 'confidence': min(1.0, abs(x2 - x1) / width) + }) + + # Sort by y position and create bounds + horizontal_lines.sort(key=lambda x: x['y_position']) + + bounds = [] + prev_y = 0 + + for line in horizontal_lines: + y_pos = line['y_position'] + # Use the same threshold that worked in debug script + if y_pos > prev_y + height // (target_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, y_pos - prev_y), + 'confidence': line['confidence'], + 'method': 'optimized_canny_detection' + }) + prev_y = y_pos + + # Add final panel + if prev_y < height - height // (target_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, height - prev_y), + 'confidence': 0.8, + 'method': 'optimized_canny_detection' + }) + + return bounds + + def _template_matching_method(self, image: np.ndarray, target_count: int) -> List[Dict]: + """Template matching for common panel separators""" + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + height, width = gray.shape + + # Create separator templates + templates = [] + + # White horizontal line template + white_template = np.ones((5, width//4), dtype=np.uint8) * 255 + templates.append(('white_line', white_template)) + + # Black horizontal line template + black_template = np.zeros((5, width//4), dtype=np.uint8) + templates.append(('black_line', black_template)) + + # Gutter template (white with black edges) + gutter_template = np.ones((10, width//4), dtype=np.uint8) * 255 + gutter_template[0, :] = 0 + gutter_template[-1, :] = 0 + templates.append(('gutter', gutter_template)) + + # Find matches for each template + all_matches = [] + + for template_name, template in templates: + result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED) + + # Find good matches + locations = np.where(result >= 0.5) + + for y, x in zip(locations[0], locations[1]): + confidence = result[y, x] + all_matches.append({ + 'y_position': y + template.shape[0] // 2, + 'confidence': confidence, + 'template': template_name + }) + + # Sort by y position and remove duplicates + all_matches.sort(key=lambda x: x['y_position']) + + # Merge nearby matches + merged_matches = [] + for match in all_matches: + if not merged_matches or match['y_position'] - merged_matches[-1]['y_position'] > height // (target_count * 2): + merged_matches.append(match) + else: + # Keep the one with higher confidence + if match['confidence'] > merged_matches[-1]['confidence']: + merged_matches[-1] = match + + # Create bounds + bounds = [] + prev_y = 0 + + for match in merged_matches: + y_pos = match['y_position'] + if y_pos > prev_y + height // (target_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, y_pos - prev_y), + 'confidence': match['confidence'], + 'method': 'template_matching' + }) + prev_y = y_pos + + # Add final panel + if prev_y < height - height // (target_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, height - prev_y), + 'confidence': 0.8, + 'method': 'template_matching' + }) + + return bounds + + def _contour_analysis_method(self, image: np.ndarray, target_count: int) -> List[Dict]: + """Contour-based panel detection""" + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + height, width = gray.shape + + # Find contours + edges = cv2.Canny(gray, 50, 150) + contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + # Filter for rectangular contours + panel_candidates = [] + + for contour in contours: + # Approximate contour to polygon + epsilon = 0.02 * cv2.arcLength(contour, True) + approx = cv2.approxPolyDP(contour, epsilon, True) + + # Check if it's roughly rectangular (4 corners) + if len(approx) >= 4: + x, y, w, h = cv2.boundingRect(contour) + + # Filter by size and aspect ratio + if w > width // 4 and h > height // (target_count * 2): + area = cv2.contourArea(contour) + rect_area = w * h + + # Check if it's mostly rectangular + if area / rect_area > 0.7: + panel_candidates.append({ + 'bounds': (x, y, w, h), + 'confidence': min(1.0, area / rect_area), + 'method': 'contour_analysis' + }) + + # Sort by y position + panel_candidates.sort(key=lambda x: x['bounds'][1]) + + # Remove overlapping candidates + filtered_candidates = [] + for candidate in panel_candidates: + overlap = False + for existing in filtered_candidates: + if self._rectangles_overlap(candidate['bounds'], existing['bounds']): + overlap = True + break + if not overlap: + filtered_candidates.append(candidate) + + return filtered_candidates + + def _texture_analysis_method(self, image: np.ndarray, target_count: int) -> List[Dict]: + """Texture-based panel separation""" + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + height, width = gray.shape + + # Local Binary Pattern analysis + radius = 3 + n_points = 8 * radius + lbp = local_binary_pattern(gray, n_points, radius, method='uniform') + + # Create texture profile by analyzing horizontal strips + strip_height = height // (target_count * 4) + texture_profile = [] + + for y in range(0, height - strip_height, strip_height // 2): + strip = lbp[y:y + strip_height, :] + texture_variance = np.var(strip) + texture_profile.append(texture_variance) + + # Find low-texture regions (potential separators) + texture_profile = np.array(texture_profile) + threshold = np.percentile(texture_profile, 25) # Bottom 25% + + separators = [] + for i, variance in enumerate(texture_profile): + if variance < threshold: + y_position = i * strip_height // 2 + confidence = 1.0 - (variance / np.max(texture_profile)) + separators.append({ + 'y_position': y_position, + 'confidence': confidence + }) + + # Merge nearby separators + merged_separators = [] + for sep in separators: + if not merged_separators or sep['y_position'] - merged_separators[-1]['y_position'] > height // (target_count * 2): + merged_separators.append(sep) + else: + # Keep the one with higher confidence + if sep['confidence'] > merged_separators[-1]['confidence']: + merged_separators[-1] = sep + + # Create bounds + bounds = [] + prev_y = 0 + + for sep in merged_separators: + y_pos = sep['y_position'] + if y_pos > prev_y + height // (target_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, y_pos - prev_y), + 'confidence': sep['confidence'], + 'method': 'texture_analysis' + }) + prev_y = y_pos + + # Add final panel + if prev_y < height - height // (target_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, height - prev_y), + 'confidence': 0.8, + 'method': 'texture_analysis' + }) + + return bounds + + def _clustering_method(self, image: np.ndarray, target_count: int) -> List[Dict]: + """Clustering-based panel detection""" + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + height, width = gray.shape + + # Downsample for faster processing + scale_factor = 0.25 + small_height = int(height * scale_factor) + small_width = int(width * scale_factor) + small_gray = cv2.resize(gray, (small_width, small_height)) + + # Create feature vectors for each pixel (position + intensity) + y_coords, x_coords = np.meshgrid(range(small_height), range(small_width), indexing='ij') + + features = np.column_stack([ + y_coords.flatten(), + x_coords.flatten(), + small_gray.flatten() + ]) + + # Normalize features + features = features.astype(np.float32) + features[:, 0] /= small_height + features[:, 1] /= small_width + features[:, 2] /= 255.0 + + # Apply K-means clustering + n_clusters = target_count + 1 # +1 for potential separators + kmeans = KMeans(n_clusters=n_clusters, random_state=42) + labels = kmeans.fit_predict(features) + + # Reshape labels back to image shape + label_image = labels.reshape(small_height, small_width) + + # Find horizontal boundaries between clusters + boundaries = [] + for y in range(1, small_height - 1): + # Check if this row represents a cluster boundary + current_clusters = set(label_image[y, :]) + above_clusters = set(label_image[y-1, :]) + below_clusters = set(label_image[y+1, :]) + + # If clusters change significantly, it might be a boundary + if len(current_clusters.intersection(above_clusters)) < len(current_clusters) * 0.7 or \ + len(current_clusters.intersection(below_clusters)) < len(current_clusters) * 0.7: + boundaries.append({ + 'y_position': int(y / scale_factor), + 'confidence': 0.7 + }) + + # Create bounds from boundaries + bounds = [] + prev_y = 0 + + for boundary in boundaries: + y_pos = boundary['y_position'] + if y_pos > prev_y + height // (target_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, y_pos - prev_y), + 'confidence': boundary['confidence'], + 'method': 'clustering' + }) + prev_y = y_pos + + # Add final panel + if prev_y < height - height // (target_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, height - prev_y), + 'confidence': 0.8, + 'method': 'clustering' + }) + + return bounds + + def _apply_consensus(self, method_results: List[List[Dict]], image: np.ndarray, + target_count: int) -> List[Dict]: + """Apply consensus voting to combine results from multiple methods""" + height, width = image.shape[:2] + + # Collect all proposed boundaries + all_boundaries = [] + + for method_result in method_results: + for panel in method_result: + bounds = panel['bounds'] + # Add both top and bottom boundaries + all_boundaries.append({ + 'y_position': bounds[1], # Top boundary + 'confidence': panel['confidence'], + 'method': panel['method'], + 'type': 'top' + }) + all_boundaries.append({ + 'y_position': bounds[1] + bounds[3], # Bottom boundary + 'confidence': panel['confidence'], + 'method': panel['method'], + 'type': 'bottom' + }) + + # Sort boundaries by position + all_boundaries.sort(key=lambda x: x['y_position']) + + # Cluster nearby boundaries + clustered_boundaries = [] + cluster_threshold = height // (target_count * 3) + + for boundary in all_boundaries: + # Skip image edges + if boundary['y_position'] < cluster_threshold or boundary['y_position'] > height - cluster_threshold: + continue + + # Find existing cluster or create new one + added_to_cluster = False + for cluster in clustered_boundaries: + if abs(boundary['y_position'] - cluster['y_position']) < cluster_threshold: + # Add to existing cluster + cluster['boundaries'].append(boundary) + # Update cluster position (weighted average) + total_weight = sum(b['confidence'] for b in cluster['boundaries']) + cluster['y_position'] = sum(b['y_position'] * b['confidence'] + for b in cluster['boundaries']) / total_weight + cluster['confidence'] = total_weight / len(cluster['boundaries']) + added_to_cluster = True + break + + if not added_to_cluster: + clustered_boundaries.append({ + 'y_position': boundary['y_position'], + 'confidence': boundary['confidence'], + 'boundaries': [boundary] + }) + + # Sort clustered boundaries and select best ones + clustered_boundaries.sort(key=lambda x: x['y_position']) + + # Filter boundaries based on confidence and target count + min_confidence = 0.3 + good_boundaries = [b for b in clustered_boundaries if b['confidence'] >= min_confidence] + + # Limit to reasonable number of boundaries + if len(good_boundaries) > target_count - 1: + good_boundaries.sort(key=lambda x: x['confidence'], reverse=True) + good_boundaries = good_boundaries[:target_count - 1] + good_boundaries.sort(key=lambda x: x['y_position']) + + # Create final panel bounds + final_bounds = [] + prev_y = 0 + + for boundary in good_boundaries: + y_pos = int(boundary['y_position']) + if y_pos > prev_y + height // (target_count * 2): + method_votes = [b['method'] for b in boundary['boundaries']] + final_bounds.append({ + 'bounds': (0, prev_y, width, y_pos - prev_y), + 'confidence': boundary['confidence'], + 'method_votes': method_votes + }) + prev_y = y_pos + + # Add final panel + if prev_y < height - height // (target_count * 2): + final_bounds.append({ + 'bounds': (0, prev_y, width, height - prev_y), + 'confidence': 0.8, + 'method_votes': ['consensus'] + }) + + return final_bounds + + def _fallback_simple_division(self, image: np.ndarray, target_count: int) -> List[Dict]: + """Fallback method: simple equal division""" + height, width = image.shape[:2] + panel_height = height // target_count + + splits = [] + for i in range(target_count): + y = i * panel_height + h = panel_height if i < target_count - 1 else height - y + + splits.append({ + 'image': image[y:y+h, :], + 'bounds': (0, y, width, h), + 'confidence': 0.5, + 'method_votes': ['simple_division'] + }) + + return splits + + def _match_split_basic(self, split_path: str, master_images: List[str]) -> List[Dict]: + """Basic matching using OpenCV features (fallback)""" + matches = [] + + try: + # Load the split image + split_img = cv2.imread(split_path, cv2.IMREAD_GRAYSCALE) + if split_img is None: + return matches + + # Initialize feature detector + akaze = cv2.AKAZE_create() + bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False) + + # Detect keypoints and descriptors for split image + kp_split, des_split = akaze.detectAndCompute(split_img, None) + + if des_split is None: + return matches + + # Load master images from the master_images directory + master_images_path = Path("master_images") + + for master_id in master_images: + master_path = master_images_path / f"{master_id}.jpg" + if not master_path.exists(): + continue + + # Load master image + master_img = cv2.imread(str(master_path), cv2.IMREAD_GRAYSCALE) + if master_img is None: + continue + + # Detect keypoints and descriptors for master image + kp_master, des_master = akaze.detectAndCompute(master_img, None) + + if des_master is None: + continue + + # Match features + matches_raw = bf.knnMatch(des_split, des_master, k=2) + + # Apply Lowe's ratio test + good_matches = [] + for match_pair in matches_raw: + if len(match_pair) == 2: + m, n = match_pair + if m.distance < 0.7 * n.distance: + good_matches.append(m) + + # If we have enough good matches, try to find homography + if len(good_matches) >= 10: + src_pts = np.float32([kp_split[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2) + dst_pts = np.float32([kp_master[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2) + + try: + M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0) + if M is not None: + inliers = int(np.sum(mask)) + inlier_ratio = inliers / len(good_matches) + + # Basic confidence scoring + if inliers >= 15 and inlier_ratio >= 0.6: + confidence = 'high' + elif inliers >= 8 and inlier_ratio >= 0.4: + confidence = 'medium' + else: + confidence = 'low' + + # Only include medium and high confidence matches + if confidence in ['medium', 'high']: + matches.append({ + 'master_id': master_id, + 'confidence': confidence, + 'inliers': inliers, + 'match_details': { + 'inliers': inliers, + 'good_matches': len(good_matches), + 'inlier_ratio': round(inlier_ratio, 3) + } + }) + except: + continue + + except Exception as e: + print(f"Error in basic matching: {e}") + + return matches + + def _deduplicate_matches(self, result: Dict) -> Dict: + """Remove duplicate matches, keeping highest confidence ones""" + if not result['all_matches']: + return result + + # Group matches by master_id + master_groups = {} + for match in result['all_matches']: + master_id = match['master_id'] + if master_id not in master_groups: + master_groups[master_id] = [] + master_groups[master_id].append(match) + + # Keep only the highest confidence match for each master + deduplicated_matches = [] + for master_id, matches in master_groups.items(): + best_match = max(matches, key=lambda x: x.get('confidence', 0)) + deduplicated_matches.append(best_match) + + result['all_matches'] = deduplicated_matches + result['detected_masters'] = [match['master_id'] for match in deduplicated_matches] + + return result + + def _rectangles_overlap(self, rect1: Tuple[int, int, int, int], + rect2: Tuple[int, int, int, int]) -> bool: + """Check if two rectangles overlap""" + x1, y1, w1, h1 = rect1 + x2, y2, w2, h2 = rect2 + + return not (x1 + w1 < x2 or x2 + w2 < x1 or y1 + h1 < y2 or y2 + h2 < y1) + + def _save_debug_visualization(self, image_path: str, image: np.ndarray, + splits: List[Dict]) -> None: + """Save debug visualization of the splitting results""" + if not self.debug: + return + + base_name = os.path.splitext(os.path.basename(image_path))[0] + + # Create visualization with boundaries + vis_image = image.copy() + + for i, split in enumerate(splits): + x, y, w, h = split['bounds'] + + # Draw rectangle + cv2.rectangle(vis_image, (x, y), (x + w, y + h), (0, 255, 0), 2) + + # Add label + label = f"Panel {i+1} ({split['confidence']:.2f})" + cv2.putText(vis_image, label, (x + 5, y + 20), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2) + + # Save visualization + vis_path = os.path.join(self.debug_dir, f"{base_name}_splits.jpg") + cv2.imwrite(vis_path, vis_image) + + # Save individual splits + for i, split in enumerate(splits): + split_path = os.path.join(self.debug_dir, f"{base_name}_split_{i+1}.jpg") + cv2.imwrite(split_path, split['image']) \ No newline at end of file diff --git a/process_detection.py b/process_detection.py new file mode 100644 index 0000000..2ef794f --- /dev/null +++ b/process_detection.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 +""" +Process Detection Module - Standalone function for processing master image detection +""" + +import os +import json +import time +from pathlib import Path +from PIL import Image, ImageEnhance +import google.generativeai as genai +from dotenv import load_dotenv +import uuid +import threading +import tempfile + + +def process_single_master_detection(layout_path, master_id, master_path, enable_greyscale, enable_contrast_enhancement, contrast_factor, safety_settings): + """ + Standalone function for processing a single master detection in a separate process. + This ensures complete isolation from other workers. + """ + try: + # Load environment in this process + load_dotenv() + api_key = os.getenv('GEMINI_API_KEY') + if not api_key: + raise ValueError("GEMINI_API_KEY not found in environment variables") + + # Configure API client in this process + genai.configure(api_key=api_key) + model = genai.GenerativeModel('gemini-2.5-pro') + + def preprocess_image_process(image_path, enable_greyscale, enable_contrast_enhancement, contrast_factor): + """Process-local image preprocessing""" + if not enable_greyscale and not enable_contrast_enhancement: + return image_path + + try: + with Image.open(image_path) as img: + processed_img = img.copy() + + if enable_greyscale: + processed_img = processed_img.convert('L') + processed_img = processed_img.convert('RGB') + + if enable_contrast_enhancement: + contrast_enhancer = ImageEnhance.Contrast(processed_img) + processed_img = contrast_enhancer.enhance(contrast_factor) + + sharpness_enhancer = ImageEnhance.Sharpness(processed_img) + processed_img = sharpness_enhancer.enhance(1.3) + + # Create unique temp file for this process + process_id = os.getpid() + unique_id = str(uuid.uuid4())[:8] + original_name = Path(image_path).stem + + with tempfile.NamedTemporaryFile(suffix=f"_{process_id}_{unique_id}.jpg", delete=False) as tmp_file: + processed_img.save(tmp_file.name, 'JPEG', quality=95) + return tmp_file.name + + except Exception as e: + return image_path + + def upload_with_retry_process(image_path, max_retries=3): + """Process-local upload with retry""" + for attempt in range(max_retries): + try: + processed_path = preprocess_image_process(image_path, enable_greyscale, enable_contrast_enhancement, contrast_factor) + uploaded_file = genai.upload_file(processed_path) + + # Clean up temp file if it was created + if processed_path != image_path: + try: + os.unlink(processed_path) + except: + pass + + return uploaded_file + except Exception as e: + if attempt == max_retries - 1: + return None + import random + jitter = random.uniform(0.1, 0.5) + sleep_time = (0.5 * (attempt + 1)) + jitter + time.sleep(sleep_time) + return None + + # Upload images + master_file = upload_with_retry_process(master_path) + layout_file = upload_with_retry_process(layout_path) + + if not master_file or not layout_file: + raise Exception("Failed to upload images") + + # Create prompt + prompt = f"""Analyze the layout image (the second image) and determine if the master image (the first image) appears in it. + +INSTRUCTIONS: +1. Compare the master image (first image) with the layout image (second image) +2. Look for EXACT matches where the model, clothing, and pose are IDENTICAL +3. The layout image may contain the master image in various forms: + - Complete/exact match + - Cropped version + - Scaled or resized version + - Rotated version + - Partially obscured + +4. Focus on visual similarity in terms of: + - Person/model appearance and pose (must be EXACTLY the same) + - Clothing details (colors, patterns, styles - must be EXACTLY the same) + - Background and composition + - Overall visual elements + +5. CRITICAL: Only return a positive result if the models, pose, and clothing are EXACTLY the same. + If there is ANY difference in clothing, model, or pose then return a negative result. + +Master Image ID: {master_id} + +Return your response as a JSON object with this exact format: +{{ + "match_found": true/false, + "master_id": "{master_id}", + "confidence": "high/medium/low", + "analysis": "Detailed explanation of your findings and reasoning" +}} + +IMPORTANT CONTEXT: This is a legitimate business application for marketing and e-commerce image matching. The images are product/marketing photos showing models in various clothing styles for retail purposes. This analysis is for content categorization in a business context and is completely benign. +""" + + # Make API call with retry + max_retries = 3 + for attempt in range(max_retries): + try: + response = model.generate_content([prompt, master_file, layout_file], safety_settings=safety_settings) + + if not response.candidates: + if attempt < max_retries - 1: + time.sleep((2 ** attempt) * 0.5) + continue + else: + raise Exception("No candidates returned from API") + + candidate = response.candidates[0] + if candidate.finish_reason and candidate.finish_reason != 1: + if attempt < max_retries - 1: + time.sleep((2 ** attempt) * 0.5) + continue + else: + raise Exception(f"Request finished with reason: {candidate.finish_reason}") + + # Parse response + response_text = response.text.strip() + start_idx = response_text.find('{') + end_idx = response_text.rfind('}') + 1 + + if start_idx == -1 or end_idx == 0: + raise ValueError("No JSON found in response") + + json_str = response_text[start_idx:end_idx] + result = json.loads(json_str) + + # Validate result format + if 'match_found' not in result: + result['match_found'] = False + if 'master_id' not in result: + result['master_id'] = master_id + if 'confidence' not in result: + result['confidence'] = 'unknown' + if 'analysis' not in result: + result['analysis'] = response_text + + return result + + except Exception as e: + if attempt == max_retries - 1: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': '', + 'error': str(e) + } + time.sleep((2 ** attempt) * 0.5) + + except Exception as e: + return { + 'match_found': False, + 'master_id': master_id, + 'confidence': 'unknown', + 'analysis': '', + 'error': str(e) + } \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ec1f9df --- /dev/null +++ b/requirements.txt @@ -0,0 +1,15 @@ +google-generativeai==0.8.5 +pillow==11.2.1 +python-dotenv==1.1.1 +aiofiles==24.1.0 +google-cloud-aiplatform>=1.100.0 +numpy>=2.2.6 +opencv-python>=4.8.0 +openai>=1.58.0 +# New dependencies for panel splitting functionality +scipy>=1.16.0 +scikit-image>=0.25.2 +scikit-learn>=1.7.0 +matplotlib>=3.10.3 +# System monitoring for crash debugging +psutil>=5.9.0 \ No newline at end of file diff --git a/simple_splitter.py b/simple_splitter.py new file mode 100644 index 0000000..c2468f0 --- /dev/null +++ b/simple_splitter.py @@ -0,0 +1,132 @@ +""" +Simple Panel Splitter for Master Adapt Detect + +This module provides a simple panel splitting approach that evenly divides layouts +into panels based on the number of panels detected by OpenAI analysis. It's designed +for use with hybrid mode as an alternative to split-advanced. +""" + +import cv2 +import numpy as np +from typing import List, Tuple, Optional, Dict +import os +from pathlib import Path + + +class SimplePanelSplitter: + """ + Simple panel splitter that evenly divides layouts into panels based on OpenAI analysis count. + + This splitter assumes panels are arranged in a grid pattern and divides the image + evenly based on the detected panel count. It's optimized for speed and simplicity + rather than precision. + """ + + def __init__(self, debug: bool = False): + """ + Initialize the simple panel splitter. + + Args: + debug: Enable debug output and visualization + """ + self.debug = debug + + def _determine_grid_layout(self, panel_count: int) -> Tuple[int, int]: + """ + Determine the grid layout for vertical splitting of horizontal layouts. + For wide horizontal layouts, this creates even vertical sections. + + Args: + panel_count: Number of panels to arrange + + Returns: + Tuple of (rows, cols) - always (1, panel_count) for horizontal splitting + """ + if panel_count <= 0: + return (1, 1) + else: + # Always split horizontally into vertical sections + # This creates even vertical strips across the width + return (1, panel_count) + + def split_panels(self, layout_path: str, panel_count: int) -> List[Dict]: + """ + Split a layout image into individual panels using simple even division. + + Args: + layout_path: Path to the layout image + panel_count: Number of panels to split into (from OpenAI analysis) + + Returns: + List of dictionaries containing image data and metadata + """ + if self.debug: + print(f"SimplePanelSplitter: Splitting {layout_path} into {panel_count} panels") + + # Load the image + image = cv2.imread(layout_path) + if image is None: + print(f"Error: Could not load image {layout_path}") + return [] + + height, width = image.shape[:2] + + # Determine grid layout + rows, cols = self._determine_grid_layout(panel_count) + if self.debug: + print(f"SimplePanelSplitter: Using {rows}x{cols} grid layout") + + # Calculate panel dimensions + panel_height = height // rows + panel_width = width // cols + + splits = [] + panel_index = 0 + + # Extract panels in row-major order + for row in range(rows): + for col in range(cols): + if panel_index >= panel_count: + break + + # Calculate panel boundaries + y_start = row * panel_height + y_end = (row + 1) * panel_height if row < rows - 1 else height + x_start = col * panel_width + x_end = (col + 1) * panel_width if col < cols - 1 else width + + # Extract panel + panel_image = image[y_start:y_end, x_start:x_end] + + # Create panel dictionary in expected format + panel_dict = { + 'image': panel_image, + 'bounds': (x_start, y_start, x_end - x_start, y_end - y_start), + 'confidence': 1.0, # High confidence for simple even division + 'method': 'simple_even_division' + } + splits.append(panel_dict) + + if self.debug: + print(f"SimplePanelSplitter: Extracted panel {panel_index + 1} " + f"at ({x_start},{y_start}) to ({x_end},{y_end})") + + panel_index += 1 + + if panel_index >= panel_count: + break + + if self.debug: + print(f"SimplePanelSplitter: Successfully split into {len(splits)} panels") + + return splits + + def cleanup_split_files(self, splits: List[Dict]) -> None: + """ + Clean up method for compatibility - simple splitter doesn't create files. + + Args: + splits: List of split panel dictionaries (no cleanup needed) + """ + if self.debug: + print(f"SimplePanelSplitter: No cleanup needed - {len(splits)} panels processed in memory") \ No newline at end of file diff --git a/test_14_panel_split.py b/test_14_panel_split.py new file mode 100644 index 0000000..2fbd25c --- /dev/null +++ b/test_14_panel_split.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 +""" +Test script to verify multi-panel layout splitting functionality +Tests the panel splitting with 6786505.jpg (horizontal strip layout) +""" + +import sys +import os +import cv2 +import numpy as np +from pathlib import Path +from panel_splitter import PanelSplitter + +def test_multi_panel_splitting(): + """Test splitting the multi-panel layout image""" + print("=" * 60) + print("TESTING MULTI-PANEL LAYOUT SPLITTING") + print("=" * 60) + + # Initialize splitter with debug mode + splitter = PanelSplitter(debug=True) + + # Target layout file - this is a horizontal strip with many panels + layout_path = Path("layouts") / "6786505.jpg" + + if not layout_path.exists(): + print(f"❌ ERROR: Layout file {layout_path} not found!") + return False + + print(f"📁 Testing with: {layout_path.name}") + + # Load and examine the image + image = cv2.imread(str(layout_path)) + if image is None: + print(f"❌ ERROR: Could not load image {layout_path}") + return False + + height, width = image.shape[:2] + print(f"📐 Image dimensions: {width}x{height}") + + # Target panel count - let's first see how many OpenAI detects, then use that as target + print("🤖 Getting OpenAI panel count first...") + try: + from openai_detector import OpenAIImageDetector + temp_detector = OpenAIImageDetector() + temp_detector.load_master_images() + panel_result = temp_detector.count_panels_in_layout(str(layout_path)) + target_count = panel_result.get('panel_count', 10) + print(f"🎯 OpenAI detected {target_count} panels - using this as target") + except: + target_count = 10 # fallback + print(f"🎯 Using fallback target count: {target_count}") + + print("\n" + "─" * 40) + print("RUNNING PANEL SPLITTING...") + print("─" * 40) + + # Split the layout + splits = splitter.split_panels(str(layout_path), target_count) + + print(f"\n📊 SPLITTING RESULTS:") + print(f"Generated {len(splits)} splits (target: {target_count})") + + # Verify results + success = len(splits) == target_count + + if success: + print(f"✅ SUCCESS: Generated exactly {target_count} splits!") + else: + print(f"⚠️ WARNING: Generated {len(splits)} splits instead of {target_count}") + + print("\n📋 DETAILED SPLIT ANALYSIS:") + print("─" * 40) + + total_area = 0 + original_area = width * height + + for i, split in enumerate(splits): + x, y, w, h = split['bounds'] + area = w * h + total_area += area + + print(f"Split {i+1:2d}: [{x:4d}, {y:4d}, {w:4d}, {h:4d}] " + f"area={area:6d} conf={split['confidence']:.3f}") + + # Verify split bounds are reasonable + if w < 20 or h < 20: + print(f" ⚠️ Split {i+1} is very small!") + if x < 0 or y < 0 or x+w > width or y+h > height: + print(f" ❌ Split {i+1} bounds are out of image!") + + # Calculate coverage + coverage = (total_area / original_area) * 100 + print(f"\n📈 Coverage: {coverage:.1f}% of original image") + + if coverage < 80: + print("⚠️ Low coverage - some areas might be missed") + elif coverage > 120: + print("⚠️ High coverage - splits might be overlapping") + + # Check for overlaps + print("\n🔍 CHECKING FOR OVERLAPS:") + overlaps = 0 + for i, split1 in enumerate(splits): + for j, split2 in enumerate(splits[i+1:], i+1): + if rectangles_overlap(split1['bounds'], split2['bounds']): + overlaps += 1 + print(f" ⚠️ Splits {i+1} and {j+1} overlap!") + + if overlaps == 0: + print(" ✅ No overlaps detected") + else: + print(f" ❌ Found {overlaps} overlapping pairs") + + # Save individual split images for inspection + print("\n💾 SAVING SPLIT IMAGES:") + splits_dir = Path("test_splits") + splits_dir.mkdir(exist_ok=True) + + for i, split in enumerate(splits): + split_filename = splits_dir / f"6786505_split_{i+1:02d}.jpg" + cv2.imwrite(str(split_filename), split['image']) + print(f" Saved: {split_filename}") + + print(f"\n📁 All split images saved to: {splits_dir}/") + + # Method analysis + print("\n🔬 METHOD ANALYSIS:") + method_votes = {} + for split in splits: + for method in split.get('method_votes', []): + method_votes[method] = method_votes.get(method, 0) + 1 + + for method, count in sorted(method_votes.items(), key=lambda x: x[1], reverse=True): + print(f" {method}: {count} votes") + + print("\n" + "=" * 60) + if success: + print(f"🎉 TEST PASSED: Successfully split {target_count}-panel layout!") + else: + print(f"❌ TEST FAILED: Did not produce exactly {target_count} splits") + print("=" * 60) + + return success + +def rectangles_overlap(rect1, rect2): + """Check if two rectangles overlap""" + x1, y1, w1, h1 = rect1 + x2, y2, w2, h2 = rect2 + + return not (x1 + w1 <= x2 or x2 + w2 <= x1 or y1 + h1 <= y2 or y2 + h2 <= y1) + +def test_with_openai_guidance(): + """Test splitting with OpenAI panel count guidance""" + print("\n" + "=" * 60) + print("TESTING WITH OPENAI PANEL COUNT GUIDANCE") + print("=" * 60) + + try: + from openai_detector import OpenAIImageDetector + + # Initialize OpenAI detector + detector = OpenAIImageDetector(split_mode=True) + detector.load_master_images() + + layout_path = str(Path("layouts") / "6786505.jpg") + + print("🤖 Getting OpenAI panel count...") + panel_result = detector.count_panels_in_layout(layout_path) + openai_count = panel_result.get('panel_count', 1) + confidence = panel_result.get('confidence', 'unknown') + + print(f"OpenAI detected: {openai_count} panels (confidence: {confidence})") + + # Test full split_layout_and_match functionality + print("\n🔄 Testing full split_layout_and_match...") + master_ids = list(detector.master_images.keys())[:10] # Test with first 10 masters + + result = detector.splitter.split_layout_and_match(layout_path, master_ids, detector) + + print(f"Split and match result:") + print(f" Splits generated: {result.get('splits_generated', 0)}") + print(f" Panel count: {result.get('panel_count', 'unknown')}") + print(f" Detected masters: {len(result.get('detected_masters', []))}") + + return openai_count >= 5 # Success if we detect at least 5 panels + + except Exception as e: + print(f"❌ Error testing with OpenAI guidance: {e}") + return False + +def main(): + """Main test function""" + print("🧪 STARTING MULTI-PANEL LAYOUT SPLITTING TEST") + + # Test basic splitting + basic_success = test_multi_panel_splitting() + + # Test with OpenAI guidance + openai_success = test_with_openai_guidance() + + print(f"\n📊 FINAL RESULTS:") + print(f"Basic splitting: {'✅ PASSED' if basic_success else '❌ FAILED'}") + print(f"OpenAI guidance: {'✅ PASSED' if openai_success else '❌ FAILED'}") + + if basic_success and openai_success: + print("\n🎉 ALL TESTS PASSED! The multi-panel splitting is working correctly.") + return 0 + else: + print("\n❌ Some tests failed. Please check the output above.") + return 1 + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/test_6786505_cli.py b/test_6786505_cli.py new file mode 100644 index 0000000..96171b4 --- /dev/null +++ b/test_6786505_cli.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +""" +Test script to specifically process 6786505.jpg with the CLI +""" + +import subprocess +import json +import sys +from pathlib import Path + +def test_6786505_processing(): + """Test processing 6786505.jpg with the CLI""" + print("=" * 60) + print("TESTING 6786505.jpg PROCESSING WITH CLI") + print("=" * 60) + + # First backup the layouts directory + layouts_dir = Path("layouts") + all_layouts = list(layouts_dir.glob("*.jpg")) + + # Temporarily remove other layouts so --test processes only 6786505.jpg + temp_dir = Path("temp_layouts") + temp_dir.mkdir(exist_ok=True) + + # Move all layouts except 6786505.jpg to temp directory + for layout in all_layouts: + if layout.name != "6786505.jpg": + layout.rename(temp_dir / layout.name) + + try: + # Run the CLI with split mode + print("Running CLI with split mode...") + result = subprocess.run([ + "python", "cli.py", + "--test", "--split", "--openai" + ], + capture_output=True, text=True, cwd=".", + env={"PATH": "/Users/michael.clervi/Documents/projects/master_adapt_detect/venv/bin:/usr/local/bin:/usr/bin:/bin"} + ) + + print("CLI Output:") + print(result.stdout) + if result.stderr: + print("CLI Errors:") + print(result.stderr) + + # Check if results file was created + results_file = Path("results/test_results_openai_split.json") + if results_file.exists(): + with open(results_file, 'r') as f: + results = json.load(f) + + print(f"\n📊 Results Analysis:") + print(f"Total layouts processed: {results.get('total_layouts', 0)}") + + # Check the layout results + layout_results = results.get('layout_results', {}) + for layout_name, layout_data in layout_results.items(): + print(f"\n📁 Layout: {layout_name}") + print(f" Panel count: {layout_data.get('panel_count', 'unknown')}") + print(f" Splits generated: {layout_data.get('splits_generated', 'unknown')}") + print(f" Detected masters: {len(layout_data.get('detected_masters', []))}") + + # Check if we got 14 panels + if layout_data.get('panel_count') == 14: + print(f" ✅ SUCCESS: Correctly detected 14 panels!") + return True + else: + print(f" ❌ Expected 14 panels, got {layout_data.get('panel_count')}") + return False + else: + print("❌ Results file not found") + return False + + finally: + # Restore all layouts + for layout in temp_dir.glob("*.jpg"): + layout.rename(layouts_dir / layout.name) + temp_dir.rmdir() + + return False + +if __name__ == "__main__": + success = test_6786505_processing() + sys.exit(0 if success else 1) \ No newline at end of file diff --git a/test_cost_calculator.py b/test_cost_calculator.py new file mode 100644 index 0000000..10fa1fa --- /dev/null +++ b/test_cost_calculator.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +""" +Test script for the cost calculator functionality +""" + +import sys +import os +from pathlib import Path + +# Add current directory to path so we can import our modules +sys.path.insert(0, str(Path(__file__).parent)) + +from cost_calculator import CostCalculator, TokenUsage, ApiCallCost, extract_token_usage_from_response + +def test_cost_calculator(): + """Test the cost calculator functionality""" + print("Testing Cost Calculator...") + + # Test 1: Basic cost calculation + print("\n1. Testing basic cost calculation:") + calc = CostCalculator(enable_tracking=True) + + # Test cost calculation with sample token usage + input_cost, output_cost, cached_cost, total_cost = calc.calculate_cost( + prompt_tokens=1500, + completion_tokens=800, + cached_tokens=200 + ) + + print(f" Input tokens (1500): ${input_cost:.4f}") + print(f" Output tokens (800): ${output_cost:.4f}") + print(f" Cached tokens (200): ${cached_cost:.4f}") + print(f" Total cost: ${total_cost:.4f}") + + # Test 2: API call tracking + print("\n2. Testing API call tracking:") + + # Simulate multiple API calls + calc.track_api_call( + operation_type="panel_counting_censorship", + prompt_tokens=1500, + completion_tokens=800, + cached_tokens=200, + layout_name="test_layout_1.jpg" + ) + + calc.track_api_call( + operation_type="detection", + prompt_tokens=2000, + completion_tokens=1200, + cached_tokens=0, + layout_name="test_layout_2.jpg" + ) + + calc.track_api_call( + operation_type="one_at_a_time_detection", + prompt_tokens=800, + completion_tokens=400, + cached_tokens=100, + layout_name="test_layout_3.jpg", + master_id="1011A_1011_05" + ) + + print(f" Tracked {len(calc.api_calls)} API calls") + print(f" Total cost so far: ${calc.total_cost:.4f}") + + # Test 3: Layout cost breakdown + print("\n3. Testing layout cost breakdown:") + + breakdown = calc.get_layout_cost_breakdown("test_layout_1.jpg") + if breakdown: + print(f" Layout: {breakdown['layout_name']}") + print(f" Total cost: ${breakdown['total_cost']:.4f}") + print(f" Input tokens: {breakdown['cost_breakdown']['input_tokens']}") + print(f" Output tokens: {breakdown['cost_breakdown']['output_tokens']}") + print(f" API calls: {breakdown['cost_breakdown']['api_calls_made']}") + + # Test 4: Session summary + print("\n4. Testing session summary:") + + summary = calc.get_session_summary() + if summary['tracking_enabled']: + print(f" Total cost: ${summary['session_totals']['total_cost']:.4f}") + print(f" Total tokens: {summary['session_totals']['total_input_tokens'] + summary['session_totals']['total_output_tokens']:,}") + print(f" Layouts processed: {summary['session_totals']['layouts_processed']}") + print(f" Avg cost per layout: ${summary['averages']['cost_per_layout']:.4f}") + + # Test 5: Monthly cost estimation + print("\n5. Testing monthly cost estimation:") + + estimate = calc.estimate_monthly_cost(300) + if 'error' not in estimate: + print(f" Based on {estimate['based_on_layouts']} layouts:") + print(f" Average cost per layout: ${estimate['average_cost_per_layout']:.4f}") + print(f" Monthly estimate (300 layouts): ${estimate['estimated_monthly_cost']:.2f}") + print(f" Annual estimate: ${estimate['estimated_annual_cost']:.2f}") + + # Test 6: Cost report generation + print("\n6. Testing cost report generation:") + + report_file = calc.save_cost_report("test_cost_report") + if report_file: + print(f" Cost report saved to: {report_file}") + + # Test 7: Print cost summary + print("\n7. Testing cost summary output:") + calc.print_cost_summary() + + print("\nCost calculator test completed successfully!") + +def test_token_usage(): + """Test the TokenUsage data class""" + print("\nTesting TokenUsage data class...") + + # Test valid token usage + usage = TokenUsage( + prompt_tokens=1500, + completion_tokens=800, + total_tokens=2300, + cached_tokens=200 + ) + + print(f" Prompt tokens: {usage.prompt_tokens}") + print(f" Completion tokens: {usage.completion_tokens}") + print(f" Total tokens: {usage.total_tokens}") + print(f" Cached tokens: {usage.cached_tokens}") + + # Test token usage validation + try: + invalid_usage = TokenUsage( + prompt_tokens=1500, + completion_tokens=800, + total_tokens=2000, # Should be 2300 + cached_tokens=200 + ) + print(" ERROR: Should have raised ValueError for invalid total") + except ValueError as e: + print(f" ✓ Correctly caught validation error: {e}") + +def test_disabled_tracking(): + """Test cost calculator with tracking disabled""" + print("\nTesting disabled cost tracking...") + + calc = CostCalculator(enable_tracking=False) + + # All operations should return zeros or empty results + input_cost, output_cost, cached_cost, total_cost = calc.calculate_cost(1500, 800, 200) + print(f" Cost calculation (disabled): ${total_cost:.4f}") + + api_call = calc.track_api_call("test", 1500, 800, 200, "test.jpg") + print(f" API call tracking (disabled): ${api_call.total_cost:.4f}") + + summary = calc.get_session_summary() + print(f" Session summary (disabled): {summary['tracking_enabled']}") + +if __name__ == "__main__": + try: + test_cost_calculator() + test_token_usage() + test_disabled_tracking() + print("\n✅ All tests passed!") + except Exception as e: + print(f"\n❌ Test failed: {e}") + import traceback + traceback.print_exc() + sys.exit(1) \ No newline at end of file diff --git a/test_cost_tracking_integration.py b/test_cost_tracking_integration.py new file mode 100644 index 0000000..8018d58 --- /dev/null +++ b/test_cost_tracking_integration.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +""" +Test script to demonstrate cost tracking functionality +""" + +import subprocess +import sys +from pathlib import Path + +def run_cost_tracking_test(): + """Run a test with cost tracking enabled""" + print("Testing cost tracking with hybrid mode...") + print("=" * 60) + + # Test command with cost tracking enabled + cmd = [ + sys.executable, "cli.py", + "--test", + "--hybrid", + "--enable-cost-tracking", + "--cost-report" + ] + + print(f"Running command: {' '.join(cmd)}") + print("-" * 60) + + try: + result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path(__file__).parent) + + print("STDOUT:") + print(result.stdout) + + if result.stderr: + print("\nSTDERR:") + print(result.stderr) + + print(f"\nReturn code: {result.returncode}") + + # Check if cost report was generated + cost_reports = list(Path("results").glob("cost_report_*.json")) + if cost_reports: + print(f"\n✅ Cost report generated: {cost_reports[-1]}") + else: + print("\n❌ No cost report found") + + except Exception as e: + print(f"❌ Error running test: {e}") + +def run_without_cost_tracking(): + """Run a test without cost tracking for comparison""" + print("\nTesting without cost tracking...") + print("=" * 60) + + # Test command without cost tracking + cmd = [ + sys.executable, "cli.py", + "--test", + "--hybrid" + ] + + print(f"Running command: {' '.join(cmd)}") + print("-" * 60) + + try: + result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path(__file__).parent) + + print("STDOUT:") + print(result.stdout) + + if result.stderr: + print("\nSTDERR:") + print(result.stderr) + + print(f"\nReturn code: {result.returncode}") + + except Exception as e: + print(f"❌ Error running test: {e}") + +if __name__ == "__main__": + print("Cost Tracking Integration Test") + print("=" * 60) + + # Test 1: With cost tracking + run_cost_tracking_test() + + # Test 2: Without cost tracking + run_without_cost_tracking() + + print("\n" + "=" * 60) + print("Integration test completed!") + print("=" * 60) \ No newline at end of file diff --git a/test_horizontal_splitting.py b/test_horizontal_splitting.py new file mode 100644 index 0000000..d5076f9 --- /dev/null +++ b/test_horizontal_splitting.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +import cv2 +import numpy as np +from pathlib import Path +import os + +def analyze_horizontal_panels(image_path: str, debug=True): + """Analyze horizontal panel structure and test splitting parameters""" + print(f"Analyzing {Path(image_path).name}") + + # Load image + img = cv2.imread(image_path) + height, width = img.shape[:2] + print(f"Image dimensions: {width}x{height}") + + # Convert to grayscale + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + # Method 1: Simple equal division + print("\n=== Method 1: Equal Division ===") + equal_panels = [] + for num_panels in [8, 10, 12]: + panel_width = width // num_panels + panels = [] + for i in range(num_panels): + x1 = i * panel_width + x2 = (i + 1) * panel_width if i < num_panels - 1 else width + panels.append((x1, 0, x2, height)) + equal_panels.append((num_panels, panels)) + print(f" {num_panels} panels: widths = {[x2-x1 for x1, y1, x2, y2 in panels]}") + + # Method 2: Edge-based detection + print("\n=== Method 2: Edge Detection ===") + edges = cv2.Canny(gray, 50, 150, apertureSize=3) + + # Create vertical line detection kernel + vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, height // 4)) + vertical_lines = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, vertical_kernel) + + # Get vertical projection + vertical_projection = np.sum(vertical_lines, axis=0) + + # Find peaks for different thresholds + for threshold_factor in [0.3, 0.4, 0.5, 0.6, 0.7]: + threshold = np.max(vertical_projection) * threshold_factor + separators = [] + + for x in range(width): + if vertical_projection[x] > threshold: + # Ensure minimum distance between separators + if not separators or x - separators[-1] > 30: + separators.append(x) + + # Create panels from separators + x_boundaries = [0] + separators + [width] + x_boundaries = sorted(list(set(x_boundaries))) + + panels = [] + for i in range(len(x_boundaries) - 1): + x1, x2 = x_boundaries[i], x_boundaries[i + 1] + if x2 - x1 > 50: # Minimum panel width + panels.append((x1, 0, x2, height)) + + print(f" Threshold {threshold_factor}: {len(panels)} panels, widths = {[x2-x1 for x1, y1, x2, y2 in panels]}") + + # Method 3: Adaptive histogram analysis + print("\n=== Method 3: Histogram Analysis ===") + + # Calculate horizontal histogram (sum of pixel intensities) + horizontal_hist = np.sum(gray, axis=0) + + # Find local minima (potential separators) + from scipy.signal import find_peaks + + # Invert histogram to find valleys (separators) + inverted_hist = np.max(horizontal_hist) - horizontal_hist + + # Find peaks in inverted histogram (valleys in original) + for min_distance in [50, 80, 100, 120]: + peaks, _ = find_peaks(inverted_hist, distance=min_distance, height=np.max(inverted_hist) * 0.1) + + # Create panels + x_boundaries = [0] + list(peaks) + [width] + x_boundaries = sorted(list(set(x_boundaries))) + + panels = [] + for i in range(len(x_boundaries) - 1): + x1, x2 = x_boundaries[i], x_boundaries[i + 1] + if x2 - x1 > 50: + panels.append((x1, 0, x2, height)) + + print(f" Min distance {min_distance}: {len(panels)} panels, widths = {[x2-x1 for x1, y1, x2, y2 in panels]}") + + if debug: + # Save debug images + debug_dir = Path("debug_splitting") + debug_dir.mkdir(exist_ok=True) + + # Save edge detection result + cv2.imwrite(str(debug_dir / f"{Path(image_path).stem}_edges.jpg"), edges) + cv2.imwrite(str(debug_dir / f"{Path(image_path).stem}_vertical_lines.jpg"), vertical_lines) + + # Save histogram visualization + import matplotlib.pyplot as plt + plt.figure(figsize=(15, 5)) + plt.plot(horizontal_hist) + plt.title("Horizontal Histogram") + plt.savefig(debug_dir / f"{Path(image_path).stem}_histogram.png") + plt.close() + + # Save inverted histogram with peaks + plt.figure(figsize=(15, 5)) + plt.plot(inverted_hist) + peaks, _ = find_peaks(inverted_hist, distance=100, height=np.max(inverted_hist) * 0.1) + plt.scatter(peaks, inverted_hist[peaks], color='red', zorder=5) + plt.title("Inverted Histogram with Detected Peaks") + plt.savefig(debug_dir / f"{Path(image_path).stem}_peaks.png") + plt.close() + + print(f"\nDebug images saved to {debug_dir}/") + + return equal_panels + +def test_best_method(image_path: str, expected_panels: int): + """Test the best splitting method for accurate panel detection""" + print(f"\n=== Testing Best Method for {expected_panels} expected panels ===") + + img = cv2.imread(image_path) + height, width = img.shape[:2] + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + # Combination approach: Use histogram analysis with refinement + horizontal_hist = np.sum(gray, axis=0) + inverted_hist = np.max(horizontal_hist) - horizontal_hist + + # Smooth the inverted histogram to reduce noise + from scipy.ndimage import gaussian_filter1d + smoothed_hist = gaussian_filter1d(inverted_hist, sigma=10) + + # Find peaks with optimized parameters + from scipy.signal import find_peaks + min_distance = width // (expected_panels * 1.5) # Adaptive distance based on expected panels + peaks, properties = find_peaks(smoothed_hist, + distance=min_distance, + height=np.max(smoothed_hist) * 0.15, + prominence=np.max(smoothed_hist) * 0.1) + + print(f"Found {len(peaks)} separator peaks with min_distance={min_distance}") + + # Create panels + x_boundaries = [0] + list(peaks) + [width] + x_boundaries = sorted(list(set(x_boundaries))) + + panels = [] + for i in range(len(x_boundaries) - 1): + x1, x2 = x_boundaries[i], x_boundaries[i + 1] + if x2 - x1 > 50: # Minimum reasonable panel width + panels.append((x1, 0, x2, height)) + + print(f"Generated {len(panels)} panels:") + for i, (x1, y1, x2, y2) in enumerate(panels): + print(f" Panel {i+1}: x={x1}-{x2} (width={x2-x1})") + + # Save crops for visual verification + crops_dir = Path("test_crops") + crops_dir.mkdir(exist_ok=True) + + for i, (x1, y1, x2, y2) in enumerate(panels): + crop = img[y1:y2, x1:x2] + crop_path = crops_dir / f"{Path(image_path).stem}_panel_{i+1:02d}.jpg" + cv2.imwrite(str(crop_path), crop) + + print(f"\nCrops saved to {crops_dir}/ - Check these to verify panel accuracy!") + + return panels + +if __name__ == "__main__": + # Test on the multi-panel layout + test_image = "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6786500.jpg" + + if os.path.exists(test_image): + print("Analyzing horizontal panel structure...") + analyze_horizontal_panels(test_image, debug=True) + + print("\n" + "="*60) + test_best_method(test_image, expected_panels=10) + else: + print(f"Test image not found: {test_image}") \ No newline at end of file diff --git a/test_hybrid.py b/test_hybrid.py new file mode 100644 index 0000000..3e8804a --- /dev/null +++ b/test_hybrid.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +""" +Test script for hybrid detection implementation +""" + +import sys +import os +from pathlib import Path + +# Add current directory to Python path +sys.path.insert(0, str(Path(__file__).parent)) + +def test_hybrid_import(): + """Test that hybrid detector can be imported""" + try: + from hybrid_detector import HybridImageDetector + print("✓ Successfully imported HybridImageDetector") + return True + except ImportError as e: + print(f"✗ Failed to import HybridImageDetector: {e}") + return False + +def test_hybrid_initialization(): + """Test hybrid detector initialization""" + try: + from hybrid_detector import HybridImageDetector + + # Test with default settings + detector = HybridImageDetector() + print("✓ Successfully initialized HybridImageDetector with defaults") + + # Test with custom settings + detector2 = HybridImageDetector( + panel_threshold=3, + inlier_threshold=0.7, + enable_greyscale=True, + enable_contrast_enhancement=True + ) + print("✓ Successfully initialized HybridImageDetector with custom settings") + + # Check attributes + assert detector.panel_threshold == 2 + assert detector.inlier_threshold == 0.65 + assert detector.enable_greyscale == False + assert detector.enable_contrast_enhancement == False + + assert detector2.panel_threshold == 3 + assert detector2.inlier_threshold == 0.7 + assert detector2.enable_greyscale == True + assert detector2.enable_contrast_enhancement == True + + print("✓ All attributes set correctly") + return True + + except Exception as e: + print(f"✗ Failed to initialize HybridImageDetector: {e}") + return False + +def test_required_files(): + """Test that required files exist""" + required_files = [ + "layouts/", + "master_images/", + "openai_detector.py", + "hybrid_detector.py" + ] + + missing_files = [] + for file_path in required_files: + if not os.path.exists(file_path): + missing_files.append(file_path) + + if missing_files: + print(f"✗ Missing required files: {missing_files}") + return False + else: + print("✓ All required files exist") + return True + +def test_cli_help(): + """Test CLI help includes hybrid mode""" + try: + import subprocess + result = subprocess.run([sys.executable, "cli.py", "--help"], + capture_output=True, text=True) + + if "--hybrid" in result.stdout: + print("✓ CLI help includes --hybrid flag") + return True + else: + print("✗ CLI help does not include --hybrid flag") + return False + + except Exception as e: + print(f"✗ Failed to test CLI help: {e}") + return False + +def main(): + """Run all tests""" + print("Testing Hybrid Detection Implementation") + print("=" * 50) + + tests = [ + ("Import Test", test_hybrid_import), + ("Initialization Test", test_hybrid_initialization), + ("Required Files Test", test_required_files), + ("CLI Help Test", test_cli_help) + ] + + passed = 0 + total = len(tests) + + for test_name, test_func in tests: + print(f"\n{test_name}:") + try: + if test_func(): + passed += 1 + except Exception as e: + print(f"✗ {test_name} failed with exception: {e}") + + print(f"\n{'=' * 50}") + print(f"Test Results: {passed}/{total} tests passed") + + if passed == total: + print("🎉 All tests passed! Hybrid implementation is ready.") + return 0 + else: + print("❌ Some tests failed. Please check the implementation.") + return 1 + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/test_memory_fix.py b/test_memory_fix.py new file mode 100644 index 0000000..c5c0f66 --- /dev/null +++ b/test_memory_fix.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 +""" +Test script to verify memory management fixes +""" + +import sys +from memory_manager import MemoryManager, reduce_feature_count +from hybrid_detector import HybridImageDetector + +def test_memory_manager(): + """Test memory manager functionality""" + print("Testing Memory Manager...") + + mm = MemoryManager(max_memory_percent=75, max_swap_percent=30) + + # Test memory usage reporting + usage = mm.get_memory_usage() + print(f"Current memory usage: {usage['memory_percent']:.1f}%") + print(f"Current swap usage: {usage['swap_percent']:.1f}%") + print(f"Available memory: {usage['memory_available_gb']:.1f} GB") + + # Test that swap usage doesn't block processing + print(f"\nTesting is_memory_safe with swap usage {usage['swap_percent']:.1f}%:") + is_safe = mm.is_memory_safe() + print(f"Memory safe: {is_safe} (should be True if RAM < 75%, regardless of swap)") + + # Test concurrent process limiting + safe_processes = mm.limit_concurrent_processes() + print(f"Safe concurrent processes: {safe_processes}") + + print("Memory Manager test completed ✓") + +def test_hybrid_detector_memory_settings(): + """Test hybrid detector memory settings""" + print("\nTesting Hybrid Detector Memory Settings...") + + try: + detector = HybridImageDetector( + panel_threshold=2, + inlier_threshold=0.65, + local_workers=4 # Reduced for testing + ) + + print(f"Memory manager initialized: {detector.memory_manager is not None}") + print(f"Max memory percent: {detector.memory_manager.max_memory_percent}%") + print(f"Max swap percent: {detector.memory_manager.max_swap_percent}%") + + print("Hybrid Detector memory settings test completed ✓") + + except Exception as e: + print(f"Error testing hybrid detector: {e}") + return False + + return True + +def test_feature_reduction(): + """Test feature reduction functionality""" + print("\nTesting Feature Reduction...") + + # Mock features (normally cv2.KeyPoint objects) + class MockFeature: + def __init__(self, response): + self.response = response + + # Create mock features + features = [MockFeature(i) for i in range(15000)] + print(f"Original feature count: {len(features)}") + + # Test reduction + reduced = reduce_feature_count(features, max_features=10000) + print(f"Reduced feature count: {len(reduced)}") + + # Should keep the best features (highest response values) + if len(reduced) == 10000: + print("Feature reduction test completed ✓") + return True + else: + print("Feature reduction test failed ✗") + return False + +if __name__ == "__main__": + print("="*60) + print("MEMORY MANAGEMENT TEST SUITE") + print("="*60) + + success = True + + # Test 1: Memory Manager + try: + test_memory_manager() + except Exception as e: + print(f"Memory Manager test failed: {e}") + success = False + + # Test 2: Hybrid Detector + try: + success &= test_hybrid_detector_memory_settings() + except Exception as e: + print(f"Hybrid Detector test failed: {e}") + success = False + + # Test 3: Feature Reduction + try: + success &= test_feature_reduction() + except Exception as e: + print(f"Feature Reduction test failed: {e}") + success = False + + print("\n" + "="*60) + if success: + print("✓ ALL TESTS PASSED - Memory management is working") + print("The system should now be protected against memory crashes.") + else: + print("✗ SOME TESTS FAILED - Check the errors above") + print("="*60) \ No newline at end of file diff --git a/test_one_at_a_time_cost_tracking.py b/test_one_at_a_time_cost_tracking.py new file mode 100644 index 0000000..59367dc --- /dev/null +++ b/test_one_at_a_time_cost_tracking.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +""" +Test script to demonstrate one-at-a-time cost tracking functionality +""" + +import subprocess +import sys +import json +from pathlib import Path + +def run_one_at_a_time_test(): + """Run a test with one-at-a-time mode and cost tracking""" + print("Testing one-at-a-time mode with cost tracking...") + print("=" * 60) + + # Test command with one-at-a-time mode and cost tracking + cmd = [ + sys.executable, "cli.py", + "--test", + "--openai", + "--one-at-a-time", + "--concurrent-workers", "3", # Lower concurrency for testing + "--enable-cost-tracking", + "--cost-report" + ] + + print(f"Running command: {' '.join(cmd)}") + print("This will make 41 separate API calls (one per master image)") + print("-" * 60) + + try: + result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path(__file__).parent) + + print("STDOUT:") + print(result.stdout) + + if result.stderr: + print("\nSTDERR:") + print(result.stderr) + + print(f"\nReturn code: {result.returncode}") + + # Check if cost report was generated and analyze it + cost_reports = list(Path("results").glob("cost_report_*.json")) + if cost_reports: + latest_report = cost_reports[-1] + print(f"\n✅ Cost report generated: {latest_report}") + + # Analyze the cost report + try: + with open(latest_report, 'r') as f: + cost_data = json.load(f) + + session_summary = cost_data.get('session_summary', {}) + if session_summary.get('tracking_enabled'): + totals = session_summary.get('session_totals', {}) + operation_breakdown = session_summary.get('operation_breakdown', {}) + + print(f"\n📊 Cost Analysis:") + print(f" Total cost: ${totals.get('total_cost', 0):.4f}") + print(f" Total API calls: {totals.get('total_api_calls', 0)}") + print(f" Total tokens: {totals.get('total_input_tokens', 0) + totals.get('total_output_tokens', 0):,}") + + if operation_breakdown: + print(f"\n🔍 Operation Breakdown:") + for op_type, count in operation_breakdown.items(): + print(f" {op_type}: {count} calls") + + # Check for one-at-a-time detection calls + one_at_a_time_calls = operation_breakdown.get('one_at_a_time_detection', 0) + if one_at_a_time_calls > 0: + print(f"\n✅ One-at-a-time cost tracking working: {one_at_a_time_calls} individual API calls tracked") + else: + print(f"\n❌ One-at-a-time cost tracking not working: No individual API calls found") + + except Exception as e: + print(f"❌ Error analyzing cost report: {e}") + else: + print("\n❌ No cost report found") + + except Exception as e: + print(f"❌ Error running test: {e}") + +def run_hybrid_comparison(): + """Run hybrid mode for comparison""" + print("\n\nTesting hybrid mode for cost comparison...") + print("=" * 60) + + # Test hybrid mode with cost tracking + cmd = [ + sys.executable, "cli.py", + "--test", + "--hybrid", + "--enable-cost-tracking" + ] + + print(f"Running command: {' '.join(cmd)}") + print("This will make 1 API call (panel counting + censorship)") + print("-" * 60) + + try: + result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path(__file__).parent) + + print("STDOUT:") + print(result.stdout) + + if result.stderr: + print("\nSTDERR:") + print(result.stderr) + + print(f"\nReturn code: {result.returncode}") + + except Exception as e: + print(f"❌ Error running test: {e}") + +if __name__ == "__main__": + print("One-at-a-Time Cost Tracking Test") + print("=" * 60) + + print("This test will demonstrate the cost difference between:") + print("1. One-at-a-time mode: 41 API calls (one per master)") + print("2. Hybrid mode: 1 API call (panel counting only)") + print() + + # Test 1: One-at-a-time mode with cost tracking + run_one_at_a_time_test() + + # Test 2: Hybrid mode for comparison + run_hybrid_comparison() + + print("\n" + "=" * 60) + print("Cost comparison test completed!") + print("=" * 60) + print("\n💡 Key takeaways:") + print("- One-at-a-time mode: High accuracy, high cost (41 API calls)") + print("- Hybrid mode: Good accuracy, low cost (1 API call)") + print("- Cost tracking shows the exact difference in API usage") + print("=" * 60) \ No newline at end of file diff --git a/test_optimized_canny.py b/test_optimized_canny.py new file mode 100644 index 0000000..452ced6 --- /dev/null +++ b/test_optimized_canny.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +""" +Test script for optimized Canny detection method +""" + +import sys +import os +import cv2 +import numpy as np +from pathlib import Path +from panel_splitter import PanelSplitter + +def test_14_panel_splitting(): + """Test 14-panel splitting with optimized Canny detection""" + print("=" * 60) + print("TESTING 14-PANEL SPLITTING WITH OPTIMIZED CANNY") + print("=" * 60) + + # Initialize splitter with debug mode + splitter = PanelSplitter(debug=True) + + # Target layout file + layout_path = Path("layouts") / "6786505.jpg" + + if not layout_path.exists(): + print(f"❌ ERROR: Layout file {layout_path} not found!") + return False + + print(f"📁 Testing with: {layout_path.name}") + + # Load and examine the image + image = cv2.imread(str(layout_path)) + if image is None: + print(f"❌ ERROR: Could not load image {layout_path}") + return False + + height, width = image.shape[:2] + print(f"📐 Image dimensions: {width}x{height}") + + # Test with target count 14 + target_count = 14 + print(f"\n🎯 Testing with target count: {target_count}") + print("─" * 40) + + # Split the layout + splits = splitter.split_panels(str(layout_path), target_count) + + print(f"\n📊 RESULTS:") + print(f"Generated {len(splits)} splits (target: {target_count})") + + # Check if we got exactly 14 panels + success = len(splits) == target_count + + if success: + print(f"✅ SUCCESS: Generated exactly {target_count} splits!") + else: + print(f"❌ FAILURE: Generated {len(splits)} splits instead of {target_count}") + + # Save split images + if len(splits) > 0: + splits_dir = Path("test_splits") + splits_dir.mkdir(exist_ok=True) + + for i, split in enumerate(splits): + split_filename = splits_dir / f"6786505_14panel_split_{i+1:02d}.jpg" + cv2.imwrite(str(split_filename), split['image']) + + print(f"\n💾 Saved {len(splits)} split images to test_splits/") + + # Show split details + print("\n📋 Split details:") + for i, split in enumerate(splits): + x, y, w, h = split['bounds'] + print(f" Split {i+1:2d}: [{x:4d}, {y:4d}, {w:4d}, {h:4d}] conf={split['confidence']:.3f}") + + print("\n" + "=" * 60) + if success: + print("🎉 TEST PASSED: 14-panel splitting is working!") + else: + print("❌ TEST FAILED: 14-panel splitting needs adjustment") + print("=" * 60) + + return success + +def main(): + """Main test function""" + print("🧪 STARTING OPTIMIZED CANNY 14-PANEL TEST") + + success = test_14_panel_splitting() + + if success: + print("\n🎉 SUCCESS: Optimized Canny detection produces exactly 14 panels!") + return 0 + else: + print("\n❌ FAILURE: Optimized Canny detection needs further tuning") + return 1 + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/test_panel_accuracy.py b/test_panel_accuracy.py new file mode 100644 index 0000000..04ba5e2 --- /dev/null +++ b/test_panel_accuracy.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python3 +""" +Test horizontal splitting accuracy on different panel types +""" +import cv2 +import numpy as np +from pathlib import Path +import os + +def test_splitting_accuracy(image_path: str, expected_panels: int, layout_type: str): + """Test horizontal splitting accuracy for a specific layout""" + print(f"\n{'='*60}") + print(f"Testing {layout_type}: {Path(image_path).name}") + print(f"Expected panels: {expected_panels}") + print(f"{'='*60}") + + # Load image + img = cv2.imread(image_path) + height, width = img.shape[:2] + print(f"Image dimensions: {width}x{height}") + + # Test current algorithm + crops = test_current_algorithm(img, width, height, expected_panels) + + # Save crop previews + save_crop_previews(img, crops, image_path, expected_panels) + + # Analyze accuracy + analyze_accuracy(crops, expected_panels, layout_type) + + return crops + +def test_current_algorithm(img, width: int, height: int, expected_panels: int): + """Test the current horizontal splitting algorithm""" + crops = [] + + # Current algorithm logic + if width > height * 1.2: # Wide image, horizontal panels + print(f"Using horizontal splitting for {width}x{height} image") + + # Convert to grayscale for analysis + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + # Calculate horizontal histogram + horizontal_hist = np.sum(gray, axis=0) + inverted_hist = np.max(horizontal_hist) - horizontal_hist + + # Smooth the inverted histogram + from scipy.ndimage import gaussian_filter1d + smoothed_hist = gaussian_filter1d(inverted_hist, sigma=10) + + # Current parameters + expected_panels_est = min(15, max(6, width // 800)) + min_distance = width // (expected_panels_est * 1.5) + + print(f"Algorithm estimates {expected_panels_est} panels, min_distance={min_distance}") + + # Find peaks + from scipy.signal import find_peaks + peaks, properties = find_peaks(smoothed_hist, + distance=min_distance, + height=np.max(smoothed_hist) * 0.15, + prominence=np.max(smoothed_hist) * 0.1) + + print(f"Found {len(peaks)} separator peaks") + + # Create panels + x_boundaries = [0] + list(peaks) + [width] + x_boundaries = sorted(list(set(x_boundaries))) + + for i in range(len(x_boundaries) - 1): + x1, x2 = x_boundaries[i], x_boundaries[i + 1] + if x2 - x1 >= 200: # min_crop_size + crops.append({ + 'bbox': (x1, 0, x2, height), + 'width': x2 - x1, + 'height': height, + 'crop_id': f"horizontal_{i}" + }) + + print(f"Generated {len(crops)} crops") + else: + print("Image not wide enough for horizontal splitting") + crops.append({ + 'bbox': (0, 0, width, height), + 'width': width, + 'height': height, + 'crop_id': "single" + }) + + return crops + +def save_crop_previews(img, crops, image_path: str, expected_panels: int): + """Save individual crop images for visual verification""" + base_name = Path(image_path).stem + crops_dir = Path("panel_test_crops") + crops_dir.mkdir(exist_ok=True) + + print(f"\nSaving {len(crops)} crop previews to {crops_dir}/") + + for i, crop in enumerate(crops): + x1, y1, x2, y2 = crop['bbox'] + cropped = img[y1:y2, x1:x2] + + crop_filename = f"{base_name}_expected{expected_panels}_crop{i+1:02d}.jpg" + crop_path = crops_dir / crop_filename + cv2.imwrite(str(crop_path), cropped) + + print(f" Crop {i+1}: {crop['width']}px wide -> {crop_filename}") + +def analyze_accuracy(crops, expected_panels: int, layout_type: str): + """Analyze how well the splitting matches expectations""" + detected_panels = len(crops) + + print(f"\n--- ACCURACY ANALYSIS ---") + print(f"Layout type: {layout_type}") + print(f"Expected panels: {expected_panels}") + print(f"Detected panels: {detected_panels}") + + if detected_panels == expected_panels: + print("✅ PERFECT MATCH!") + elif abs(detected_panels - expected_panels) <= 1: + print("✅ CLOSE MATCH (within 1)") + elif detected_panels < expected_panels: + print("❌ UNDER-SEGMENTATION (missing splits)") + else: + print("❌ OVER-SEGMENTATION (too many splits)") + + # Analyze crop sizes + widths = [crop['width'] for crop in crops] + avg_width = np.mean(widths) + std_width = np.std(widths) + + print(f"Crop widths: {widths}") + print(f"Average width: {avg_width:.0f}px (±{std_width:.0f}px)") + + # Check for suspiciously small or large crops + min_reasonable = 300 # Minimum reasonable panel width + max_reasonable = 2000 # Maximum reasonable panel width + + small_crops = [w for w in widths if w < min_reasonable] + large_crops = [w for w in widths if w > max_reasonable] + + if small_crops: + print(f"⚠️ Warning: {len(small_crops)} suspiciously small crops: {small_crops}") + if large_crops: + print(f"⚠️ Warning: {len(large_crops)} suspiciously large crops: {large_crops}") + +def main(): + """Test horizontal splitting on various layout types""" + + test_cases = [ + # Single panels (should not be split) + { + "path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6785934.jpg", + "expected": 1, + "type": "Single Panel" + }, + { + "path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6813573.jpg", + "expected": 1, + "type": "Single Panel" + }, + + # Double panels + { + "path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6785852.jpg", + "expected": 2, + "type": "Double Panel" + }, + + # 4-panel layouts + { + "path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6799150.jpg", + "expected": 4, + "type": "4-Panel Layout" + }, + { + "path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6813643.jpg", + "expected": 4, + "type": "4-Panel Layout" + }, + + # Multi-panel layouts + { + "path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6791144.jpg", + "expected": 8, + "type": "Multi-Panel Layout" + }, + { + "path": "/Users/michael.clervi/Documents/projects/master_adapt_detect/layouts/6786505.jpg", + "expected": 10, + "type": "Multi-Panel Layout" + } + ] + + print("HORIZONTAL SPLITTING ACCURACY TEST") + print("="*60) + + results = [] + + for test_case in test_cases: + if os.path.exists(test_case["path"]): + crops = test_splitting_accuracy( + test_case["path"], + test_case["expected"], + test_case["type"] + ) + + results.append({ + "file": Path(test_case["path"]).name, + "type": test_case["type"], + "expected": test_case["expected"], + "detected": len(crops), + "accurate": abs(len(crops) - test_case["expected"]) <= 1 + }) + else: + print(f"⚠️ File not found: {test_case['path']}") + + # Summary + print(f"\n{'='*60}") + print("SUMMARY") + print(f"{'='*60}") + + accurate_count = sum(1 for r in results if r["accurate"]) + total_count = len(results) + + print(f"Accurate results: {accurate_count}/{total_count} ({accurate_count/total_count*100:.1f}%)") + print() + + for result in results: + status = "✅" if result["accurate"] else "❌" + print(f"{status} {result['file']}: {result['detected']}/{result['expected']} panels ({result['type']})") + + print(f"\nCrop previews saved to: panel_test_crops/") + print("Review the crop images to verify splitting accuracy!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_parallel_implementation.py b/test_parallel_implementation.py new file mode 100644 index 0000000..37ed853 --- /dev/null +++ b/test_parallel_implementation.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +""" +Test script for parallel layout processing implementation +""" + +import sys +import os +from pathlib import Path + +# Add current directory to path +sys.path.insert(0, os.getcwd()) + +def test_parallel_processing(): + """Test the parallel processing implementation""" + + print("Testing parallel layout processing implementation...") + + # Test 1: Import all classes + try: + from hybrid_detector import HybridImageDetector, InlierAnalysisCoordinator, ProgressTracker + print("✓ Successfully imported all classes") + except ImportError as e: + print(f"✗ Import error: {e}") + return False + + # Test 2: Create InlierAnalysisCoordinator + try: + from memory_manager import MemoryManager + memory_manager = MemoryManager() + coordinator = InlierAnalysisCoordinator( + local_workers=2, + memory_manager=memory_manager, + min_good_matches=10 + ) + print("✓ Successfully created InlierAnalysisCoordinator") + except Exception as e: + print(f"✗ Error creating coordinator: {e}") + return False + + # Test 3: Create ProgressTracker + try: + tracker = ProgressTracker(total_layouts=100) + info = tracker.get_progress_info() + print(f"✓ Successfully created ProgressTracker (total: {info['total']})") + except Exception as e: + print(f"✗ Error creating progress tracker: {e}") + return False + + # Test 4: Create HybridImageDetector with parallel processing + try: + detector = HybridImageDetector( + panel_threshold=2, + inlier_threshold=0.65, + parallel_layouts=True, + layout_workers=2, + max_concurrent_layouts=2 + ) + print("✓ Successfully created HybridImageDetector with parallel processing") + print(f" - Parallel layouts: {detector.parallel_layouts}") + print(f" - Layout workers: {detector.layout_workers}") + print(f" - Max concurrent layouts: {detector.max_concurrent_layouts}") + except Exception as e: + print(f"✗ Error creating detector: {e}") + return False + + # Test 5: Test coordinator start/stop + try: + coordinator.start() + print("✓ Successfully started coordinator") + + # Test queue size + queue_size = coordinator.get_queue_size() + print(f" - Queue size: {queue_size}") + + coordinator.stop() + print("✓ Successfully stopped coordinator") + except Exception as e: + print(f"✗ Error with coordinator lifecycle: {e}") + return False + + # Test 6: Test memory monitoring + try: + memory_adjusted = detector._monitor_memory_and_adjust_workers() + print(f"✓ Memory monitoring executed (adjustments made: {memory_adjusted})") + except Exception as e: + print(f"✗ Error with memory monitoring: {e}") + return False + + # Test 7: Test error handling + try: + error_result = detector._handle_worker_failure("test_layout.jpg", Exception("test error")) + print(f"✓ Error handling executed (result has error: {'error' in error_result})") + except Exception as e: + print(f"✗ Error with error handling: {e}") + return False + + print("\n🎉 All tests passed! Parallel processing implementation is working correctly.") + return True + +def test_cli_integration(): + """Test CLI integration""" + + print("\nTesting CLI integration...") + + # Test parsing with parallel arguments + try: + from cli import parse_arguments + + # Mock sys.argv for testing + import sys + original_argv = sys.argv + + # Test with parallel processing arguments + sys.argv = ['cli.py', '--test', '--hybrid', '--parallel-layouts', '--layout-workers', '4'] + + try: + args = parse_arguments() + print("✓ Successfully parsed parallel processing arguments") + print(f" - Parallel layouts: {args.parallel_layouts}") + print(f" - Layout workers: {args.layout_workers}") + print(f" - Max concurrent layouts: {args.max_concurrent_layouts}") + except SystemExit: + # parse_arguments calls sys.exit() if help is requested + pass + finally: + sys.argv = original_argv + + except Exception as e: + print(f"✗ Error testing CLI integration: {e}") + return False + + print("✓ CLI integration tests passed!") + return True + +if __name__ == "__main__": + success = test_parallel_processing() + + if success: + success = test_cli_integration() + + if success: + print("\n🚀 Implementation is ready for production use!") + print("\nUsage examples:") + print(" python cli.py --test --hybrid --parallel-layouts") + print(" python cli.py --limit 10 --hybrid --parallel-layouts --layout-workers 4") + print(" python cli.py --all --hybrid --parallel-layouts --layout-workers 6") + else: + print("\n❌ Implementation needs fixes before production use.") + sys.exit(1) \ No newline at end of file diff --git a/test_simple_split.py b/test_simple_split.py new file mode 100644 index 0000000..7696ed0 --- /dev/null +++ b/test_simple_split.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +""" +Simple test script to verify panel splitting functionality +Tests the panel splitting with 6786505.jpg (horizontal strip layout) +""" + +import sys +import os +import cv2 +import numpy as np +from pathlib import Path +from panel_splitter import PanelSplitter + +def test_simple_splitting(): + """Test splitting without OpenAI guidance""" + print("=" * 60) + print("TESTING SIMPLE PANEL SPLITTING") + print("=" * 60) + + # Initialize splitter with debug mode + splitter = PanelSplitter(debug=True) + + # Target layout file - this is a horizontal strip with many panels + layout_path = Path("layouts") / "6786505.jpg" + + if not layout_path.exists(): + print(f"❌ ERROR: Layout file {layout_path} not found!") + return False + + print(f"📁 Testing with: {layout_path.name}") + + # Load and examine the image + image = cv2.imread(str(layout_path)) + if image is None: + print(f"❌ ERROR: Could not load image {layout_path}") + return False + + height, width = image.shape[:2] + print(f"📐 Image dimensions: {width}x{height}") + + # Test with different target counts + test_counts = [5, 8, 10, 12] + + for target_count in test_counts: + print(f"\n🎯 Testing with target count: {target_count}") + print("─" * 40) + + # Split the layout + splits = splitter.split_panels(str(layout_path), target_count) + + print(f"Generated {len(splits)} splits") + + if len(splits) > 0: + print("✅ Successfully generated splits!") + + # Save split images + splits_dir = Path("test_splits") + splits_dir.mkdir(exist_ok=True) + + for i, split in enumerate(splits): + split_filename = splits_dir / f"6786505_target{target_count}_split_{i+1:02d}.jpg" + cv2.imwrite(str(split_filename), split['image']) + + print(f" Saved {len(splits)} split images to test_splits/") + + # Show split details + for i, split in enumerate(splits): + x, y, w, h = split['bounds'] + print(f" Split {i+1:2d}: [{x:4d}, {y:4d}, {w:4d}, {h:4d}] conf={split['confidence']:.3f}") + else: + print("❌ No splits generated") + + print("\n" + "=" * 60) + print("🎉 SIMPLE SPLITTING TEST COMPLETED!") + print("Check the test_splits/ directory for generated images.") + print("=" * 60) + + return True + +def test_individual_methods(): + """Test individual splitting methods""" + print("\n" + "=" * 60) + print("TESTING INDIVIDUAL SPLITTING METHODS") + print("=" * 60) + + splitter = PanelSplitter(debug=True) + layout_path = Path("layouts") / "6786505.jpg" + + image = cv2.imread(str(layout_path)) + if image is None: + print("❌ Could not load image") + return False + + target_count = 8 + methods = [ + splitter._enhanced_gradient_analysis, + splitter._advanced_canny_detection, + splitter._template_matching_method, + splitter._contour_analysis_method, + splitter._texture_analysis_method, + splitter._clustering_method + ] + + for method in methods: + print(f"\n🔬 Testing {method.__name__}...") + try: + result = method(image, target_count) + if result: + print(f" ✅ Generated {len(result)} boundaries") + for i, boundary in enumerate(result): + bounds = boundary['bounds'] + print(f" {i+1}: [{bounds[0]:4d}, {bounds[1]:4d}, {bounds[2]:4d}, {bounds[3]:4d}] conf={boundary['confidence']:.3f}") + else: + print(" ❌ No boundaries generated") + except Exception as e: + print(f" ❌ Error: {e}") + + return True + +def main(): + """Main test function""" + print("🧪 STARTING SIMPLE PANEL SPLITTING TEST") + + # Test basic splitting + simple_success = test_simple_splitting() + + # Test individual methods + methods_success = test_individual_methods() + + print(f"\n📊 FINAL RESULTS:") + print(f"Simple splitting: {'✅ PASSED' if simple_success else '❌ FAILED'}") + print(f"Individual methods: {'✅ PASSED' if methods_success else '❌ FAILED'}") + + if simple_success and methods_success: + print("\n🎉 ALL TESTS PASSED! The panel splitting is working correctly.") + return 0 + else: + print("\n❌ Some tests failed. Please check the output above.") + return 1 + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/test_split_mode.py b/test_split_mode.py new file mode 100644 index 0000000..94f8d80 --- /dev/null +++ b/test_split_mode.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +""" +Test script for the new --split mode functionality +""" + +import sys +import os +from pathlib import Path +from panel_splitter import PanelSplitter + +def test_basic_splitting(): + """Test basic panel splitting functionality""" + print("Testing basic panel splitting...") + + # Initialize splitter + splitter = PanelSplitter(debug=True) + + # Test with a sample layout image + layouts_path = Path("layouts") + layout_files = list(layouts_path.glob("*.jpg")) + + if not layout_files: + print("No layout images found in layouts/ directory") + return + + # Test with first layout + test_layout = layout_files[0] + print(f"Testing with: {test_layout.name}") + + # Split panels with target count of 2 + target_count = 2 + splits = splitter.split_panels(str(test_layout), target_count) + + print(f"Generated {len(splits)} splits") + for i, split in enumerate(splits): + print(f" Split {i+1}: bounds={split['bounds']}, confidence={split['confidence']:.3f}") + + print("Basic splitting test completed!") + +def test_cli_integration(): + """Test CLI integration with --split flag""" + print("\nTesting CLI integration...") + print("You can now test the --split flag with:") + print(" python cli.py --test --split") + print(" python cli.py --test --openai --split") + print(" python cli.py --test --vector-mode --split") + print(" python cli.py --test --hybrid --split") + +if __name__ == "__main__": + test_basic_splitting() + test_cli_integration() \ No newline at end of file diff --git a/tune_14_panel_split.py b/tune_14_panel_split.py new file mode 100644 index 0000000..9abc6a4 --- /dev/null +++ b/tune_14_panel_split.py @@ -0,0 +1,646 @@ +#!/usr/bin/env python3 +""" +Parameter tuning script for 14-panel splitting +Iteratively adjusts CV method parameters to achieve exactly 14 panels for 6786505.jpg +""" + +import sys +import os +import cv2 +import numpy as np +from pathlib import Path +from panel_splitter import PanelSplitter +from typing import Dict, List, Tuple, Any +import json +from itertools import product +import time +import multiprocessing as mp +from functools import partial + +def convert_numpy_types(obj): + """Convert NumPy types to native Python types for JSON serialization""" + if isinstance(obj, np.integer): + return int(obj) + elif isinstance(obj, np.floating): + return float(obj) + elif isinstance(obj, np.ndarray): + return obj.tolist() + elif isinstance(obj, dict): + return {key: convert_numpy_types(value) for key, value in obj.items()} + elif isinstance(obj, list): + return [convert_numpy_types(item) for item in obj] + elif isinstance(obj, tuple): + return tuple(convert_numpy_types(item) for item in obj) + else: + return obj + +def test_gradient_config_worker(args): + """Worker function for gradient analysis parameter testing""" + config, image_path, target_panel_count, width, height = args + + try: + # Load image + image = cv2.imread(image_path) + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + separators = [] + for sigma in config['sigma_scales']: + # Smooth the image + smoothed = cv2.GaussianBlur(gray, (0, 0), sigma) + + # Calculate vertical gradient (for horizontal separators) + grad_y = cv2.Sobel(smoothed, cv2.CV_64F, 0, 1, ksize=3) + + # Project to get horizontal profile + profile = np.mean(np.abs(grad_y), axis=1) + + # Find peaks with tuned parameters + prominence = np.std(profile) * config['prominence_factor'] + distance = int(height / target_panel_count * config['distance_factor']) + + from scipy.signal import find_peaks + peaks, properties = find_peaks(profile, prominence=prominence, distance=distance) + + # Add to separators + for peak in peaks: + if len(properties['prominences']) > 0: + prom_idx = list(peaks).index(peak) + if prom_idx < len(properties['prominences']): + confidence = properties['prominences'][prom_idx] / np.max(properties['prominences']) + separators.append({ + 'position': peak, + 'confidence': confidence, + 'sigma': sigma + }) + + # Convert to bounds + separators.sort(key=lambda x: x['position']) + + bounds = [] + prev_y = 0 + + for sep in separators: + if sep['position'] > prev_y + height // (target_panel_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, sep['position'] - prev_y), + 'confidence': sep['confidence'] + }) + prev_y = sep['position'] + + # Add final panel + if prev_y < height - height // (target_panel_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, height - prev_y), + 'confidence': 0.8 + }) + + return { + 'method': 'gradient_analysis', + 'config': config, + 'panel_count': len(bounds), + 'bounds': bounds, + 'success': len(bounds) == target_panel_count + } + + except Exception as e: + return { + 'method': 'gradient_analysis', + 'config': config, + 'panel_count': 0, + 'bounds': [], + 'success': False, + 'error': str(e) + } + +def test_canny_config_worker(args): + """Worker function for Canny edge detection parameter testing""" + config, image_path, target_panel_count, width, height = args + + try: + # Load image + image = cv2.imread(image_path) + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + # Multi-threshold Canny detection + all_edges = [] + for low, high in config['threshold_set']: + edges = cv2.Canny(gray, low, high) + + # Morphological operations + kernel = np.ones(config['morphology_kernel'], np.uint8) + edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel) + all_edges.append(edges) + + # Combine edge maps + combined_edges = np.maximum.reduce(all_edges) + + # Find horizontal lines using Hough transform + lines = cv2.HoughLinesP( + combined_edges, + 1, + np.pi/180, + threshold=config['hough_threshold'], + minLineLength=config['min_line_length'], + maxLineGap=config['max_line_gap'] + ) + + # Filter for horizontal lines + horizontal_lines = [] + if lines is not None: + for line in lines: + x1, y1, x2, y2 = line[0] + if abs(y2 - y1) < height // 20: # Nearly horizontal + horizontal_lines.append({ + 'y_position': (y1 + y2) // 2, + 'length': abs(x2 - x1), + 'confidence': min(1.0, abs(x2 - x1) / width) + }) + + # Sort by y position and create bounds + horizontal_lines.sort(key=lambda x: x['y_position']) + + bounds = [] + prev_y = 0 + + for line in horizontal_lines: + y_pos = line['y_position'] + if y_pos > prev_y + height // (target_panel_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, y_pos - prev_y), + 'confidence': line['confidence'] + }) + prev_y = y_pos + + # Add final panel + if prev_y < height - height // (target_panel_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, height - prev_y), + 'confidence': 0.8 + }) + + return { + 'method': 'canny_detection', + 'config': config, + 'panel_count': len(bounds), + 'bounds': bounds, + 'success': len(bounds) == target_panel_count + } + + except Exception as e: + return { + 'method': 'canny_detection', + 'config': config, + 'panel_count': 0, + 'bounds': [], + 'success': False, + 'error': str(e) + } + +def test_template_config_worker(args): + """Worker function for template matching parameter testing""" + config, image_path, target_panel_count, width, height = args + + try: + # Load image + image = cv2.imread(image_path) + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + tc = config['template_config'] + + # Create templates + templates = [] + template_width = width // tc['template_width_fraction'] + + # White horizontal line template + white_template = np.ones((tc['white_line_height'], template_width), dtype=np.uint8) * 255 + templates.append(('white_line', white_template)) + + # Black horizontal line template + black_template = np.zeros((tc['black_line_height'], template_width), dtype=np.uint8) + templates.append(('black_line', black_template)) + + # Gutter template (white with black edges) + gutter_template = np.ones((tc['gutter_height'], template_width), dtype=np.uint8) * 255 + gutter_template[0, :] = 0 + gutter_template[-1, :] = 0 + templates.append(('gutter', gutter_template)) + + # Find matches + all_matches = [] + + for template_name, template in templates: + result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED) + + # Find good matches + locations = np.where(result >= tc['match_threshold']) + + for y, x in zip(locations[0], locations[1]): + confidence = result[y, x] + all_matches.append({ + 'y_position': y + template.shape[0] // 2, + 'confidence': confidence, + 'template': template_name + }) + + # Sort and merge nearby matches + all_matches.sort(key=lambda x: x['y_position']) + + merged_matches = [] + for match in all_matches: + if not merged_matches or match['y_position'] - merged_matches[-1]['y_position'] > config['merge_distance']: + merged_matches.append(match) + else: + # Keep the one with higher confidence + if match['confidence'] > merged_matches[-1]['confidence']: + merged_matches[-1] = match + + # Create bounds + bounds = [] + prev_y = 0 + + for match in merged_matches: + y_pos = match['y_position'] + if y_pos > prev_y + height // (target_panel_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, y_pos - prev_y), + 'confidence': match['confidence'] + }) + prev_y = y_pos + + # Add final panel + if prev_y < height - height // (target_panel_count * 2): + bounds.append({ + 'bounds': (0, prev_y, width, height - prev_y), + 'confidence': 0.8 + }) + + return { + 'method': 'template_matching', + 'config': config, + 'panel_count': len(bounds), + 'bounds': bounds, + 'success': len(bounds) == target_panel_count + } + + except Exception as e: + return { + 'method': 'template_matching', + 'config': config, + 'panel_count': 0, + 'bounds': [], + 'success': False, + 'error': str(e) + } + +class ParameterTuner: + def __init__(self, target_image_path: str, target_panel_count: int = 14): + self.target_image_path = target_image_path + self.target_panel_count = target_panel_count + self.image = cv2.imread(target_image_path) + self.height, self.width = self.image.shape[:2] + + # Results storage + self.results = [] + self.best_configs = [] + + # Multiprocessing setup + self.num_workers = mp.cpu_count() + + print(f"🎯 Target: {target_panel_count} panels for {Path(target_image_path).name}") + print(f"📐 Image dimensions: {self.width}x{self.height}") + print(f"🚀 Using {self.num_workers} parallel workers") + + def test_gradient_analysis_params(self): + """Test Enhanced Gradient Analysis with different parameters using multiprocessing""" + print("\n🔬 TUNING GRADIENT ANALYSIS PARAMETERS") + print("=" * 50) + + # Parameter ranges to test + sigma_ranges = [ + [3, 7, 15], # Fine scale + [5, 10, 20], # Current default + [7, 15, 25], # Coarse scale + [5, 12, 18], # Medium scale + [4, 8, 16], # Balanced scale + ] + + prominence_factors = [0.3, 0.4, 0.5, 0.6, 0.7] + distance_factors = [0.8, 1.0, 1.2, 1.5, 2.0] + + # Create all configuration combinations + configs = [] + for sigma_set in sigma_ranges: + for prom_factor in prominence_factors: + for dist_factor in distance_factors: + config = { + 'method': 'gradient_analysis', + 'sigma_scales': sigma_set, + 'prominence_factor': prom_factor, + 'distance_factor': dist_factor + } + configs.append(config) + + print(f"Testing {len(configs)} gradient analysis configurations...") + + # Prepare arguments for multiprocessing + args_list = [ + (config, self.target_image_path, self.target_panel_count, self.width, self.height) + for config in configs + ] + + # Use multiprocessing to test configurations + with mp.Pool(processes=self.num_workers) as pool: + method_results = pool.map(test_gradient_config_worker, args_list) + + # Check for exact matches + exact_matches = [r for r in method_results if r['success']] + for result in exact_matches: + print(f"✅ EXACT MATCH: Panels: {result['panel_count']}") + self.best_configs.append(result) + + # Find best results + best_results = sorted(method_results, key=lambda x: abs(x['panel_count'] - self.target_panel_count))[:5] + print(f"\n🏆 Top 5 Gradient Analysis Results:") + for i, result in enumerate(best_results, 1): + print(f" {i}. Panels: {result['panel_count']}") + + return method_results + + def test_canny_params(self): + """Test Advanced Canny Edge Detection with different parameters using multiprocessing""" + print("\n🔬 TUNING CANNY EDGE DETECTION PARAMETERS") + print("=" * 50) + + # Parameter ranges + threshold_sets = [ + [(30, 100), (80, 160), (120, 200)], # Low sensitivity + [(50, 150), (100, 200), (150, 250)], # Current default + [(70, 180), (120, 220), (170, 280)], # High sensitivity + [(40, 120), (90, 180), (140, 240)], # Balanced + ] + + morphology_kernels = [ + (1, 1), (3, 1), (5, 1), (7, 1), (9, 1) # Vertical kernels for horizontal lines + ] + + hough_thresholds = [ + self.width // 8, # Low threshold + self.width // 6, # Medium-low + self.width // 4, # Current default + self.width // 3, # High threshold + ] + + min_line_lengths = [ + self.width // 5, # Short lines + self.width // 4, # Medium-short + self.width // 3, # Current default + self.width // 2, # Long lines + ] + + max_line_gaps = [ + self.width // 20, # Small gaps + self.width // 15, # Medium gaps + self.width // 10, # Current default + self.width // 8, # Large gaps + ] + + # Create all configuration combinations + configs = [] + for thresh_set in threshold_sets: + for kernel in morphology_kernels: + for hough_thresh in hough_thresholds: + for min_len in min_line_lengths: + for max_gap in max_line_gaps: + config = { + 'method': 'canny_detection', + 'threshold_set': thresh_set, + 'morphology_kernel': kernel, + 'hough_threshold': hough_thresh, + 'min_line_length': min_len, + 'max_line_gap': max_gap + } + configs.append(config) + + print(f"Testing {len(configs)} canny detection configurations...") + + # Prepare arguments for multiprocessing + args_list = [ + (config, self.target_image_path, self.target_panel_count, self.width, self.height) + for config in configs + ] + + # Use multiprocessing to test configurations + with mp.Pool(processes=self.num_workers) as pool: + method_results = pool.map(test_canny_config_worker, args_list) + + # Check for exact matches + exact_matches = [r for r in method_results if r['success']] + for result in exact_matches: + print(f"✅ EXACT MATCH: Panels: {result['panel_count']}") + self.best_configs.append(result) + + # Find best results + best_results = sorted(method_results, key=lambda x: abs(x['panel_count'] - self.target_panel_count))[:5] + print(f"\n🏆 Top 5 Canny Detection Results:") + for i, result in enumerate(best_results, 1): + print(f" {i}. Panels: {result['panel_count']}") + + return method_results + + def test_template_matching_params(self): + """Test Template Matching with different parameters using multiprocessing""" + print("\n🔬 TUNING TEMPLATE MATCHING PARAMETERS") + print("=" * 50) + + # Template configurations + template_configs = [ + { + 'white_line_height': 3, + 'black_line_height': 3, + 'gutter_height': 6, + 'template_width_fraction': 4, + 'match_threshold': 0.4 + }, + { + 'white_line_height': 5, + 'black_line_height': 5, + 'gutter_height': 10, + 'template_width_fraction': 4, + 'match_threshold': 0.5 + }, + { + 'white_line_height': 7, + 'black_line_height': 7, + 'gutter_height': 14, + 'template_width_fraction': 4, + 'match_threshold': 0.6 + }, + { + 'white_line_height': 4, + 'black_line_height': 4, + 'gutter_height': 8, + 'template_width_fraction': 3, + 'match_threshold': 0.45 + }, + { + 'white_line_height': 6, + 'black_line_height': 6, + 'gutter_height': 12, + 'template_width_fraction': 5, + 'match_threshold': 0.55 + } + ] + + merge_distances = [ + self.height // (self.target_panel_count * 3), + self.height // (self.target_panel_count * 2), + self.height // (self.target_panel_count * 1.5), + ] + + # Create all configuration combinations + configs = [] + for template_config in template_configs: + for merge_dist in merge_distances: + config = { + 'method': 'template_matching', + 'template_config': template_config, + 'merge_distance': merge_dist + } + configs.append(config) + + print(f"Testing {len(configs)} template matching configurations...") + + # Prepare arguments for multiprocessing + args_list = [ + (config, self.target_image_path, self.target_panel_count, self.width, self.height) + for config in configs + ] + + # Use multiprocessing to test configurations + with mp.Pool(processes=self.num_workers) as pool: + method_results = pool.map(test_template_config_worker, args_list) + + # Check for exact matches + exact_matches = [r for r in method_results if r['success']] + for result in exact_matches: + print(f"✅ EXACT MATCH: Panels: {result['panel_count']}") + self.best_configs.append(result) + + # Find best results + best_results = sorted(method_results, key=lambda x: abs(x['panel_count'] - self.target_panel_count))[:5] + print(f"\n🏆 Top 5 Template Matching Results:") + for i, result in enumerate(best_results, 1): + print(f" {i}. Panels: {result['panel_count']}") + + return method_results + + def test_consensus_params(self): + """Test consensus system with different parameters (simplified for multiprocessing)""" + print("\n🔬 TUNING CONSENSUS SYSTEM PARAMETERS") + print("=" * 50) + + # For now, return empty results to speed up testing + # Focus on the main methods that are already working + print("Skipping consensus tuning - focusing on main methods") + return [] + + def run_full_tuning(self): + """Run the complete parameter tuning process""" + print("🚀 STARTING COMPREHENSIVE PARAMETER TUNING") + print("=" * 60) + + start_time = time.time() + + # Test all methods + gradient_results = self.test_gradient_analysis_params() + canny_results = self.test_canny_params() + template_results = self.test_template_matching_params() + consensus_results = self.test_consensus_params() + + # Combine all results + all_results = gradient_results + canny_results + template_results + consensus_results + + # Find the absolute best configurations + exact_matches = [r for r in all_results if r['success']] + close_matches = sorted([r for r in all_results if not r['success']], + key=lambda x: abs(x['panel_count'] - self.target_panel_count))[:10] + + # Generate summary + elapsed_time = time.time() - start_time + + print(f"\n" + "=" * 60) + print(f"🏁 TUNING COMPLETE - Time: {elapsed_time:.1f}s") + print(f"=" * 60) + + print(f"\n🎯 EXACT MATCHES ({len(exact_matches)} found):") + for i, match in enumerate(exact_matches, 1): + print(f" {i}. Method: {match['method']}") + print(f" Config: {match['config']}") + print(f" Panel Count: {match['panel_count']}") + print() + + print(f"\n📊 CLOSE MATCHES (Top 10):") + for i, match in enumerate(close_matches, 1): + print(f" {i}. Method: {match['method']}, Panels: {match['panel_count']}") + print(f" Config: {match['config']}") + print() + + # Save results to file + results_file = "tuning_results_14_panel.json" + output_data = { + 'target_panel_count': self.target_panel_count, + 'image_path': self.target_image_path, + 'image_dimensions': {'width': self.width, 'height': self.height}, + 'exact_matches': exact_matches, + 'close_matches': close_matches, + 'all_results': all_results, + 'tuning_time': elapsed_time, + 'total_configurations_tested': len(all_results) + } + + # Convert NumPy types to native Python types for JSON serialization + output_data = convert_numpy_types(output_data) + + with open(results_file, 'w') as f: + json.dump(output_data, f, indent=2) + + print(f"💾 Results saved to: {results_file}") + + return exact_matches, close_matches + +def main(): + """Main tuning function""" + print("🎯 14-PANEL SPLITTING PARAMETER TUNING") + print("=" * 60) + + target_image = "layouts/6786505.jpg" + target_panels = 14 + + if not Path(target_image).exists(): + print(f"❌ ERROR: Target image {target_image} not found!") + return 1 + + # Initialize tuner + tuner = ParameterTuner(target_image, target_panels) + + # Run tuning + exact_matches, close_matches = tuner.run_full_tuning() + + if exact_matches: + print(f"\n🎉 SUCCESS: Found {len(exact_matches)} configurations that produce exactly {target_panels} panels!") + print("\n🔧 RECOMMENDED SETTINGS:") + print("Update your PanelSplitter class with these optimal parameters:") + + for i, match in enumerate(exact_matches[:3], 1): # Show top 3 + print(f"\n Option {i} - {match['method']}:") + print(f" {match['config']}") + else: + print(f"\n⚠️ No exact matches found. Best alternatives:") + for i, match in enumerate(close_matches[:3], 1): + print(f" {i}. {match['method']}: {match['panel_count']} panels") + + return 0 + +if __name__ == "__main__": + # Required for multiprocessing on macOS/Windows + mp.set_start_method('spawn', force=True) + sys.exit(main()) \ No newline at end of file diff --git a/vector_detector.py b/vector_detector.py new file mode 100644 index 0000000..4ceeeac --- /dev/null +++ b/vector_detector.py @@ -0,0 +1,776 @@ +#!/usr/bin/env python3 +""" +Vector-based Image Detection Module +Extracted from image_detector.py - Contains VectorImageDetector class +Uses Google Vertex AI Multimodal Embeddings for image similarity detection +""" + +import os +import json +import time +from pathlib import Path +from typing import List, Dict, Optional +import numpy as np +import pickle +from google.cloud import aiplatform +from vertexai.vision_models import MultiModalEmbeddingModel +import cv2 +from panel_splitter import PanelSplitter + + +class VectorImageDetector: + def __init__(self, similarity_threshold=0.75, splitting_mode="none", min_crop_size=200, crop_padding=20, split_mode=False): + """Initialize the vector-based image detector using Google Vertex AI Multimodal Embeddings""" + print("Initializing Vector Image Detector with Google Vertex AI...") + + # Initialize Vertex AI + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "service-account.json" + aiplatform.init(project="optical-414516", location="us-central1") + + # Initialize multimodal embedding model + self.model = MultiModalEmbeddingModel.from_pretrained("multimodalembedding@001") + + # Configuration + self.similarity_threshold = similarity_threshold + self.splitting_mode = splitting_mode + self.min_crop_size = min_crop_size + self.crop_padding = crop_padding + self.split_mode = split_mode + + # Split mode configuration + if self.split_mode: + self.splitter = PanelSplitter(debug=True) + print("Split mode enabled: Will split multi-panel layouts before matching") + + # Paths + self.master_images_path = Path("master_images") + self.layouts_path = Path("layouts") + self.results_path = Path("results") + self.embeddings_cache_path = Path("embeddings_cache") + self.crops_debug_path = Path("crops_debug") + + # Create directories + self.results_path.mkdir(exist_ok=True) + self.embeddings_cache_path.mkdir(exist_ok=True) + self.crops_debug_path.mkdir(exist_ok=True) + + # Master images data + self.master_images = {} + self.master_files = {} + self.master_embeddings = {} + + print(f"✓ Vector detector initialized with similarity threshold: {similarity_threshold}") + print(f"✓ Splitting mode: {splitting_mode}, Min crop size: {min_crop_size}px") + + def load_master_images(self) -> Dict[str, str]: + """Load all master images and create ID mapping using filenames""" + print("Loading master images...") + + master_files = list(self.master_images_path.glob("*.jpg")) + print(f"Found {len(master_files)} master images") + + for file_path in master_files: + master_id = file_path.stem + self.master_images[master_id] = str(file_path) + self.master_files[master_id] = file_path.name + + return self.master_images + + def generate_image_embedding(self, image_path: str) -> np.ndarray: + """Generate 1408-dimensional embedding for an image using Vertex AI""" + try: + from vertexai.vision_models import Image as VertexImage + + # Create Vertex AI Image object directly from file path + vertex_image = VertexImage.load_from_file(image_path) + + # Get embedding from Vertex AI + response = self.model.get_embeddings(image=vertex_image) + + # Extract the embedding vector (1408 dimensions) + embedding = np.array(response.image_embedding) + + return embedding + + except Exception as e: + print(f"Error generating embedding for {Path(image_path).name}: {e}") + return None + + def save_embedding_cache(self, embeddings: Dict, filename: str): + """Save embeddings to cache file""" + cache_file = self.embeddings_cache_path / f"{filename}.pkl" + with open(cache_file, 'wb') as f: + pickle.dump(embeddings, f) + print(f"Embeddings cached to: {cache_file}") + + def load_embedding_cache(self, filename: str) -> Optional[Dict]: + """Load embeddings from cache file""" + cache_file = self.embeddings_cache_path / f"{filename}.pkl" + if cache_file.exists(): + try: + with open(cache_file, 'rb') as f: + embeddings = pickle.load(f) + print(f"Loaded cached embeddings from: {cache_file}") + return embeddings + except Exception as e: + print(f"Error loading cached embeddings: {e}") + return None + + def generate_master_embeddings(self, force_regenerate=False) -> Dict[str, np.ndarray]: + """Generate embeddings for all master images (with caching)""" + cache_filename = "master_embeddings" + + # Try to load from cache first + if not force_regenerate: + cached_embeddings = self.load_embedding_cache(cache_filename) + if cached_embeddings is not None: + # Verify all master images are in cache + if set(cached_embeddings.keys()) == set(self.master_images.keys()): + self.master_embeddings = cached_embeddings + print(f"✓ Using cached embeddings for {len(cached_embeddings)} master images") + return self.master_embeddings + else: + print("Cache incomplete, regenerating embeddings...") + + print(f"Generating embeddings for {len(self.master_images)} master images...") + self.master_embeddings = {} + + for i, (master_id, image_path) in enumerate(self.master_images.items(), 1): + print(f" {i}/{len(self.master_images)}: Generating embedding for {master_id}") + + embedding = self.generate_image_embedding(image_path) + if embedding is not None: + self.master_embeddings[master_id] = embedding + + # Small delay to avoid rate limiting + if i < len(self.master_images): + time.sleep(0.1) + + # Cache the embeddings + if self.master_embeddings: + self.save_embedding_cache(self.master_embeddings, cache_filename) + + print(f"✓ Generated embeddings for {len(self.master_embeddings)} master images") + return self.master_embeddings + + def compute_cosine_similarity(self, embedding1: np.ndarray, embedding2: np.ndarray) -> float: + """Compute cosine similarity between two embeddings""" + # Normalize the embeddings + norm1 = np.linalg.norm(embedding1) + norm2 = np.linalg.norm(embedding2) + + if norm1 == 0 or norm2 == 0: + return 0.0 + + # Compute cosine similarity + similarity = np.dot(embedding1, embedding2) / (norm1 * norm2) + return float(similarity) + + def detect_layout_type(self, image_path: str) -> str: + """Analyze layout image to determine if it's single image or composite""" + try: + img = cv2.imread(image_path) + height, width = img.shape[:2] + + # Convert to grayscale for analysis + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + # Detect edges to find potential separators + edges = cv2.Canny(gray, 50, 150, apertureSize=3) + + # Look for strong vertical lines (panel separators) + vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, height // 10)) + vertical_lines = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, vertical_kernel) + + # Look for strong horizontal lines (row separators) + horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (width // 10, 1)) + horizontal_lines = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, horizontal_kernel) + + # Count significant vertical and horizontal structures + vertical_density = np.sum(vertical_lines) / (height * width) + horizontal_density = np.sum(horizontal_lines) / (height * width) + + # Determine layout type based on structure + if vertical_density > 0.01 or horizontal_density > 0.01: + return "composite" + else: + return "single" + + except Exception as e: + print(f"Error analyzing layout type for {Path(image_path).name}: {e}") + return "single" # Default to single if analysis fails + + def split_image_by_grid(self, image_path: str) -> List[Dict]: + """Split composite image into individual components using grid detection""" + try: + layout_name = Path(image_path).name + print(f" Analyzing grid structure for {layout_name}") + + # Load image + img = cv2.imread(image_path) + height, width = img.shape[:2] + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + # Edge detection for finding separators + edges = cv2.Canny(gray, 30, 100, apertureSize=3) + + # Detect vertical separators (for horizontal panels) + vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, height // 8)) + vertical_lines = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, vertical_kernel) + + # Find vertical separator positions + vertical_projection = np.sum(vertical_lines, axis=0) + vertical_threshold = np.max(vertical_projection) * 0.6 # More strict threshold + vertical_separators = [] + + for x in range(width): + if vertical_projection[x] > vertical_threshold: + # Check if this is a new separator (not adjacent to previous) + if not vertical_separators or x - vertical_separators[-1] > 50: # Larger gap requirement + vertical_separators.append(x) + + # Detect horizontal separators (for stacked layouts) + horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (width // 8, 1)) + horizontal_lines = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, horizontal_kernel) + + # Find horizontal separator positions + horizontal_projection = np.sum(horizontal_lines, axis=1) + horizontal_threshold = np.max(horizontal_projection) * 0.6 # More strict threshold + horizontal_separators = [] + + for y in range(height): + if horizontal_projection[y] > horizontal_threshold: + if not horizontal_separators or y - horizontal_separators[-1] > 50: # Larger gap requirement + horizontal_separators.append(y) + + # Generate crop regions based on detected separators + crops = [] + + # Add image boundaries to separator lists + x_boundaries = [0] + vertical_separators + [width] + y_boundaries = [0] + horizontal_separators + [height] + + # Remove duplicates and sort + x_boundaries = sorted(list(set(x_boundaries))) + y_boundaries = sorted(list(set(y_boundaries))) + + print(f" Found {len(x_boundaries)-1} x {len(y_boundaries)-1} grid sections") + + # For horizontal layouts, prefer fallback splitting if grid creates too many small sections + total_sections = (len(x_boundaries)-1) * (len(y_boundaries)-1) + is_wide_horizontal = width > height * 1.5 + + if is_wide_horizontal and total_sections > 20: + print(f" Grid too complex ({total_sections} sections), using horizontal splitting instead") + crops = self.fallback_split_image(img, width, height) + else: + # Generate all possible rectangular crops + for i in range(len(y_boundaries) - 1): + for j in range(len(x_boundaries) - 1): + y1, y2 = y_boundaries[i], y_boundaries[i + 1] + x1, x2 = x_boundaries[j], x_boundaries[j + 1] + + # Add padding and ensure boundaries + x1 = max(0, x1 - self.crop_padding) + y1 = max(0, y1 - self.crop_padding) + x2 = min(width, x2 + self.crop_padding) + y2 = min(height, y2 + self.crop_padding) + + crop_width = x2 - x1 + crop_height = y2 - y1 + + # Filter out crops that are too small + if crop_width >= self.min_crop_size and crop_height >= self.min_crop_size: + crop_area = crop_width * crop_height + total_area = width * height + area_ratio = crop_area / total_area + + crops.append({ + 'bbox': (x1, y1, x2, y2), + 'width': crop_width, + 'height': crop_height, + 'area_ratio': area_ratio, + 'crop_id': f"grid_{i}_{j}" + }) + + # If no good crops found, try fallback splitting + if not crops: + print(f" No grid detected, trying fallback splitting") + crops = self.fallback_split_image(img, width, height) + + print(f" Generated {len(crops)} crops for analysis") + return crops + + except Exception as e: + print(f"Error splitting image {Path(image_path).name}: {e}") + return [] + + def fallback_split_image(self, img, width: int, height: int) -> List[Dict]: + """Improved horizontal splitting focusing on major structural separators""" + crops = [] + + # Only process wide images for horizontal splitting + if width <= height * 1.2: + print(f" Image not wide enough for horizontal splitting, treating as single panel") + crops.append({ + 'bbox': (0, 0, width, height), + 'width': width, + 'height': height, + 'area_ratio': 1.0, + 'crop_id': "single" + }) + return crops + + print(f" Using improved horizontal splitting for {width}x{height} image") + + # Convert to grayscale + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + # Method 1: Structural edge detection for full-height separators + edges = cv2.Canny(gray, 30, 100) + vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, height // 3)) + vertical_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, vertical_kernel) + edge_projection = np.sum(vertical_edges, axis=0) + + # Method 2: Intensity histogram analysis + horizontal_hist = np.sum(gray, axis=0) + inverted_hist = np.max(horizontal_hist) - horizontal_hist + + # Smooth both signals + from scipy.ndimage import gaussian_filter1d + smoothed_edges = gaussian_filter1d(edge_projection, sigma=15) + smoothed_hist = gaussian_filter1d(inverted_hist, sigma=15) + + # Adaptive parameters based on image size + if width < 2000: + min_panel_width = width // 4 # At least 25% per panel + max_panels = 3 + elif width < 5000: + min_panel_width = width // 6 # At least 16% per panel + max_panels = 6 + else: + min_panel_width = width // 12 # At least 8% per panel + max_panels = 15 + + print(f" Min panel width: {min_panel_width}px, Max panels: {max_panels}") + + # Find separator candidates with conservative thresholds + edge_threshold = np.max(smoothed_edges) * 0.5 # Higher threshold for stronger edges + hist_threshold = np.max(smoothed_hist) * 0.4 # Higher threshold for deeper valleys + + from scipy.signal import find_peaks + + # Edge-based separators + edge_peaks, _ = find_peaks(smoothed_edges, + distance=min_panel_width, + height=edge_threshold, + prominence=np.max(smoothed_edges) * 0.3) + + # Histogram-based separators + hist_peaks, _ = find_peaks(smoothed_hist, + distance=min_panel_width, + height=hist_threshold, + prominence=np.max(smoothed_hist) * 0.2) + + print(f" Edge peaks: {len(edge_peaks)}, Histogram peaks: {len(hist_peaks)}") + + # Combine separators and filter boundary areas + all_separators = set(edge_peaks) | set(hist_peaks) + boundary_margin = width * 0.1 # 10% margin from edges + valid_separators = [s for s in all_separators + if boundary_margin < s < width - boundary_margin] + + # Remove separators too close to each other + valid_separators = sorted(valid_separators) + final_separators = [] + for sep in valid_separators: + if not final_separators or sep - final_separators[-1] >= min_panel_width: + final_separators.append(sep) + + # Limit to reasonable number of panels and keep strongest separators + if len(final_separators) >= max_panels: + separator_scores = [] + for sep in final_separators: + edge_score = smoothed_edges[sep] if sep < len(smoothed_edges) else 0 + hist_score = smoothed_hist[sep] if sep < len(smoothed_hist) else 0 + combined_score = edge_score + hist_score + separator_scores.append((sep, combined_score)) + + separator_scores.sort(key=lambda x: x[1], reverse=True) + final_separators = [s[0] for s in separator_scores[:max_panels-1]] + final_separators.sort() + + print(f" Final separators: {final_separators}") + + # Create crops + x_boundaries = [0] + final_separators + [width] + + for i in range(len(x_boundaries) - 1): + x1, x2 = x_boundaries[i], x_boundaries[i + 1] + + if x2 - x1 >= self.min_crop_size: + crops.append({ + 'bbox': (x1, 0, x2, height), + 'width': x2 - x1, + 'height': height, + 'area_ratio': (x2 - x1) / width, + 'crop_id': f"panel_{i}" + }) + + print(f" Generated {len(crops)} improved horizontal crops") + + return crops + + def save_crop_debug_images(self, image_path: str, crops: List[Dict]): + """Save cropped images for debugging purposes""" + try: + layout_name = Path(image_path).stem + img = cv2.imread(image_path) + + for i, crop in enumerate(crops): + x1, y1, x2, y2 = crop['bbox'] + cropped = img[y1:y2, x1:x2] + + debug_filename = f"{layout_name}_crop_{i}_{crop['crop_id']}.jpg" + debug_path = self.crops_debug_path / debug_filename + cv2.imwrite(str(debug_path), cropped) + + except Exception as e: + print(f"Warning: Failed to save debug crops: {e}") + + def generate_crop_embedding(self, image_path: str, crop_info: Dict) -> Optional[np.ndarray]: + """Generate embedding for a specific crop of an image""" + try: + # Load full image + img = cv2.imread(image_path) + + # Extract crop region + x1, y1, x2, y2 = crop_info['bbox'] + cropped_img = img[y1:y2, x1:x2] + + # Save crop to temporary file for embedding generation + temp_crop_path = self.crops_debug_path / f"temp_crop.jpg" + cv2.imwrite(str(temp_crop_path), cropped_img) + + # Generate embedding for crop + embedding = self.generate_image_embedding(str(temp_crop_path)) + + # Clean up temp file + if temp_crop_path.exists(): + temp_crop_path.unlink() + + return embedding + + except Exception as e: + print(f"Error generating crop embedding: {e}") + return None + + def detect_masters_in_layout_vector(self, layout_path: str, layout_index: int, total_layouts: int) -> Dict: + """Detect which master images appear in a layout using vector similarity with optional splitting""" + layout_name = Path(layout_path).name + print(f"Processing {layout_index}/{total_layouts}: {layout_name} (Vector mode: {self.splitting_mode})") + + try: + # Step 1: Determine if we should use splitting + if self.splitting_mode == "none": + return self.detect_whole_image(layout_path, layout_name) + elif self.splitting_mode == "auto": + layout_type = self.detect_layout_type(layout_path) + if layout_type == "single": + return self.detect_whole_image(layout_path, layout_name) + else: + return self.detect_with_splitting(layout_path, layout_name) + elif self.splitting_mode == "grid": + return self.detect_with_splitting(layout_path, layout_name) + else: + # Default to whole image + return self.detect_whole_image(layout_path, layout_name) + + except Exception as e: + error_msg = f"Error analyzing {layout_name} with vector embeddings: {e}" + print(error_msg) + return { + 'detected_masters': [], + 'detected_master_ids': [], + 'detected_master_filenames': [], + 'analysis': 'Vector embedding analysis failed', + 'error': str(e), + 'processing_mode': f'vector_embedding_{self.splitting_mode}' + } + + def detect_whole_image(self, layout_path: str, layout_name: str) -> Dict: + """Detect masters using whole image comparison""" + print(f" Processing whole image: {layout_name}") + + # Generate embedding for layout image + layout_embedding = self.generate_image_embedding(layout_path) + + if layout_embedding is None: + raise Exception("Failed to generate layout embedding") + + # Compare with all master embeddings + similarities = {} + detected_masters = [] + + print(f" Comparing against {len(self.master_embeddings)} master images...") + for master_id, master_embedding in self.master_embeddings.items(): + similarity = self.compute_cosine_similarity(layout_embedding, master_embedding) + similarities[master_id] = similarity + + if similarity >= self.similarity_threshold: + detected_masters.append(master_id) + + # Sort detected masters by similarity (highest first) + detected_masters.sort(key=lambda x: similarities[x], reverse=True) + + # Create analysis text + top_similarities = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:5] + analysis_parts = [ + f"Whole image vector analysis using Google Vertex AI embeddings (1408 dimensions).", + f"Similarity threshold: {self.similarity_threshold}", + f"Found {len(detected_masters)} matches above threshold.", + f"Top 5 similarities: " + ", ".join([f"{mid}({sim:.3f})" for mid, sim in top_similarities]) + ] + analysis = " ".join(analysis_parts) + + print(f"✓ Completed {layout_name} - Found {len(detected_masters)} matches") + if detected_masters: + print(f" Matches: {', '.join(detected_masters)}") + + return { + 'detected_masters': detected_masters, + 'detected_master_ids': detected_masters, + 'detected_master_filenames': [f"{mid}.jpg" for mid in detected_masters ], + 'analysis': analysis, + 'similarities': dict(top_similarities), + 'processing_mode': 'vector_embedding_whole', + 'similarity_threshold': self.similarity_threshold, + 'embedding_dimensions': 1408 + } + + def detect_with_splitting(self, layout_path: str, layout_name: str) -> Dict: + """Detect masters using image splitting and crop comparison""" + print(f" Processing with grid splitting: {layout_name}") + + # Step 1: Split the image into crops + crops = self.split_image_by_grid(layout_path) + + if not crops: + print(f" No valid crops found, falling back to whole image") + return self.detect_whole_image(layout_path, layout_name) + + # Step 2: Save debug crops if needed + self.save_crop_debug_images(layout_path, crops) + + # Step 3: Process each crop + all_crop_results = [] + crop_similarities = {} + + for i, crop in enumerate(crops): + print(f" Processing crop {i+1}/{len(crops)} ({crop['crop_id']})") + + # Generate embedding for this crop + crop_embedding = self.generate_crop_embedding(layout_path, crop) + + if crop_embedding is None: + continue + + # Compare crop against all masters + crop_result = { + 'crop_id': crop['crop_id'], + 'crop_info': crop, + 'similarities': {}, + 'matches': [] + } + + for master_id, master_embedding in self.master_embeddings.items(): + similarity = self.compute_cosine_similarity(crop_embedding, master_embedding) + crop_result['similarities'][master_id] = similarity + + if similarity >= self.similarity_threshold: + crop_result['matches'].append(master_id) + + # Sort matches by similarity + crop_result['matches'].sort(key=lambda x: crop_result['similarities'][x], reverse=True) + all_crop_results.append(crop_result) + + # Track all similarities for global analysis + for master_id, sim in crop_result['similarities'].items(): + if master_id not in crop_similarities or sim > crop_similarities[master_id]: + crop_similarities[master_id] = sim + + # Step 4: Aggregate results across all crops + detected_masters = [] + final_similarities = {} + + # Collect all unique matches with their best similarity scores + for crop_result in all_crop_results: + for match in crop_result['matches']: + if match not in detected_masters: + detected_masters.append(match) + final_similarities[match] = crop_result['similarities'][match] + else: + # Update with higher similarity if found + if crop_result['similarities'][match] > final_similarities[match]: + final_similarities[match] = crop_result['similarities'][match] + + # Sort by best similarity + detected_masters.sort(key=lambda x: final_similarities.get(x, 0), reverse=True) + + # Get top overall similarities for analysis + top_similarities = sorted(crop_similarities.items(), key=lambda x: x[1], reverse=True)[:5] + + # Create analysis + analysis_parts = [ + f"Grid-based splitting analysis using Google Vertex AI embeddings (1408 dimensions).", + f"Split into {len(crops)} crops, processed {len(all_crop_results)} successfully.", + f"Similarity threshold: {self.similarity_threshold}", + f"Found {len(detected_masters)} unique matches across all crops.", + f"Top 5 similarities: " + ", ".join([f"{mid}({sim:.3f})" for mid, sim in top_similarities]) + ] + analysis = " ".join(analysis_parts) + + print(f"✓ Completed {layout_name} - Found {len(detected_masters)} matches across {len(crops)} crops") + if detected_masters: + print(f" Matches: {', '.join(detected_masters)}") + + return { + 'detected_masters': detected_masters, + 'detected_master_ids': detected_masters, + 'detected_master_filenames': [f"{mid}.jpg" for mid in detected_masters ], + 'analysis': analysis, + 'similarities': dict(top_similarities), + 'processing_mode': 'vector_embedding_grid', + 'similarity_threshold': self.similarity_threshold, + 'embedding_dimensions': 1408, + 'crops_processed': len(all_crop_results), + 'total_crops': len(crops), + 'crop_results': all_crop_results # Detailed crop-by-crop results + } + + def process_all_layouts_vector(self, limit: Optional[int] = None, specific_file: Optional[str] = None) -> Dict: + """Process all layout images using vector embeddings""" + print("Starting vector-based batch processing...") + + # Load master images + self.load_master_images() + + # Generate master embeddings (with caching) + self.generate_master_embeddings() + + if not self.master_embeddings: + raise Exception("No master embeddings available") + + # Get layout files + if specific_file: + # Process only the specific file + layout_files = [self.layouts_path / specific_file] + if not layout_files[0].exists(): + raise FileNotFoundError(f"Layout file {specific_file} not found in {self.layouts_path}") + print(f"Processing specific file: {specific_file}") + else: + layout_files = list(self.layouts_path.glob("*.jpg")) + layout_files.sort() # Ensure consistent alphabetical ordering + + print(f"Found {len(layout_files)} layout files") + if layout_files: + print(f"First file will be: {layout_files[0].name}") + + if limit: + layout_files = layout_files[:limit] + print(f"Processing first {limit} layouts only") + + total_layouts = len(layout_files) + print(f"Processing {total_layouts} layout images using vector embeddings") + print("=" * 60) + + results = {} + start_time = time.time() + + for i, layout_path in enumerate(layout_files, 1): + layout_id = layout_path.stem + + # Detect images using vector similarity + result = self.detect_masters_in_layout_vector(str(layout_path), i, total_layouts) + + layout_result = { + 'layout_filename': layout_path.name, + 'detected_master_ids': result['detected_master_ids'], + 'detected_master_filenames': result['detected_master_filenames'], + 'analysis': result['analysis'], + 'processing_mode': 'vector_embedding', + 'similarity_threshold': self.similarity_threshold, + 'embedding_dimensions': 1408 + } + + if 'similarities' in result: + layout_result['similarities'] = result['similarities'] + + if 'error' in result: + layout_result['error'] = result['error'] + + results[layout_id] = layout_result + + # Progress update + elapsed = time.time() - start_time + avg_time = elapsed / i + remaining = (total_layouts - i) * avg_time + + print(f"Progress: {i}/{total_layouts} ({i/total_layouts*100:.1f}%) - Est. remaining: {remaining/60:.1f} min") + + # Save progress periodically + if i % 20 == 0: + self.save_results(results, f"vector_progress_{i}") + + total_time = time.time() - start_time + print(f"\n✓ Completed vector processing of {total_layouts} layouts in {total_time/60:.1f} minutes") + print(f"Average time per layout: {total_time/total_layouts:.1f} seconds") + return results + + def save_results(self, results: Dict, filename: str = "vector_detection_results") -> str: + """Save results to JSON file""" + output_path = self.results_path / f"{filename}.json" + + # Add metadata + output_data = { + 'metadata': { + 'total_layouts_processed': len(results), + 'total_master_images': len(self.master_images), + 'master_images_available': list(self.master_files.keys()), + 'processing_mode': 'vector_embedding', + 'similarity_threshold': self.similarity_threshold, + 'embedding_dimensions': 1408, + 'embedding_model': 'Google Vertex AI multimodalembedding@001' + }, + 'results': results + } + + with open(output_path, 'w') as f: + json.dump(output_data, f, indent=2) + + print(f"Results saved to: {output_path}") + return str(output_path) + + def generate_summary(self, results: Dict) -> Dict: + """Generate summary statistics for vector detection""" + total_layouts = len(results) + layouts_with_matches = sum(1 for r in results.values() if r['detected_master_ids']) + + # Count master image occurrences + master_counts = {} + for result in results.values(): + for master_id in result['detected_master_ids']: + master_counts[master_id] = master_counts.get(master_id, 0) + 1 + + summary = { + 'total_layouts_processed': total_layouts, + 'layouts_with_matches': layouts_with_matches, + 'layouts_without_matches': total_layouts - layouts_with_matches, + 'master_image_usage': master_counts, + 'most_used_masters': sorted(master_counts.items(), key=lambda x: x[1], reverse=True)[:10], + 'processing_mode': 'vector_embedding', + 'similarity_threshold': self.similarity_threshold, + 'embedding_dimensions': 1408 + } + + return summary \ No newline at end of file