#!/usr/bin/env python3
"""
Advisory script for stalled parallel processing.

Prints an analysis of the stall and recommended cli.py invocations that reduce
the bottleneck by lowering the layout worker count. It only prints guidance; it
does not change any settings itself.
"""

import argparse
import os
import sys


def main() -> None:
    """Print the stall analysis and the recommended ways to work around it.

    The report is static text written to standard output: a summary of the
    observed stall, three alternative ``cli.py`` invocations (two, one, or no
    parallel layout workers), technical notes, and a final recommendation.
    The function takes no arguments, reads no input and returns ``None``; it
    only prints.
    """
    rule = "=" * 50

    # Command-line fragments shared by all three suggested invocations. A
    # trailing backslash is a shell line continuation, so each printed command
    # can be pasted into a terminal as-is.
    cmd_head = " python cli.py --all --hybrid --split-simple --refinement-mode \\"
    cmd_thresholds = " --inlier-threshold 0.15 --inlier-ratio-threshold 0.2 \\"
    cmd_tracking = " --fallback-one-at-a-time --enable-cost-tracking --cost-report"

    # One entry per print() call; a leading "\n" yields a blank line before
    # the section heading.
    report = (
        "🔧 Parallel Processing Stall Fix",
        rule,
        "\n📊 ANALYSIS OF THE STALL:",
        "1. Inlier analysis queue has consistent 3 items (bottleneck)",
        "2. Each inlier analysis takes 60-167 seconds (very slow)",
        "3. 4 layout workers are waiting for 1 serial inlier analysis",
        "4. High swap usage (72.7%) triggering unnecessary memory pressure",
        "5. Memory pressure is reducing workers when it shouldn't",
        "\n🚀 RECOMMENDED SOLUTIONS:",
        "\n1. IMMEDIATE FIX (restart with reduced workers):",
        cmd_head,
        cmd_thresholds,
        cmd_tracking + " \\",
        " --parallel-layouts --layout-workers 2",
        " (Reduces from 4 to 2 layout workers to reduce queue pressure)",
        "\n2. CONSERVATIVE FIX (single layout worker):",
        cmd_head,
        cmd_thresholds,
        cmd_tracking + " \\",
        " --parallel-layouts --layout-workers 1",
        " (Essentially sequential with queue coordination)",
        "\n3. OPTIMAL FIX (disable parallel layouts for now):",
        cmd_head,
        cmd_thresholds,
        # Last line of this command, so no continuation backslash.
        cmd_tracking,
        " (Use original sequential processing - more reliable)",
        "\n💡 TECHNICAL EXPLANATIONS:",
        "- The 72.7% swap usage is not necessarily bad if system is responsive",
        "- Inlier analysis is CPU/memory intensive and benefits from being serial",
        "- Queue bottleneck occurs when producers (layout workers) > consumers (1 inlier worker)",
        "- Each split analysis can take 60-167s, making parallelism counterproductive",
        "\n⚙️ LONG-TERM IMPROVEMENTS IMPLEMENTED:",
        "- More lenient memory pressure thresholds",
        "- Queue pressure detection and automatic worker reduction",
        "- Stall detection with timeout handling",
        "- Better progress monitoring and diagnostics",
        "\n🎯 RECOMMENDATION:",
        "For your current dataset, use option 1 (2 layout workers) or option 3 (sequential).",
        "The parallel implementation works but needs tuning for your specific workload.",
        "\n" + rule,
    )

    for line in report:
        print(line)


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()