video-master-adapt/batch_match.py
nickviljoen 891c36bbfb Add standalone desktop application with web interface
Major Features:
- 🖥️ Standalone desktop app (VideoMatcher.app) - double-click to run
- 🎨 Black & gold branded UI (Montserrat font, #FFC407 accent)
- 📁 Local file browser for master/adaptation folders
- ⚡ Fast mode processing (10-20x faster, disables AKAZE/AI Vision)
- 🤖 Smart AI Vision fallback (auto-retry when no matches found)
- 📊 Real-time progress bars (fingerprinting & matching)
- 💾 Local processing (no cloud, no authentication)
- 📤 CSV export with master filenames

Web Application (Enterprise):
- 🌐 Flask web app with Azure AD authentication
- 📦 Box.com integration for cloud storage
- 🐳 Docker support for deployment
- 🔐 JWT validation with httpOnly cookies
- 🎯 REST API endpoints

Enhancements:
- Fixed master filename lookup (was showing "Unknown")
- Automatic fingerprint recovery (detects missing files)
- Improved CSV format (master file next to adaptation)
- Port conflict handling (auto-finds available port)
- Environment variable fixes for standalone mode

Documentation:
- Updated README with standalone app section
- Added 10+ guide documents (UI improvements, fingerprint recovery, etc.)
- Build instructions with PyInstaller
- Comprehensive troubleshooting guide

Technical:
- PyInstaller build configuration (video_matcher.spec)
- Launcher with environment setup (launcher.py)
- Mock authentication for standalone mode
- Video matcher service layer
- Metadata parser and AKAZE video matching

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-31 09:49:04 +02:00

519 lines
17 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Batch match adaptations from a folder and generate HTML report.
"""
import sys
import json
from pathlib import Path
from datetime import datetime
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
# Put the sibling "src" directory first on the import path; the
# video_matcher import on the next line is presumably resolved from there.
sys.path.insert(0, str(Path(__file__).parent / "src"))
from video_matcher.matcher import VideoMatcher
# Shared Rich console used for all terminal output in this script.
console = Console()
# Lower-case file suffixes (including the leading dot) that are treated as
# video files when batch_match_folder scans a folder for adaptations.
VIDEO_EXTENSIONS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv', '.wmv', '.m4v'}
def _esc(value):
    """Return *value* as text that is safe to embed in HTML content or attributes."""
    # Local import keeps this block self-contained (no file-level import change).
    from html import escape

    return escape(str(value), quote=True)


def _render_match(idx, match):
    """Return the HTML fragment for one master match (1-based position *idx*)."""
    confidence_label = str(match['confidence'])
    # The label doubles as a CSS class suffix, e.g. "Very High" -> "very-high".
    confidence_class = confidence_label.lower().replace(' ', '-')
    # Fall back to "hash" when the method is missing or explicitly None.
    method_label = str(match.get('matching_method') or 'hash').upper().replace('_', ' ')
    master_duration = match['master_duration']
    video_percentage = match['video_percentage']
    combined_score = match['combined_score']
    return f"""
<div class="match-item">
<div class="match-header">
<div class="master-name">
#{idx} {_esc(match['master_id'])}
</div>
<div class="confidence-badge confidence-{_esc(confidence_class)}">
{_esc(confidence_label)}
</div>
</div>
<div class="match-details">
<div class="detail-item">
<div class="detail-label">Duration</div>
<div class="detail-value">{master_duration:.0f}s</div>
</div>
<div class="detail-item">
<div class="detail-label">Video Match</div>
<div class="detail-value">{video_percentage:.1f}%</div>
</div>
<div class="detail-item">
<div class="detail-label">Frames</div>
<div class="detail-value">{_esc(match['matching_frames'])}/{_esc(match['total_frames'])}</div>
</div>
<div class="detail-item">
<div class="detail-label">Combined Score</div>
<div class="detail-value">{combined_score:.1%}</div>
</div>
<div class="detail-item">
<div class="detail-label">Method</div>
<div class="detail-value" style="font-size: 0.9em;">
{_esc(method_label)}
</div>
</div>
</div>
<div class="progress-bar">
<div class="progress-fill" style="width: {video_percentage:.0f}%"></div>
</div>
</div>
"""


def _render_adaptation(result):
    """Return the HTML card for one adaptation: header plus error, empty note, or matches."""
    matches = result.get('matches') or []
    error = result.get('error')
    match_count = len(matches)
    match_class = '' if matches else 'no-matches'
    plural = '' if match_count == 1 else 'es'

    parts = [f"""
<div class="adaptation">
<div class="adaptation-header">
<div class="adaptation-name">{_esc(result['adaptation_name'])}</div>
<div class="match-count {match_class}">
{match_count} Match{plural}
</div>
</div>
"""]

    if error:
        # An error takes precedence over any match list.
        parts.append(f"""
<div class="error-message">
<strong>Error:</strong> {_esc(error)}
</div>
""")
    elif not matches:
        parts.append("""
<div class="no-match-message">
No matching masters found above threshold
</div>
""")
    else:
        parts.append("""
<div class="matches-list">
""")
        parts.extend(_render_match(idx, match) for idx, match in enumerate(matches, 1))
        parts.append("""
</div>
""")

    parts.append("""
</div>
""")
    return ''.join(parts)


def generate_html_report(results, output_path, folder_path):
    """Generate an HTML report from matching results.

    Args:
        results: Iterable-with-length of dicts, one per adaptation, with the
            keys 'adaptation_name', 'matches' (list of match dicts, may be
            empty or None) and optionally 'error'. Each match dict supplies
            'master_id', 'confidence', 'master_duration', 'video_percentage',
            'matching_frames', 'total_frames', 'combined_score' and
            optionally 'matching_method'.
        output_path: Path (str or path-like) of the HTML file to write; it is
            overwritten if it already exists.
        folder_path: Source folder shown in the report header.

    All text taken from *results* and *folder_path* is HTML-escaped, because
    file names and exception messages may contain '<', '>' or '&'.
    """
    # Take the timestamp once so the title and the header always agree.
    generated_at = datetime.now()

    all_matches = [m for r in results for m in (r.get('matches') or [])]
    processed_count = len(results)
    matched_count = sum(1 for r in results if r.get('matches'))
    unmatched_count = processed_count - matched_count
    akaze_count = sum(1 for m in all_matches if m.get('matching_method') == 'akaze')
    ai_vision_count = sum(1 for m in all_matches if m.get('matching_method') == 'ai_vision')

    head = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Video Matching Report - {generated_at:%Y-%m-%d %H:%M}</title>
<style>
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
color: #333;
}}
.container {{
max-width: 1400px;
margin: 0 auto;
}}
.header {{
background: white;
padding: 30px;
border-radius: 15px;
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
margin-bottom: 30px;
}}
.header h1 {{
color: #667eea;
margin-bottom: 10px;
font-size: 2.5em;
}}
.header .meta {{
color: #666;
font-size: 0.95em;
}}
.summary {{
background: white;
padding: 25px;
border-radius: 15px;
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
margin-bottom: 30px;
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
}}
.stat {{
text-align: center;
padding: 15px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 10px;
color: white;
}}
.stat-value {{
font-size: 2.5em;
font-weight: bold;
display: block;
}}
.stat-label {{
font-size: 0.9em;
opacity: 0.9;
margin-top: 5px;
}}
.adaptation {{
background: white;
padding: 25px;
border-radius: 15px;
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
margin-bottom: 25px;
transition: transform 0.2s;
}}
.adaptation:hover {{
transform: translateY(-2px);
box-shadow: 0 15px 40px rgba(0,0,0,0.25);
}}
.adaptation-header {{
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 20px;
padding-bottom: 15px;
border-bottom: 2px solid #f0f0f0;
}}
.adaptation-name {{
font-size: 1.4em;
font-weight: 600;
color: #333;
word-break: break-all;
}}
.match-count {{
background: #667eea;
color: white;
padding: 8px 16px;
border-radius: 20px;
font-weight: 600;
font-size: 0.9em;
}}
.no-matches {{
background: #ff6b6b;
}}
.matches-list {{
display: grid;
gap: 15px;
}}
.match-item {{
background: #f8f9fa;
padding: 20px;
border-radius: 10px;
border-left: 5px solid #667eea;
transition: all 0.2s;
}}
.match-item:hover {{
background: #e9ecef;
border-left-width: 8px;
}}
.match-header {{
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 15px;
}}
.master-name {{
font-weight: 600;
color: #333;
font-size: 1.1em;
}}
.confidence-badge {{
padding: 6px 14px;
border-radius: 15px;
font-weight: 600;
font-size: 0.85em;
text-transform: uppercase;
letter-spacing: 0.5px;
}}
.confidence-very-high {{ background: #51cf66; color: white; }}
.confidence-high {{ background: #69db7c; color: white; }}
.confidence-medium {{ background: #ffd43b; color: #333; }}
.confidence-low {{ background: #ff922b; color: white; }}
.confidence-very-low {{ background: #ff6b6b; color: white; }}
.match-details {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
gap: 12px;
margin-top: 12px;
}}
.detail-item {{
background: white;
padding: 10px;
border-radius: 8px;
text-align: center;
}}
.detail-label {{
font-size: 0.8em;
color: #666;
text-transform: uppercase;
letter-spacing: 0.5px;
margin-bottom: 5px;
}}
.detail-value {{
font-size: 1.2em;
font-weight: 600;
color: #667eea;
}}
.no-match-message {{
text-align: center;
padding: 30px;
color: #999;
font-style: italic;
}}
.error-message {{
background: #ffe0e0;
border-left: 5px solid #ff6b6b;
padding: 15px;
border-radius: 8px;
color: #c92a2a;
}}
.footer {{
text-align: center;
padding: 20px;
color: white;
margin-top: 30px;
font-size: 0.9em;
}}
.progress-bar {{
width: 100%;
height: 8px;
background: #e9ecef;
border-radius: 4px;
overflow: hidden;
margin-top: 8px;
}}
.progress-fill {{
height: 100%;
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
transition: width 0.3s ease;
}}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>🎬 Video Matching Report</h1>
<div class="meta">
<strong>Generated:</strong> {generated_at:%Y-%m-%d %H:%M:%S}<br>
<strong>Source Folder:</strong> {_esc(folder_path)}
</div>
</div>
<div class="summary">
<div class="stat">
<span class="stat-value">{processed_count}</span>
<span class="stat-label">Adaptations Processed</span>
</div>
<div class="stat">
<span class="stat-value">{matched_count}</span>
<span class="stat-label">Matched</span>
</div>
<div class="stat">
<span class="stat-value">{unmatched_count}</span>
<span class="stat-label">No Matches</span>
</div>
<div class="stat">
<span class="stat-value">{len(all_matches)}</span>
<span class="stat-label">Total Master Matches</span>
</div>
<div class="stat">
<span class="stat-value">{akaze_count}</span>
<span class="stat-label">AKAZE Matches</span>
</div>
<div class="stat">
<span class="stat-value">{ai_vision_count}</span>
<span class="stat-label">AI Vision Matches</span>
</div>
</div>
"""

    footer = """
<div class="footer">
Generated by Video Master-Adaptation Detection Tool<br>
🚀 Enhanced with AKAZE feature matching + metadata filtering + AI Vision fallback
</div>
</div>
</body>
</html>
"""

    # Collect fragments and join once instead of growing one string with +=.
    parts = [head]
    parts.extend(_render_adaptation(result) for result in results)
    parts.append(footer)

    # Write HTML file
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))
def batch_match_folder(folder_path, threshold=0.80, frame_threshold=0.80, min_avg_similarity=0.90, output_file=None):
    """
    Match all videos in a folder against masters and generate report.

    Progress and a summary are printed to the console. The function returns
    None in every case; when the folder is missing, is not a directory,
    contains no videos, or the master library is empty, it prints a message
    and returns without writing a report.

    Args:
        folder_path: Path to folder containing adaptation videos
        threshold: Minimum percentage match threshold
        frame_threshold: Frame similarity threshold
        min_avg_similarity: Minimum average similarity of matched frames
        output_file: Output HTML file path (default: auto-generated
            "matching_report_<timestamp>.html" in the current directory)
    """
    # rich is already a dependency of this script; escape() stops file names
    # or error text containing "[...]" from being interpreted as console markup.
    from rich.markup import escape

    folder_path = Path(folder_path)
    if not folder_path.exists():
        console.print(f"[red]✗[/red] Folder not found: {escape(str(folder_path))}")
        return
    if not folder_path.is_dir():
        console.print(f"[red]✗[/red] Not a directory: {escape(str(folder_path))}")
        return

    # Find all video files. Comparing the lower-cased suffix accepts any
    # capitalisation (".MP4", ".Mp4"), never lists a file twice, skips
    # directories, and sorting keeps the report order stable between runs.
    video_files = sorted(
        entry for entry in folder_path.iterdir()
        if entry.is_file() and entry.suffix.lower() in VIDEO_EXTENSIONS
    )
    if not video_files:
        console.print(f"[yellow]No video files found in {escape(str(folder_path))}[/yellow]")
        return
    console.print(f"\n[bold]Found {len(video_files)} video file(s) to process[/bold]\n")

    # Initialize matcher
    matcher = VideoMatcher()

    # Check if we have masters
    masters = matcher.list_masters()
    if not masters:
        console.print("[red]✗[/red] No master videos found in library.")
        console.print("Use 'python cli.py add-master' to add masters first.")
        return
    console.print(f"[cyan]Comparing against {len(masters)} master(s)...[/cyan]\n")

    # Process each video
    results = []
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        console=console
    ) as progress:
        task = progress.add_task("[cyan]Processing adaptations...", total=len(video_files))
        for video_file in video_files:
            safe_name = escape(video_file.name)
            progress.update(task, description=f"[cyan]Processing {safe_name}...")
            try:
                matches = matcher.match_adaptation(
                    str(video_file),
                    threshold=threshold,
                    frame_threshold=frame_threshold,
                    min_avg_similarity=min_avg_similarity
                )
            except Exception as e:
                # Per-file boundary: one unreadable video must not abort the
                # whole batch, so record the failure in the report instead.
                console.print(f"[red]✗[/red] Error processing {safe_name}: {escape(str(e))}")
                results.append({
                    'adaptation_name': video_file.name,
                    'adaptation_path': str(video_file),
                    'matches': [],
                    'error': str(e)
                })
            else:
                results.append({
                    'adaptation_name': video_file.name,
                    'adaptation_path': str(video_file),
                    'matches': matches,
                    'error': None
                })
            progress.advance(task)

    # Generate output filename if not specified
    if output_file is None:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        output_file = f"matching_report_{timestamp}.html"
    output_path = Path(output_file)

    # Generate HTML report
    console.print("\n[cyan]Generating HTML report...[/cyan]")
    generate_html_report(results, output_path, str(folder_path))

    # Summary
    matched_count = sum(1 for r in results if r['matches'])
    total_matches = sum(len(r['matches'] or []) for r in results)
    report_location = output_path.absolute()
    console.print("\n[bold green]✓ Report generated successfully![/bold green]")
    console.print("\n[bold]Summary:[/bold]")
    console.print(f" Total adaptations: {len(results)}")
    console.print(f" Matched: {matched_count}")
    console.print(f" No matches: {len(results) - matched_count}")
    console.print(f" Total master matches: {total_matches}")
    console.print(f"\n[bold cyan]📄 Report saved to:[/bold cyan] {escape(str(report_location))}")
    # as_uri() yields a well-formed, percent-encoded file:// URL on every OS.
    console.print(f"\n[dim]Open in browser: {report_location.as_uri()}[/dim]")
if __name__ == '__main__':
    # click is only needed when this file is run as a script, so it is
    # imported here rather than at module import time.
    import click

    @click.command()
    @click.argument('folder_path', type=click.Path(exists=True))
    @click.option(
        '--threshold', '-t',
        default=0.80,
        type=float,
        help='Minimum percentage match (0-1)',
    )
    @click.option(
        '--frame-threshold', '-f',
        default=0.80,
        type=float,
        help='Frame similarity threshold (0-1)',
    )
    @click.option(
        '--min-avg-similarity', '-m',
        default=0.90,
        type=float,
        help='Minimum average similarity of matched frames (0-1)',
    )
    @click.option(
        '--output', '-o',
        default=None,
        help='Output HTML file path',
    )
    def main(folder_path, threshold, frame_threshold, min_avg_similarity, output):
        """Batch match all videos in a folder and generate HTML report."""
        # Forward the parsed command-line values unchanged to the batch runner.
        batch_match_folder(
            folder_path,
            threshold=threshold,
            frame_threshold=frame_threshold,
            min_avg_similarity=min_avg_similarity,
            output_file=output,
        )

    main()