#!/usr/bin/env python3
"""
Video Master-Adaptation Detection CLI

A proof-of-concept tool to detect which master video files were used
to create adaptation videos.
"""

import shutil
import sys
import traceback
from datetime import datetime
from pathlib import Path
from typing import List

import click
from rich import box
from rich.console import Console
from rich.markup import escape
from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn
from rich.table import Table

# Add src to path so the bundled ``video_matcher`` package can be imported
# when this script is run straight from a checkout.
sys.path.insert(0, str(Path(__file__).parent / "src"))

from video_matcher.matcher import VideoMatcher

console = Console()

# File suffixes (lower-case) that ``batch-match`` treats as video files.
VIDEO_EXTENSIONS = frozenset(
    {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv', '.wmv', '.m4v'}
)

# Rich style used for each confidence label; any other label is shown in red.
_CONFIDENCE_STYLES = {"Very High": "green", "High": "green", "Medium": "yellow"}


def _safe(value) -> str:
    """Return ``value`` as text with Rich markup characters escaped.

    Values such as file names, IDs and exception messages may contain
    ``[...]`` sequences that Rich would otherwise interpret as style tags
    (dropping text or raising ``MarkupError``).
    """
    return escape(str(value))


def _fail(exc: BaseException, *, show_traceback: bool = False) -> None:
    """Print ``exc`` as an error and terminate the process with exit code 1.

    Must be called from inside an ``except`` block when ``show_traceback``
    is true, because the traceback of the exception being handled is printed.
    """
    console.print(f"[red]✗[/red] Error: {_safe(exc)}")
    if show_traceback:
        traceback.print_exc()
    sys.exit(1)


def _find_video_files(folder: Path) -> List[Path]:
    """Return the video files directly inside ``folder``, sorted by path.

    The suffix comparison is case-insensitive, every file is returned at most
    once, and the sorted order keeps report contents stable between runs.
    """
    return sorted(
        entry
        for entry in folder.iterdir()
        if entry.is_file() and entry.suffix.lower() in VIDEO_EXTENSIONS
    )


@click.group()
@click.version_option(version="0.1.0")
def cli():
    """Video Master-Adaptation Detection Tool"""
    pass


@cli.command()
@click.argument('video_path', type=click.Path(exists=True))
@click.option('--id', 'master_id', help='Custom ID for the master video')
def add_master(video_path, master_id):
    """Add a master video to the library."""
    try:
        matcher = VideoMatcher()
        matcher.add_master(video_path, master_id)
        console.print("[green]✓[/green] Master video added successfully")
    except Exception as e:
        _fail(e)


@cli.command()
def list_masters():
    """List all master videos in the library."""
    try:
        matcher = VideoMatcher()
        masters = matcher.list_masters()

        if not masters:
            console.print("[yellow]No master videos found.[/yellow]")
            console.print("Use 'add-master' to add master videos.")
            return

        table = Table(title="Master Videos", box=box.ROUNDED)
        table.add_column("ID", style="cyan")
        table.add_column("Filename", style="green")
        table.add_column("Duration", style="yellow")
        table.add_column("Path", style="dim")

        for master in masters:
            duration = f"{master.get('duration', 0):.1f}s"
            # Table cells are parsed as markup too, so escape library data.
            table.add_row(
                _safe(master['master_id']),
                _safe(master['filename']),
                duration,
                _safe(master['path']),
            )

        console.print(table)
    except Exception as e:
        _fail(e)


def _print_match_table(matches) -> None:
    """Print one ranked table row per entry in ``matches``."""
    table = Table(box=box.ROUNDED)
    table.add_column("Rank", style="cyan", justify="right")
    table.add_column("Master ID", style="green")
    table.add_column("Duration", style="dim", justify="right")
    table.add_column("Video Match", style="yellow", justify="right")
    table.add_column("Frames", style="blue", justify="center")
    table.add_column("Combined", style="cyan", justify="right")
    table.add_column("Confidence", style="bold")
    table.add_column("Method", style="magenta")

    for rank, result in enumerate(matches, 1):
        confidence = result['confidence']
        matching_method = result.get('matching_method', 'perceptual_hash')

        # Color code confidence
        conf_style = _CONFIDENCE_STYLES.get(confidence, "red")

        # Method display
        method_display = "AI Vision" if matching_method == "ai_vision" else "Hash"

        table.add_row(
            str(rank),
            _safe(result['master_id']),
            f"{result['master_duration']:.0f}s",
            f"{result['video_percentage']:.1f}%",
            f"{result['matching_frames']}/{result['total_frames']}",
            f"{result['combined_score']:.3f}",
            f"[{conf_style}]{_safe(confidence)}[/{conf_style}]",
            method_display,
        )

    console.print(table)


def _print_ai_vision_details(top) -> None:
    """Print the AI-vision specific fields of the best match ``top``."""
    console.print("\n[bold magenta]AI Vision Analysis:[/bold magenta]")
    console.print("  Method: GPT-4V (OpenAI)")
    if top.get('is_crop'):
        console.print("  Format: [yellow]Adaptation is cropped from master[/yellow]")
    if top.get('ai_explanation'):
        console.print("\n  [dim]AI Reasoning:[/dim]")
        # Show only the explanation part, not the full structured response
        explanation = top['ai_explanation']
        if 'EXPLANATION:' in explanation:
            # maxsplit=1 keeps everything after the first marker.
            explanation = explanation.split('EXPLANATION:', 1)[1].strip()
        # Limit to first 200 chars for brevity
        if len(explanation) > 200:
            explanation = explanation[:200] + "..."
        # Free-form model output: escape so brackets are printed literally.
        console.print(f"  [dim]{_safe(explanation)}[/dim]")


def _print_match_summary(matches) -> None:
    """Print the best-match summary, score ties and runner-up matches.

    ``matches`` must be non-empty; its first entry is treated as the best
    match.
    """
    top = matches[0]
    console.print("\n[bold]Best Match:[/bold]")
    console.print(f"  Master: [green]{_safe(top['master_id'])}[/green]")
    console.print(f"  Duration: {top['master_duration']:.0f}s")
    console.print(
        f"  Video frames matched: {top['video_percentage']:.1f}% "
        f"({top['matching_frames']}/{top['total_frames']} frames)"
    )
    console.print(f"  Average frame similarity: {top['average_frame_similarity']:.1%}")
    console.print(f"  Audio similarity: {top['audio_similarity']:.3f}")
    console.print(f"  Combined confidence: {top['combined_score']:.1%}")

    # Show AI vision specific info if applicable
    if top.get('matching_method') == 'ai_vision':
        _print_ai_vision_details(top)

    if len(matches) > 1:
        # Group by score to show ties
        top_score = top['combined_score']
        tied_matches = [m for m in matches if m['combined_score'] == top_score]
        if len(tied_matches) > 1:
            console.print(
                f"\n[bold yellow]Note:[/bold yellow] {len(tied_matches)} "
                "masters have identical scores."
            )
            console.print(
                "  Ranked by duration (longest master selected as likely source):"
            )
            for tied in tied_matches[:5]:  # Show top 5 ties
                console.print(
                    f"    • {_safe(tied['master_id'])} "
                    f"({tied['master_duration']:.0f}s)"
                )

        other_matches = [m for m in matches[1:] if m['combined_score'] != top_score]
        if other_matches:
            console.print("\n[bold]Other Potential Matches:[/bold]")
            for other in other_matches[:10]:  # Limit to top 10
                console.print(
                    f"  • {_safe(other['master_id'])} "
                    f"({other['master_duration']:.0f}s): "
                    f"{other['video_percentage']:.1f}% video, "
                    f"{other['combined_score']:.1%} combined"
                )

    console.print(f"\n[dim]Path: {_safe(top['master_path'])}[/dim]")


@cli.command()
@click.argument('video_path', type=click.Path(exists=True))
@click.option('--threshold', '-t', default=0.80, type=float,
              help='Minimum percentage of matching frames (0-1, e.g., 0.80 = 80%)')
@click.option('--frame-threshold', '-f', default=0.80, type=float,
              help='Similarity threshold for individual frames (0-1)')
@click.option('--min-avg-similarity', '-m', default=0.90, type=float,
              help='Minimum average similarity of matched frames (0-1)')
def match(video_path, threshold, frame_threshold, min_avg_similarity):
    """Match an adaptation video against master videos using spatial-only matching.

    This method ignores temporal order and can handle:
    - Speed changes (slow-mo, time-lapse)
    - Shot reordering
    - Non-linear edits
    - Different durations
    """
    try:
        matcher = VideoMatcher()

        # Check if we have any masters
        masters = matcher.list_masters()
        if not masters:
            console.print("[red]✗[/red] No master videos found in library.")
            console.print("Use 'add-master' to add master videos first.")
            # SystemExit is not an ``Exception``, so it bypasses the handler below.
            sys.exit(1)

        # Perform matching
        matches = matcher.match_adaptation(
            video_path,
            threshold=threshold,
            frame_threshold=frame_threshold,
            min_avg_similarity=min_avg_similarity,
        )

        if not matches:
            console.print(
                f"\n[yellow]No matches found above threshold {threshold}[/yellow]"
            )
            return

        # Display results
        console.print(
            f"\n[green]Found {len(matches)} master(s) matching this adaptation:[/green]\n"
        )
        _print_match_table(matches)
        _print_match_summary(matches)
    except Exception as e:
        _fail(e, show_traceback=True)


@cli.command()
def clear():
    """Clear all master videos from the library."""
    if click.confirm('Are you sure you want to clear all masters?'):
        try:
            matcher = VideoMatcher()
            matcher.clear_masters()
            console.print("[green]✓[/green] All masters cleared")
        except Exception as e:
            _fail(e)


@cli.command()
@click.argument('folder_path', type=click.Path(exists=True, file_okay=False))
@click.option('--threshold', '-t', default=0.80, type=float,
              help='Minimum percentage match (0-1)')
@click.option('--frame-threshold', '-f', default=0.80, type=float,
              help='Frame similarity threshold (0-1)')
@click.option('--min-avg-similarity', '-m', default=0.90, type=float,
              help='Minimum average similarity of matched frames (0-1)')
@click.option('--output', '-o', default=None, help='Output HTML file path')
def batch_match(folder_path, threshold, frame_threshold, min_avg_similarity, output):
    """Match all videos in a folder and generate HTML report.

    This will process all video files in the specified folder, match them
    against your master videos, and generate a beautiful HTML report showing
    which masters were used for each adaptation.
    """
    try:
        folder_path = Path(folder_path)

        # Find all video files (each exactly once, in a stable order)
        video_files = _find_video_files(folder_path)

        if not video_files:
            console.print(
                f"[yellow]No video files found in {_safe(folder_path)}[/yellow]"
            )
            return

        console.print(
            f"\n[bold]Found {len(video_files)} video file(s) to process[/bold]\n"
        )

        # Initialize matcher
        matcher = VideoMatcher()

        # Check if we have masters
        masters = matcher.list_masters()
        if not masters:
            console.print("[red]✗[/red] No master videos found in library.")
            console.print("Use 'add-master' to add masters first.")
            sys.exit(1)

        console.print(f"[cyan]Comparing against {len(masters)} master(s)...[/cyan]\n")

        # Process each video; a failure on one file is recorded in its result
        # entry and does not abort the rest of the batch.
        results = []
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
            console=console,
        ) as progress:
            task = progress.add_task(
                "[cyan]Processing adaptations...", total=len(video_files)
            )

            for video_file in video_files:
                progress.update(
                    task,
                    description=f"[cyan]Processing {_safe(video_file.name)}...",
                )

                try:
                    matches = matcher.match_adaptation(
                        str(video_file),
                        threshold=threshold,
                        frame_threshold=frame_threshold,
                        min_avg_similarity=min_avg_similarity,
                    )
                    results.append({
                        'adaptation_name': video_file.name,
                        'adaptation_path': str(video_file),
                        'matches': matches,
                        'error': None,
                    })
                except Exception as e:
                    console.print(
                        f"[red]✗[/red] Error processing "
                        f"{_safe(video_file.name)}: {_safe(e)}"
                    )
                    results.append({
                        'adaptation_name': video_file.name,
                        'adaptation_path': str(video_file),
                        'matches': [],
                        'error': str(e),
                    })

                progress.advance(task)

        # Generate output filename if not specified
        if output is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            output = f"matching_report_{timestamp}.html"

        output_path = Path(output)

        # Generate HTML report
        console.print("\n[cyan]Generating HTML report...[/cyan]")

        # The report generator lives in the sibling ``batch_match`` module;
        # it is imported lazily after making this script's directory importable.
        sys.path.insert(0, str(Path(__file__).parent))
        from batch_match import generate_html_report

        generate_html_report(results, output_path, str(folder_path))

        # Summary
        matched = sum(1 for r in results if r['matches'])
        report_path = output_path.absolute()
        console.print("\n[bold green]✓ Report generated successfully![/bold green]")
        console.print("\n[bold]Summary:[/bold]")
        console.print(f"  Total adaptations: {len(results)}")
        console.print(f"  Matched: {matched}")
        console.print(f"  No matches: {len(results) - matched}")
        console.print(
            f"  Total master matches: {sum(len(r['matches']) for r in results)}"
        )
        console.print(
            f"\n[bold cyan]📄 Report saved to:[/bold cyan] {_safe(report_path)}"
        )
        # ``as_uri`` builds a well-formed, percent-encoded file:// URL on every
        # platform (hand-concatenating "file://" breaks on Windows and on
        # paths that contain spaces).
        console.print(f"\n[dim]Open in browser: {_safe(report_path.as_uri())}[/dim]")
    except Exception as e:
        _fail(e, show_traceback=True)


@cli.command()
def status():
    """Show system status and configuration."""
    console.print("[bold]Video Master Detection - System Status[/bold]\n")

    # Check dependencies
    table = Table(title="Dependencies", box=box.SIMPLE)
    table.add_column("Component", style="cyan")
    table.add_column("Status", style="green")

    # FFmpeg: looked up on PATH
    ffmpeg_available = shutil.which('ffmpeg') is not None
    table.add_row("FFmpeg", "✓ Available" if ffmpeg_available else "✗ Not found")

    # Chromaprint: imported only to probe whether the binding is installed
    try:
        import acoustid  # noqa: F401
        table.add_row("Chromaprint/AcoustID", "✓ Available")
    except ImportError:
        table.add_row("Chromaprint/AcoustID", "✗ Not available")

    # TMK: imported only to probe whether the binding is installed
    try:
        import tmkpy  # noqa: F401
        table.add_row("TMK", "✓ Available")
    except ImportError:
        table.add_row("TMK", "✗ Not available (using basic hashing)")

    console.print(table)

    # Show library stats
    matcher = VideoMatcher()
    masters = matcher.list_masters()
    console.print("\n[bold]Library Statistics[/bold]")
    console.print(f"  Master videos: {len(masters)}")

    if not ffmpeg_available:
        console.print(
            "\n[yellow]⚠ Warning: FFmpeg not found. Please install FFmpeg.[/yellow]"
        )


if __name__ == '__main__':
    cli()