video-master-adapt/cli.py
2025-10-15 16:25:04 +02:00

382 lines
15 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Video Master-Adaptation Detection CLI
A proof-of-concept tool to detect which master video files were used
to create adaptation videos.
"""
import sys
import click
from pathlib import Path
from rich.console import Console
from rich.table import Table
from rich import box
# Put the sibling "src" directory at the front of the import path so the
# `video_matcher` import below can be resolved from it (presumably that is
# where the package lives -- confirm against the repository layout).
sys.path.insert(0, str(Path(__file__).parent / "src"))
from video_matcher.matcher import VideoMatcher
# Single Rich console instance used by every command in this file for output.
console = Console()
@click.group()
@click.version_option(version="0.1.0")
def cli():
    """Video Master-Adaptation Detection Tool"""
    # Root command group. It carries no logic of its own: the subcommands
    # attached through ``@cli.command()`` do the actual work, and click
    # supplies ``--help`` / ``--version`` handling. The docstring above is
    # shown verbatim as the top-level help text.
@cli.command()
@click.argument('video_path', type=click.Path(exists=True))
@click.option('--id', 'master_id', help='Custom ID for the master video')
def add_master(video_path, master_id):
    """Add a master video to the library."""
    # NOTE: the docstring above is what click prints for ``--help``, so the
    # detailed notes live in comments instead.
    #
    # video_path: path to an existing file (click validates existence).
    # master_id:  optional custom identifier; None when ``--id`` is omitted,
    #             in which case the matcher decides the ID.
    #
    # Exits with status 1 if the matcher raises.

    # Imported locally so this command stays self-contained.
    from rich.markup import escape

    try:
        matcher = VideoMatcher()
        matcher.add_master(video_path, master_id)
    except Exception as e:
        # Exception text is arbitrary (it often embeds file paths). Without
        # escaping, Rich would treat bracketed fragments such as "[final]" as
        # markup tags and drop them, or raise on a stray "[/...]".
        console.print(f"[red]✗[/red] Error: {escape(str(e))}")
        sys.exit(1)
    else:
        console.print("[green]✓[/green] Master video added successfully")
@cli.command()
def list_masters():
    """List all master videos in the library."""
    # Prints a table with one row per master (ID, filename, duration, path),
    # or a hint on how to add masters when the library is empty.
    # Exits with status 1 if the matcher raises.

    # Imported locally so this command stays self-contained.
    from rich.markup import escape

    def _cell(value):
        # Table cells are parsed as Rich markup. Escape real strings so that
        # names like "clip[v2].mp4" are shown literally; leave anything else
        # (None, renderables) exactly as it was handed to us.
        return escape(value) if isinstance(value, str) else value

    try:
        matcher = VideoMatcher()
        masters = matcher.list_masters()
        if not masters:
            console.print("[yellow]No master videos found.[/yellow]")
            console.print("Use 'add-master' to add master videos.")
            return

        table = Table(title="Master Videos", box=box.ROUNDED)
        table.add_column("ID", style="cyan")
        table.add_column("Filename", style="green")
        table.add_column("Duration", style="yellow")
        table.add_column("Path", style="dim")

        for master in masters:
            # A missing duration is displayed as 0.0s.
            duration = f"{master.get('duration', 0):.1f}s"
            table.add_row(
                _cell(master['master_id']),
                _cell(master['filename']),
                duration,
                _cell(master['path']),
            )
        console.print(table)
    except Exception as e:
        # Escape the message so bracketed text inside it is not interpreted
        # as Rich markup.
        console.print(f"[red]✗[/red] Error: {escape(str(e))}")
        sys.exit(1)
def _match_confidence_style(confidence):
    """Return the Rich colour name used to display a confidence label."""
    if confidence in ("Very High", "High"):
        return "green"
    if confidence == "Medium":
        return "yellow"
    return "red"


def _print_match_table(matches):
    """Print the ranked results table for the ``match`` command.

    ``matches`` is the non-empty list of result dicts returned by the
    matcher; rows are numbered from 1 in the order given.
    """
    from rich.markup import escape

    table = Table(box=box.ROUNDED)
    table.add_column("Rank", style="cyan", justify="right")
    table.add_column("Master ID", style="green")
    table.add_column("Duration", style="dim", justify="right")
    table.add_column("Video Match", style="yellow", justify="right")
    table.add_column("Frames", style="blue", justify="center")
    table.add_column("Combined", style="cyan", justify="right")
    table.add_column("Confidence", style="bold")
    table.add_column("Method", style="magenta")

    for rank, result in enumerate(matches, 1):
        confidence = result['confidence']
        conf_style = _match_confidence_style(confidence)
        matching_method = result.get('matching_method', 'perceptual_hash')
        method_display = "AI Vision" if matching_method == "ai_vision" else "Hash"

        # Cells are parsed as Rich markup; escape string IDs so brackets in
        # them are shown literally instead of being read as style tags.
        master_id = result['master_id']
        if isinstance(master_id, str):
            master_id = escape(master_id)

        table.add_row(
            str(rank),
            master_id,
            f"{result['master_duration']:.0f}s",
            f"{result['video_percentage']:.1f}%",
            f"{result['matching_frames']}/{result['total_frames']}",
            f"{result['combined_score']:.3f}",
            f"[{conf_style}]{confidence}[/{conf_style}]",
            method_display,
        )
    console.print(table)


def _print_best_match(matches):
    """Print the detailed summary of the top result plus ties and runners-up.

    ``matches`` must be non-empty; ``matches[0]`` is treated as the best match.
    """
    from rich.markup import escape

    top = matches[0]
    console.print("\n[bold]Best Match:[/bold]")
    console.print(f" Master: [green]{escape(str(top['master_id']))}[/green]")
    console.print(f" Duration: {top['master_duration']:.0f}s")
    console.print(f" Video frames matched: {top['video_percentage']:.1f}% ({top['matching_frames']}/{top['total_frames']} frames)")
    console.print(f" Average frame similarity: {top['average_frame_similarity']:.1%}")
    console.print(f" Audio similarity: {top['audio_similarity']:.3f}")
    console.print(f" Combined confidence: {top['combined_score']:.1%}")

    # Show AI vision specific info if applicable
    if top.get('matching_method') == 'ai_vision':
        console.print("\n[bold magenta]AI Vision Analysis:[/bold magenta]")
        console.print(" Method: GPT-4V (OpenAI)")
        if top.get('is_crop'):
            console.print(" Format: [yellow]Adaptation is cropped from master[/yellow]")
        if top.get('ai_explanation'):
            console.print("\n [dim]AI Reasoning:[/dim]")
            # Show only the explanation part, not the full structured response
            explanation = top['ai_explanation']
            if 'EXPLANATION:' in explanation:
                explanation = explanation.split('EXPLANATION:')[1].strip()
            # Limit to first 200 chars for brevity
            if len(explanation) > 200:
                explanation = explanation[:200] + "..."
            # Escape *after* truncating so an escape sequence is never cut
            # in half; the text is free-form model output.
            console.print(f" [dim]{escape(explanation)}[/dim]")

    if len(matches) > 1:
        # Group by score to show ties
        top_score = top['combined_score']
        tied = [m for m in matches if m['combined_score'] == top_score]
        if len(tied) > 1:
            console.print(f"\n[bold yellow]Note:[/bold yellow] {len(tied)} masters have identical scores.")
            console.print(" Ranked by duration (longest master selected as likely source):")
            for entry in tied[:5]:  # Show top 5 ties
                console.print(f"{escape(str(entry['master_id']))} ({entry['master_duration']:.0f}s)")

        runners_up = [m for m in matches[1:] if m['combined_score'] != top_score]
        if runners_up:
            console.print("\n[bold]Other Potential Matches:[/bold]")
            for entry in runners_up[:10]:  # Limit to top 10
                console.print(f"{escape(str(entry['master_id']))} ({entry['master_duration']:.0f}s): {entry['video_percentage']:.1f}% video, {entry['combined_score']:.1%} combined")

    console.print(f"\n[dim]Path: {escape(str(top['master_path']))}[/dim]")


@cli.command()
@click.argument('video_path', type=click.Path(exists=True))
@click.option(
    '--threshold', '-t', default=0.80, type=float,
    help='Minimum percentage of matching frames (0-1, e.g., 0.80 = 80%)',
)
@click.option(
    '--frame-threshold', '-f', default=0.80, type=float,
    help='Similarity threshold for individual frames (0-1)',
)
@click.option(
    '--min-avg-similarity', '-m', default=0.90, type=float,
    help='Minimum average similarity of matched frames (0-1)',
)
def match(video_path, threshold, frame_threshold, min_avg_similarity):
    """Match an adaptation video against master videos using spatial-only matching.
    This method ignores temporal order and can handle:
    - Speed changes (slow-mo, time-lapse)
    - Shot reordering
    - Non-linear edits
    - Different durations
    """
    # The docstring above doubles as the ``--help`` text and is kept verbatim.
    #
    # Flow: verify the library has masters, run the matcher, then print a
    # ranked table followed by a detailed summary of the best match.
    # Exits with status 1 when the library is empty or anything raises.
    try:
        matcher = VideoMatcher()

        # Check if we have any masters
        if not matcher.list_masters():
            console.print("[red]✗[/red] No master videos found in library.")
            console.print("Use 'add-master' to add master videos first.")
            # SystemExit is not an ``Exception``, so it is not swallowed by
            # the handler below.
            sys.exit(1)

        # Perform matching
        matches = matcher.match_adaptation(
            video_path,
            threshold=threshold,
            frame_threshold=frame_threshold,
            min_avg_similarity=min_avg_similarity,
        )
        if not matches:
            console.print(f"\n[yellow]No matches found above threshold {threshold}[/yellow]")
            return

        # Display results
        console.print(f"\n[green]Found {len(matches)} master(s) matching this adaptation:[/green]\n")
        _print_match_table(matches)
        _print_best_match(matches)
    except Exception as e:
        import traceback
        from rich.markup import escape

        # Escape the message so bracketed text in it is not parsed as markup.
        console.print(f"[red]✗[/red] Error: {escape(str(e))}")
        traceback.print_exc()
        sys.exit(1)
@cli.command()
def clear():
    """Clear all master videos from the library."""
    # Destructive operation: nothing happens unless the user confirms at the
    # interactive prompt. Exits with status 1 if the matcher raises.

    # Imported locally so this command stays self-contained.
    from rich.markup import escape

    if not click.confirm('Are you sure you want to clear all masters?'):
        return

    try:
        matcher = VideoMatcher()
        matcher.clear_masters()
        console.print("[green]✓[/green] All masters cleared")
    except Exception as e:
        # Escape the message so bracketed text in it is not parsed as Rich
        # markup (which would drop it or raise a MarkupError).
        console.print(f"[red]✗[/red] Error: {escape(str(e))}")
        sys.exit(1)
@cli.command()
@click.argument('folder_path', type=click.Path(exists=True))
@click.option('--threshold', '-t', default=0.80, type=float, help='Minimum percentage match (0-1)')
@click.option('--frame-threshold', '-f', default=0.80, type=float, help='Frame similarity threshold (0-1)')
@click.option('--min-avg-similarity', '-m', default=0.90, type=float, help='Minimum average similarity of matched frames (0-1)')
@click.option('--output', '-o', default=None, help='Output HTML file path')
def batch_match(folder_path, threshold, frame_threshold, min_avg_similarity, output):
    """Match all videos in a folder and generate HTML report.
    This will process all video files in the specified folder, match them
    against your master videos, and generate a beautiful HTML report showing
    which masters were used for each adaptation.
    """
    # The docstring above doubles as the ``--help`` text and is kept verbatim.
    #
    # folder_path: existing path; only its direct children are scanned.
    # output:      report path; defaults to a timestamped file name in the
    #              current working directory.
    #
    # A failure on one video is recorded in that video's result entry and
    # does not abort the batch. Exits with status 1 when the library is empty
    # or anything outside the per-video loop raises.
    try:
        from datetime import datetime
        from rich.markup import escape
        from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn

        folder_path = Path(folder_path)

        # Common video extensions
        VIDEO_EXTENSIONS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv', '.wmv', '.m4v'}

        # Find all video files. Comparing the lower-cased suffix finds any
        # capitalisation (".MP4", ".Mp4") exactly once, and sorting gives a
        # stable processing/report order. A non-directory path simply yields
        # no files.
        if folder_path.is_dir():
            video_files = sorted(
                entry for entry in folder_path.iterdir()
                if entry.is_file() and entry.suffix.lower() in VIDEO_EXTENSIONS
            )
        else:
            video_files = []

        if not video_files:
            console.print(f"[yellow]No video files found in {escape(str(folder_path))}[/yellow]")
            return
        console.print(f"\n[bold]Found {len(video_files)} video file(s) to process[/bold]\n")

        # Initialize matcher
        matcher = VideoMatcher()

        # Check if we have masters
        masters = matcher.list_masters()
        if not masters:
            console.print("[red]✗[/red] No master videos found in library.")
            console.print("Use 'add-master' to add masters first.")
            sys.exit(1)
        console.print(f"[cyan]Comparing against {len(masters)} master(s)...[/cyan]\n")

        # Resolve the report generator *before* the (potentially long)
        # matching run, so a missing or broken module is reported right away
        # instead of after all the work has been done.
        sys.path.insert(0, str(Path(__file__).parent))
        from batch_match import generate_html_report

        # Process each video
        results = []
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
            console=console,
        ) as progress:
            task = progress.add_task("[cyan]Processing adaptations...", total=len(video_files))
            for video_file in video_files:
                # File names routinely contain brackets; escape them so Rich
                # does not read them as markup tags.
                display_name = escape(video_file.name)
                progress.update(task, description=f"[cyan]Processing {display_name}...")
                try:
                    matches = matcher.match_adaptation(
                        str(video_file),
                        threshold=threshold,
                        frame_threshold=frame_threshold,
                        min_avg_similarity=min_avg_similarity,
                    )
                    results.append({
                        'adaptation_name': video_file.name,
                        'adaptation_path': str(video_file),
                        'matches': matches,
                        'error': None,
                    })
                except Exception as e:
                    # Best effort: record the failure and carry on with the
                    # remaining videos.
                    console.print(f"[red]✗[/red] Error processing {display_name}: {escape(str(e))}")
                    results.append({
                        'adaptation_name': video_file.name,
                        'adaptation_path': str(video_file),
                        'matches': [],
                        'error': str(e),
                    })
                progress.advance(task)

        # Generate output filename if not specified
        if output is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            output = f"matching_report_{timestamp}.html"
        output_path = Path(output)

        # Generate HTML report
        console.print("\n[cyan]Generating HTML report...[/cyan]")
        generate_html_report(results, output_path, str(folder_path))

        # Summary
        console.print("\n[bold green]✓ Report generated successfully![/bold green]")
        console.print("\n[bold]Summary:[/bold]")
        console.print(f" Total adaptations: {len(results)}")
        console.print(f" Matched: {sum(1 for r in results if r['matches'])}")
        console.print(f" No matches: {sum(1 for r in results if not r['matches'])}")
        console.print(f" Total master matches: {sum(len(r['matches']) for r in results)}")

        report_location = output_path.absolute()
        console.print(f"\n[bold cyan]📄 Report saved to:[/bold cyan] {escape(str(report_location))}")
        # as_uri() produces a well-formed file URL (percent-encoding spaces,
        # correct form for Windows drive paths), unlike "file://" + path.
        console.print(f"\n[dim]Open in browser: {escape(report_location.as_uri())}[/dim]")
    except Exception as e:
        import traceback
        from rich.markup import escape

        # Escape the message so bracketed text in it is not parsed as markup.
        console.print(f"[red]✗[/red] Error: {escape(str(e))}")
        traceback.print_exc()
        sys.exit(1)
@cli.command()
def status():
    """Show system status and configuration."""
    # Reports which optional tools/libraries are present (FFmpeg on PATH,
    # the `acoustid` and `tmkpy` modules importable) and how many masters the
    # library holds. Exits with status 1 if the library cannot be read.
    import shutil
    from rich.markup import escape

    console.print("[bold]Video Master Detection - System Status[/bold]\n")

    # Check dependencies
    table = Table(title="Dependencies", box=box.SIMPLE)
    table.add_column("Component", style="cyan")
    table.add_column("Status", style="green")

    # FFmpeg
    ffmpeg_available = shutil.which('ffmpeg') is not None
    table.add_row("FFmpeg", "✓ Available" if ffmpeg_available else "✗ Not found")

    # Chromaprint -- the import is only an availability probe.
    try:
        import acoustid  # noqa: F401
        table.add_row("Chromaprint/AcoustID", "✓ Available")
    except ImportError:
        table.add_row("Chromaprint/AcoustID", "✗ Not available")

    # TMK -- the import is only an availability probe.
    try:
        import tmkpy  # noqa: F401
        table.add_row("TMK", "✓ Available")
    except ImportError:
        table.add_row("TMK", "✗ Not available (using basic hashing)")

    console.print(table)

    # Show library stats. Like the other commands, a matcher failure is
    # reported as a one-line error rather than a raw traceback, and it must
    # not suppress the FFmpeg warning below, which may be the actual cause.
    library_error = None
    masters = []
    try:
        matcher = VideoMatcher()
        masters = matcher.list_masters()
    except Exception as e:
        library_error = e

    console.print("\n[bold]Library Statistics[/bold]")
    if library_error is None:
        console.print(f" Master videos: {len(masters)}")
    else:
        # Escape the message so bracketed text in it is not parsed as markup.
        console.print(f"[red]✗[/red] Error: {escape(str(library_error))}")

    if not ffmpeg_available:
        console.print("\n[yellow]⚠ Warning: FFmpeg not found. Please install FFmpeg.[/yellow]")

    if library_error is not None:
        # Keep the non-zero exit status an uncaught exception used to produce.
        sys.exit(1)
# Script entry point: when the file is executed directly (not imported),
# hand control to the click command group, which parses sys.argv.
if __name__ == "__main__":
    cli()