video-master-adapt/box_video_client.py
nickviljoen 891c36bbfb Add standalone desktop application with web interface
Major Features:
- 🖥️ Standalone desktop app (VideoMatcher.app) - double-click to run
- 🎨 Black & gold branded UI (Montserrat font, #FFC407 accent)
- 📁 Local file browser for master/adaptation folders
- ⚡ Fast mode processing (10-20x faster, disables AKAZE/AI Vision)
- 🤖 Smart AI Vision fallback (auto-retry when no matches found)
- 📊 Real-time progress bars (fingerprinting & matching)
- 💾 Local processing (no cloud, no authentication)
- 📤 CSV export with master filenames

Web Application (Enterprise):
- 🌐 Flask web app with Azure AD authentication
- 📦 Box.com integration for cloud storage
- 🐳 Docker support for deployment
- 🔐 JWT validation with httpOnly cookies
- 🎯 REST API endpoints

Enhancements:
- Fixed master filename lookup (was showing "Unknown")
- Automatic fingerprint recovery (detects missing files)
- Improved CSV format (master file next to adaptation)
- Port conflict handling (auto-finds available port)
- Environment variable fixes for standalone mode

Documentation:
- Updated README with standalone app section
- Added 10+ guide documents (UI improvements, fingerprint recovery, etc.)
- Build instructions with PyInstaller
- Comprehensive troubleshooting guide

Technical:
- PyInstaller build configuration (video_matcher.spec)
- Launcher with environment setup (launcher.py)
- Mock authentication for standalone mode
- Video matcher service layer
- Metadata parser and AKAZE video matching

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-31 09:49:04 +02:00

386 lines
13 KiB
Python

"""
Box.com client for video operations with file size checking and safety features.
This client handles:
- Folder and video listing
- File size/format validation
- Safe video downloads with progress tracking
- Warning generation for large/hi-res files
"""
import os
import logging
from pathlib import Path
from typing import List, Dict, Optional
from boxsdk import Client, JWTAuth
from boxsdk.exception import BoxAPIException
logger = logging.getLogger(__name__)


class BoxVideoClient:
    """
    Client for Box.com video operations with safety checks.

    Features:
    - Folder and video listing
    - File size and format validation before download
    - Warnings for large or likely hi-res files
    - Downloads into per-job directories, with partial files removed on failure
    - Errors are logged and then re-raised to the caller

    Files are classified by extension (ALLOWED / WARNING / BLOCKED formats)
    and by size (WARNING_SIZE and MAX_FILE_SIZE thresholds, in bytes).
    """

    # File format classifications (lower-case extensions, including the dot)
    ALLOWED_FORMATS = ['.mp4', '.webm', '.m4v']
    WARNING_FORMATS = ['.mov', '.avi', '.mkv']
    BLOCKED_FORMATS = ['.mxf', '.ari', '.r3d', '.dpx', '.prores']

    # Size limits (in bytes)
    MAX_FILE_SIZE = 2 * 1024 * 1024 * 1024  # 2GB
    WARNING_SIZE = 500 * 1024 * 1024  # 500MB

    def __init__(self, config_path: str, root_folder_id: Optional[str] = None,
                 max_file_size: Optional[int] = None,
                 warning_size: Optional[int] = None):
        """
        Initialize Box client with JWT authentication.

        Args:
            config_path: Path to Box JWT config file
            root_folder_id: Optional root folder ID for browsing
            max_file_size: Optional override for max file size (bytes).
                Falsy values (None or 0) keep the class default.
            warning_size: Optional override for warning threshold (bytes).
                Falsy values (None or 0) keep the class default.

        Raises:
            Exception: Whatever the Box SDK raises while loading the config
                or building the client; it is logged and re-raised.
        """
        try:
            auth = JWTAuth.from_settings_file(config_path)
            self.client = Client(auth)
            self.root_folder_id = root_folder_id

            # Override size limits if provided. The overrides are stored on
            # the instance, so the class-level defaults stay untouched.
            if max_file_size:
                self.MAX_FILE_SIZE = max_file_size
            if warning_size:
                self.WARNING_SIZE = warning_size

            logger.info("Box client initialized successfully")
        except Exception as e:
            logger.error("Failed to initialize Box client: %s", e)
            raise

    def list_folders(self, parent_folder_id: Optional[str] = None) -> List[Dict]:
        """
        List the sub-folders of a Box folder.

        Args:
            parent_folder_id: Parent folder ID (uses root if not provided)

        Returns:
            List of dicts with 'id', 'name' and 'type' ('folder') keys

        Raises:
            ValueError: If no folder ID is given and no root is configured
            BoxAPIException: If the Box API call fails
        """
        try:
            folder_id = parent_folder_id or self.root_folder_id
            if not folder_id:
                raise ValueError("No folder ID provided and no root folder configured")

            folders = [
                {'id': item.id, 'name': item.name, 'type': 'folder'}
                for item in self.client.folder(folder_id).get_items()
                if item.type == 'folder'
            ]

            logger.info("Listed %s folders in folder %s", len(folders), folder_id)
            return folders
        except BoxAPIException as e:
            logger.error("Box API error listing folders: %s", e)
            raise
        except Exception as e:
            logger.error("Error listing folders: %s", e)
            raise

    def list_videos(self, folder_id: str, include_metadata: bool = True) -> List[Dict]:
        """
        List video files in a Box folder with safety metadata.

        Files whose extension is in ALLOWED_FORMATS, WARNING_FORMATS or
        BLOCKED_FORMATS are returned; blocked files are listed too so the
        caller can show why they cannot be processed.

        Args:
            folder_id: Box folder ID
            include_metadata: Include file size and format metadata

        Returns:
            List of video dicts ('id', 'name', 'size', 'type'), merged with
            the output of _get_file_safety_info when include_metadata is True

        Raises:
            BoxAPIException: If the Box API call fails
        """
        try:
            folder = self.client.folder(folder_id)
            # Folder listings return "mini" items without a size by default,
            # so the fields this method reads are requested explicitly.
            items = folder.get_items(fields=['type', 'id', 'name', 'size'])
            known_extensions = set(
                self.ALLOWED_FORMATS + self.WARNING_FORMATS + self.BLOCKED_FORMATS
            )

            videos = []
            for item in items:
                if item.type != 'file':
                    continue
                if Path(item.name).suffix.lower() not in known_extensions:
                    continue

                video_info = {
                    'id': item.id,
                    'name': item.name,
                    'size': item.size,
                    'type': 'video'
                }
                if include_metadata:
                    # Add safety metadata
                    video_info.update(self._get_file_safety_info(item.name, item.size))
                videos.append(video_info)

            logger.info("Listed %s videos in folder %s", len(videos), folder_id)
            return videos
        except BoxAPIException as e:
            logger.error("Box API error listing videos: %s", e)
            raise
        except Exception as e:
            logger.error("Error listing videos: %s", e)
            raise

    def get_video_info(self, file_id: str) -> Dict:
        """
        Get detailed video metadata with safety assessment.

        Args:
            file_id: Box file ID

        Returns:
            Dict with 'id', 'name', 'size', 'size_mb', 'size_gb', 'extension'
            and 'type', merged with the output of _get_file_safety_info

        Raises:
            BoxAPIException: If the Box API call fails
        """
        try:
            box_file = self.client.file(file_id).get()

            info = {
                'id': box_file.id,
                'name': box_file.name,
                'size': box_file.size,
                'size_mb': round(box_file.size / (1024 * 1024), 2),
                'size_gb': round(box_file.size / (1024 * 1024 * 1024), 2),
                'extension': Path(box_file.name).suffix.lower(),
                'type': 'video'
            }
            # Add safety info
            info.update(self._get_file_safety_info(box_file.name, box_file.size))
            return info
        except BoxAPIException as e:
            logger.error("Box API error getting video info: %s", e)
            raise
        except Exception as e:
            logger.error("Error getting video info: %s", e)
            raise

    def check_files_before_download(self, video_ids: List[str]) -> Dict:
        """
        Check multiple files for safety before downloading.

        A failure while checking one file is recorded as an error entry for
        that file and does not stop the remaining checks. This method does
        not raise; an unexpected failure yields a result with 'safe' False.

        Args:
            video_ids: Box file IDs (any iterable of IDs is accepted)

        Returns:
            Dict with keys 'safe', 'warnings', 'errors', 'file_count',
            'total_size_mb', 'total_size_gb',
            'estimated_download_time_seconds' and 'file_info'. The same keys
            are present when the check itself fails.
        """
        try:
            # Materialize once so generators work and len() is always valid.
            ids = list(video_ids)

            warnings = []
            errors = []
            file_info = []
            total_size = 0

            for video_id in ids:
                try:
                    info = self.get_video_info(video_id)
                    total_size += info['size']
                    file_info.append(info)

                    # Check for issues (at most one entry per file)
                    if info['is_blocked']:
                        errors.append({
                            'file': info['name'],
                            'reason': f"Blocked format: {info['extension']} (raw/uncompressed)",
                            'action': 'Convert to MP4 before processing'
                        })
                    elif info['is_too_large']:
                        errors.append({
                            'file': info['name'],
                            'reason': f"File too large: {info['size_mb']}MB (max: {self.MAX_FILE_SIZE/(1024**2):.0f}MB)",
                            'action': 'Compress or transcode to smaller file'
                        })
                    elif info['needs_warning']:
                        warnings.append({
                            'file': info['name'],
                            'reason': info['warning_reason'],
                            'size_mb': info['size_mb']
                        })
                except Exception as e:
                    errors.append({
                        'file': video_id,
                        'reason': f"Error checking file: {str(e)}",
                        'action': 'Verify file exists and is accessible'
                    })

            # Calculate estimates
            result = {
                'safe': len(errors) == 0,
                'warnings': warnings,
                'errors': errors,
                'file_count': len(ids),
                'total_size_mb': round(total_size / (1024 * 1024), 2),
                'total_size_gb': round(total_size / (1024 * 1024 * 1024), 2),
                # Assume 10MB/s
                'estimated_download_time_seconds': round(total_size / (10 * 1024 * 1024)),
                'file_info': file_info
            }

            if errors:
                logger.warning("File check found %s errors", len(errors))
            elif warnings:
                logger.info("File check found %s warnings", len(warnings))
            return result
        except Exception as e:
            logger.error("Error checking files: %s", e)
            # Same keys as the normal result, so consumers can read
            # 'warnings', 'file_count', etc. without special-casing failure.
            return {
                'safe': False,
                'warnings': [],
                'errors': [{
                    'file': None,
                    'reason': str(e),
                    'action': 'Verify the list of file IDs and retry'
                }],
                'file_count': 0,
                'total_size_mb': 0.0,
                'total_size_gb': 0.0,
                'estimated_download_time_seconds': 0,
                'file_info': []
            }

    def download_video(self, file_id: str, job_id: str, temp_dir: str) -> str:
        """
        Download video from Box to temporary storage.

        The file is written to <temp_dir>/<job_id>/<file name>. If the
        transfer fails, the partially written file is removed before the
        error is re-raised.

        Args:
            file_id: Box file ID
            job_id: Job ID for organizing temp files
            temp_dir: Base temporary directory

        Returns:
            Local file path

        Raises:
            ValueError: If file is too large, a blocked format, or has no
                usable file name
            BoxAPIException: If the Box API call fails
        """
        try:
            # Get file info first (a single metadata request)
            file_info = self.get_video_info(file_id)

            # Safety checks
            if file_info['is_blocked']:
                raise ValueError(
                    f"Blocked format: {file_info['extension']}. "
                    f"Please convert to MP4, WebM, or M4V."
                )
            if file_info['is_too_large']:
                raise ValueError(
                    f"File too large: {file_info['size_mb']}MB "
                    f"(max: {self.MAX_FILE_SIZE/(1024**2):.0f}MB). "
                    f"Please compress or transcode the file."
                )

            # Keep only the final path component so a remote name can never
            # point outside the job directory.
            filename = Path(file_info['name']).name
            if filename in ('', '.', '..'):
                raise ValueError(f"Box file {file_id} has no usable file name")

            # Create job-specific temp directory
            job_dir = Path(temp_dir) / job_id
            job_dir.mkdir(parents=True, exist_ok=True)

            # Download file
            output_path = job_dir / filename
            logger.info("Downloading %s (%sMB) to %s",
                        filename, file_info['size_mb'], output_path)
            try:
                with open(output_path, 'wb') as f:
                    self.client.file(file_id).download_to(f)
            except Exception:
                # Best-effort cleanup: a truncated video must not be mistaken
                # for a complete download. The original error is what matters.
                try:
                    output_path.unlink()
                except OSError:
                    pass
                raise

            logger.info("Downloaded %s successfully", filename)
            return str(output_path)
        except BoxAPIException as e:
            logger.error("Box API error downloading video: %s", e)
            raise
        except Exception as e:
            logger.error("Error downloading video: %s", e)
            raise

    def _get_file_safety_info(self, filename: str, size: int) -> Dict:
        """
        Generate safety information for a file.

        Only the first matching condition sets the warning reason, in this
        order: too large, blocked format, large warning-format file, large
        file.

        Args:
            filename: File name
            size: File size in bytes

        Returns:
            Dict with 'extension', 'size_mb', 'is_allowed_format',
            'is_warning_format', 'is_blocked', 'is_too_large', 'is_large',
            'needs_warning', 'warning_reason' and 'recommended_action'
        """
        ext = Path(filename).suffix.lower()
        size_mb = size / (1024 * 1024)

        # Format classification
        is_allowed = ext in self.ALLOWED_FORMATS
        is_warning_format = ext in self.WARNING_FORMATS
        is_blocked = ext in self.BLOCKED_FORMATS

        # Size classification
        is_too_large = size > self.MAX_FILE_SIZE
        is_large = size > self.WARNING_SIZE

        # Determine warnings
        warning_reason = None
        if is_too_large:
            warning_reason = f"File exceeds maximum size ({size_mb:.1f}MB > {self.MAX_FILE_SIZE/(1024**2):.0f}MB)"
        elif is_blocked:
            warning_reason = f"Blocked format: {ext} (raw/uncompressed)"
        elif is_warning_format and is_large:
            warning_reason = f"Large {ext} file ({size_mb:.1f}MB) - likely hi-res. Consider converting to MP4."
        elif is_large:
            warning_reason = f"Large file ({size_mb:.1f}MB) - download will take time"

        return {
            'extension': ext,
            'size_mb': round(size_mb, 2),
            'is_allowed_format': is_allowed,
            'is_warning_format': is_warning_format,
            'is_blocked': is_blocked,
            'is_too_large': is_too_large,
            'is_large': is_large,
            'needs_warning': warning_reason is not None,
            'warning_reason': warning_reason,
            'recommended_action': 'Convert to MP4 for faster processing' if is_warning_format else None
        }