#!/usr/bin/env python3
"""
Oliver Metadata Tool - Web Interface
Universal metadata creation and management tool for files.
Flask-based web app for local or server deployment.
Supports multiple metadata sources: Excel, AI, manual entry, and file import.
"""

import os
import shutil
import tempfile
import threading
import unicodedata
import webbrowser
from pathlib import Path
from time import sleep

from flask import Flask, render_template, request, jsonify, send_file
from werkzeug.utils import secure_filename  # noqa: F401 - kept as fallback

from src.file_detector import FileDetector, FileType
from src.excel_metadata_lookup import ExcelMetadataLookup
from src.extractors.pdf_extractor import PDFExtractor
from src.extractors.image_extractor import ImageExtractor
from src.extractors.office_extractor import OfficeExtractor
from src.extractors.video_extractor import VideoExtractor
from src.updaters.pdf_updater import PDFUpdater
from src.updaters.image_updater import ImageUpdater
from src.updaters.office_updater import OfficeUpdater
from src.updaters.video_updater import VideoUpdater


def safe_filename(filename):
    """Sanitize a filename while preserving Unicode characters.

    Unlike ``werkzeug.utils.secure_filename`` this keeps non-ASCII
    characters (Chinese, Japanese, Korean, ...) intact.

    Args:
        filename: The raw, client-supplied file name.

    Returns:
        The NFC-normalized name with path separators replaced by ``_``,
        null bytes removed, and leading/trailing dots and spaces stripped.
        ``'unnamed_file'`` is returned when nothing is left.
    """
    # Normalize unicode so equivalent names compare equal
    filename = unicodedata.normalize('NFC', filename)
    # Remove path separators and null bytes
    filename = filename.replace('/', '_').replace('\\', '_').replace('\x00', '')
    # Remove leading/trailing dots and spaces
    filename = filename.strip('. ')
    # If empty, use default
    if not filename:
        filename = 'unnamed_file'
    return filename


# Address the development server binds to and the browser is pointed at
HOST = '127.0.0.1'
PORT = 5001

app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 500 * 1024 * 1024  # 500MB max file size
app.config['UPLOAD_FOLDER'] = tempfile.mkdtemp()

# Excel file path for metadata lookup
EXCEL_PATH = Path(__file__).parent / "Celum ID to Adobe Asset Path Mapping Spreadsheet (1).xlsx"

# Metadata lookup from Excel; created lazily by get_metadata_lookup()
metadata_lookup = None

# Extractors and updaters, keyed by detected file type
extractors = {
    FileType.PDF: PDFExtractor(),
    FileType.IMAGE: ImageExtractor(),
    FileType.OFFICE_DOC: OfficeExtractor(),
    FileType.OFFICE_SHEET: OfficeExtractor(),
    FileType.OFFICE_PRESENTATION: OfficeExtractor(),
    FileType.VIDEO: VideoExtractor(),
}

updaters = {
    FileType.PDF: PDFUpdater(),
    FileType.IMAGE: ImageUpdater(),
    FileType.OFFICE_DOC: OfficeUpdater(),
    FileType.OFFICE_SHEET: OfficeUpdater(),
    FileType.OFFICE_PRESENTATION: OfficeUpdater(),
    FileType.VIDEO: VideoUpdater(),
}

# Store file processing sessions (session id -> {'files': [...]})
sessions = {}


def get_metadata_lookup():
    """Return the shared ExcelMetadataLookup, creating it on first use."""
    global metadata_lookup
    if metadata_lookup is None:
        metadata_lookup = ExcelMetadataLookup(str(EXCEL_PATH))
    return metadata_lookup


def _metadata_from_excel(excel_data):
    """Build the title/subject/keywords dict from an Excel lookup result."""
    return {
        'title': excel_data.get('title', ''),
        'subject': excel_data.get('description', ''),  # External Description/Alt Text
        'keywords': '',  # Not used from Excel
    }


@app.route('/')
def index():
    """Main page."""
    return render_template('index.html')


@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle multiple file uploads and metadata lookup from Excel.

    Each uploaded file is saved into the upload folder, its type is
    detected, its current metadata is read, and suggested metadata is
    looked up in the Excel sheet by file name.

    Returns:
        A JSON response with ``session_id`` and one entry per file in
        ``files``; entries for files that could not be processed carry an
        ``error`` key. Responds with 400 when no files were sent.
    """
    if 'files' not in request.files:
        return jsonify({'error': 'No files provided'}), 400

    files = request.files.getlist('files')
    if not files or files[0].filename == '':
        return jsonify({'error': 'No files selected'}), 400

    results = []
    session_id = str(len(sessions) + 1)
    sessions[session_id] = {'files': []}

    # Get metadata lookup
    lookup = get_metadata_lookup()

    for file in files:
        try:
            # Save uploaded file
            filename = safe_filename(file.filename)
            filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(filepath)

            # Detect file type
            file_type = FileDetector.detect_file_type(filepath)
            if file_type == FileType.UNSUPPORTED:
                results.append({
                    'filename': filename,
                    'error': 'Unsupported file type'
                })
                continue

            # Get extractor for this file type
            extractor = extractors.get(file_type)
            if not extractor:
                results.append({
                    'filename': filename,
                    'error': 'No extractor available'
                })
                continue

            # Read current metadata from file
            old_metadata = extractor.read_metadata(filepath)

            # Lookup metadata from Excel by filename
            excel_data = lookup.lookup_by_filename(filename)
            if excel_data:
                # Use Excel data for metadata
                new_metadata = _metadata_from_excel(excel_data)
            else:
                # No Excel data found - use filename as fallback
                new_metadata = {
                    'title': Path(filename).stem,
                    'subject': f'No metadata found in Excel for {filename}',
                    'keywords': ''
                }

            file_info = {
                'success': True,
                'filename': filename,
                'file_type': file_type.value,
                'current_metadata': old_metadata,
                'suggested_metadata': new_metadata,
                'filepath': filepath,
                'excel_found': excel_data is not None
            }
            results.append(file_info)
            sessions[session_id]['files'].append(file_info)
        except Exception as e:
            # Request boundary: one bad file must not abort the whole batch,
            # but keep the traceback in the server log.
            app.logger.exception('Failed to process upload %r', file.filename)
            results.append({
                'filename': file.filename,
                'error': str(e)
            })

    return jsonify({
        'success': True,
        'session_id': session_id,
        'files': results
    })


@app.route('/update', methods=['POST'])
def update_metadata():
    """Update file metadata from Excel and save to chosen location.

    Expects a JSON object with ``filepath`` (the file to update) and an
    optional ``output_dir``. When ``output_dir`` is an existing directory
    the file is copied there and the copy is updated; otherwise the file at
    ``filepath`` is updated.

    Returns:
        A JSON response describing the result. Responds with 400 for a
        missing/invalid JSON body, an unsupported file type, a missing
        updater, or when the Excel sheet has no entry for the file; 404 when
        the file does not exist; 500 when the update fails.
    """
    # silent=True: a missing or malformed JSON body becomes a clean 400
    # instead of an AttributeError / framework error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    # NOTE(review): filepath and output_dir are taken from the client as-is,
    # so any path readable/writable by the server process can be targeted.
    # Consider restricting filepath to UPLOAD_FOLDER if this is ever exposed
    # beyond localhost.
    filepath = data.get('filepath')
    output_dir = data.get('output_dir', '')  # User-selected output directory

    if not filepath or not os.path.exists(filepath):
        return jsonify({'error': 'File not found'}), 404

    try:
        # Detect file type
        file_type = FileDetector.detect_file_type(filepath)
        if file_type == FileType.UNSUPPORTED:
            return jsonify({'error': 'Unsupported file type'}), 400

        # Get updater
        updater = updaters.get(file_type)
        if not updater:
            return jsonify({'error': 'No updater available for this file type'}), 400

        # Lookup metadata from Excel
        filename = Path(filepath).name
        lookup = get_metadata_lookup()
        excel_data = lookup.lookup_by_filename(filename)
        if not excel_data:
            return jsonify({'error': f'No metadata found in Excel for {filename}'}), 400
        new_metadata = _metadata_from_excel(excel_data)

        # Copy file to output directory if specified
        if output_dir and os.path.isdir(output_dir):
            output_path = os.path.join(output_dir, filename)
            shutil.copy2(filepath, output_path)
            target_file = output_path
        else:
            target_file = filepath

        # Update the file metadata WITHOUT changing filename
        success = updater.update_metadata(target_file, new_metadata, backup=False)
        if not success:
            return jsonify({'error': 'Failed to update metadata'}), 500

        # Verify update
        verified = updater.verify_metadata(target_file, new_metadata)

        return jsonify({
            'success': True,
            'message': 'Metadata updated successfully',
            'verified': verified,
            'output_path': target_file,
            'metadata': new_metadata
        })
    except Exception as e:
        # Request boundary: report the failure as JSON, keep the traceback.
        app.logger.exception('Failed to update metadata for %r', filepath)
        return jsonify({'error': str(e)}), 500


# The URL variable is required: the view takes ``filename`` as an argument,
# so a route without it fails with a TypeError on every request.
@app.route('/download/<filename>')
def download_file(filename):
    """Download processed file.

    Args:
        filename: Name of a file in the upload folder; it is passed through
            ``safe_filename`` before being joined to the folder path.

    Returns:
        The file as an attachment, or a 404 JSON error when it is missing.
    """
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_filename(filename))
    if os.path.exists(filepath):
        return send_file(filepath, as_attachment=True)
    return jsonify({'error': 'File not found'}), 404


@app.route('/stats')
def get_stats():
    """Get Excel metadata statistics."""
    try:
        lookup = get_metadata_lookup()
        stats = lookup.get_stats()
        return jsonify({
            'success': True,
            'stats': stats
        })
    except Exception as e:
        # Request boundary: report the failure as JSON, keep the traceback.
        app.logger.exception('Failed to read Excel metadata statistics')
        return jsonify({'error': str(e)}), 500


def open_browser():
    """Open browser after short delay."""
    # Give the server a moment to start listening before the browser connects
    sleep(1.5)
    webbrowser.open(f'http://localhost:{PORT}')


def main():
    """Print the startup banner, open the browser, and run the server."""
    print("="*60)
    print("Universal Metadata Tool - Web Interface")
    print("="*60)
    print("\nMetadata source: Excel file (Celum ID mapping)")
    print(f"Excel file: {EXCEL_PATH.name}")
    print("\nStarting server...")
    print(f"Opening browser at http://localhost:{PORT}")
    print("\nPress Ctrl+C to stop the server")
    print("="*60)

    # Open browser in background
    threading.Thread(target=open_browser, daemon=True).start()

    # Run Flask app
    app.run(debug=False, port=PORT, host=HOST)


if __name__ == '__main__':
    main()