#!/usr/bin/env python3
"""
Oliver Metadata Tool - Web Interface
Universal metadata creation and management tool for files.
Flask-based web app for local or server deployment.

Supports multiple metadata sources: Excel, AI, manual entry, and file import.
"""

import itertools
import logging
import os
import shutil
import tempfile
import threading
import unicodedata
import webbrowser
from pathlib import Path
from time import sleep

from flask import Flask, render_template, request, jsonify, send_file
from werkzeug.utils import secure_filename  # noqa: F401 - kept as fallback

from src.config import Config
from src.excel_metadata_lookup import ExcelMetadataLookup
from src.extractors.image_extractor import ImageExtractor
from src.extractors.office_extractor import OfficeExtractor
from src.extractors.pdf_extractor import PDFExtractor
from src.extractors.video_extractor import VideoExtractor
from src.file_detector import FileDetector, FileType
from src.metadata_analyzer import MetadataAnalyzer
from src.metadata_importer import MetadataImporter
from src.updaters.image_updater import ImageUpdater
from src.updaters.office_updater import OfficeUpdater
from src.updaters.pdf_updater import PDFUpdater
from src.updaters.video_updater import VideoUpdater

logger = logging.getLogger(__name__)

# Address the development server binds to, and the URL opened in the browser.
HOST = '127.0.0.1'
PORT = 5001
BROWSER_URL = f'http://localhost:{PORT}'

# File extensions accepted by the /import-metadata endpoint.
SUPPORTED_IMPORT_EXTENSIONS = ('.csv', '.xlsx', '.xls', '.json')


def safe_filename(filename):
    """Sanitize filename while preserving Unicode characters (Chinese, Japanese, Korean).

    Path separators are replaced with underscores, null bytes are removed and
    leading/trailing dots and spaces are stripped, so the result is always a
    single path component.

    Args:
        filename: Client-supplied file name; may be empty or None.

    Returns:
        The sanitized name, or 'unnamed_file' when nothing usable remains.
    """
    if not filename:
        return 'unnamed_file'
    # Normalize unicode so visually identical names compare equal
    filename = unicodedata.normalize('NFC', filename)
    # Remove path separators and null bytes
    filename = filename.replace('/', '_').replace('\\', '_').replace('\x00', '')
    # Remove leading/trailing dots and spaces
    filename = filename.strip('. ')
    return filename or 'unnamed_file'


app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 500 * 1024 * 1024  # 500MB max file size
app.config['UPLOAD_FOLDER'] = tempfile.mkdtemp()

# Excel file path for metadata lookup
EXCEL_PATH = Path(__file__).parent / "Celum ID to Adobe Asset Path Mapping Spreadsheet (1).xlsx"

# Initialize metadata lookup from Excel (lazy initialization)
metadata_lookup = None

# Initialize AI analyzer (lazy initialization)
ai_analyzer = None

# Initialize extractors and updaters
extractors = {
    FileType.PDF: PDFExtractor(),
    FileType.IMAGE: ImageExtractor(),
    FileType.OFFICE_DOC: OfficeExtractor(),
    FileType.OFFICE_SHEET: OfficeExtractor(),
    FileType.OFFICE_PRESENTATION: OfficeExtractor(),
    FileType.VIDEO: VideoExtractor(),
}

updaters = {
    FileType.PDF: PDFUpdater(),
    FileType.IMAGE: ImageUpdater(),
    FileType.OFFICE_DOC: OfficeUpdater(),
    FileType.OFFICE_SHEET: OfficeUpdater(),
    FileType.OFFICE_PRESENTATION: OfficeUpdater(),
    FileType.VIDEO: VideoUpdater(),
}

# Store file processing sessions
sessions = {}

# Store imported metadata from external files
imported_metadata = {}

# Monotonic id sources. Deriving ids from len(dict) + 1 can hand the same id to
# two requests that are processed at the same time; next() on a counter cannot.
_session_ids = itertools.count(1)
_import_ids = itertools.count(1)


def get_metadata_lookup():
    """Get or create metadata lookup instance."""
    global metadata_lookup
    if metadata_lookup is None:
        metadata_lookup = ExcelMetadataLookup(str(EXCEL_PATH))
    return metadata_lookup


def get_ai_analyzer():
    """Get or create AI analyzer instance.

    Returns:
        The shared MetadataAnalyzer, or None when no OpenAI API key is
        configured or the analyzer could not be constructed.
    """
    global ai_analyzer
    if ai_analyzer is None:
        if not Config.OPENAI_API_KEY:
            return None
        try:
            ai_analyzer = MetadataAnalyzer()
            logger.info("AI analyzer initialized successfully")
        except Exception as e:
            logger.error("Failed to initialize AI analyzer: %s", e)
            return None
    return ai_analyzer


def _is_in_upload_folder(filepath):
    """Return True when filepath resolves to a location inside UPLOAD_FOLDER."""
    upload_root = Path(app.config['UPLOAD_FOLDER']).resolve()
    try:
        Path(filepath).resolve().relative_to(upload_root)
    except (ValueError, OSError):
        return False
    return True


def _fallback_metadata(filename, subject='', ai_error=None):
    """Build placeholder metadata whose title is the file name without extension."""
    metadata = {
        'title': Path(filename).stem,
        'subject': subject,
        'keywords': '',
    }
    if ai_error is not None:
        metadata['_ai_error'] = ai_error
    return metadata


def _suggest_from_excel(lookup, filename):
    """Look filename up in the Excel sheet; return (metadata, found)."""
    excel_data = lookup.lookup_by_filename(filename)
    if excel_data:
        return {
            'title': excel_data.get('title', ''),
            'subject': excel_data.get('description', ''),
            'keywords': '',
        }, True
    # No Excel data found - use filename as fallback
    return _fallback_metadata(filename, f'No metadata found in Excel for {filename}'), False


def _suggest_from_ai(extractor, filepath, filename, file_type):
    """Generate metadata with the AI analyzer, or a placeholder describing why not."""
    analyzer = get_ai_analyzer()
    if not analyzer:
        return _fallback_metadata(
            filename,
            'AI generation not available (OpenAI API key not configured)',
            'OpenAI API key not configured',
        )
    try:
        content = extractor.extract_content(str(filepath))
        if not content or len(content.strip()) < 10:
            # Not enough content for AI analysis
            return _fallback_metadata(
                filename,
                'Insufficient content for AI analysis',
                'Not enough text content extracted',
            )
        metadata = analyzer.analyze_content(content, filename, file_type)
        # Log token usage if available
        if '_tokens_used' in metadata:
            logger.info("AI tokens used for %s: %s", filename, metadata['_tokens_used'])
        return metadata
    except Exception as e:
        logger.error("AI generation failed for %s: %s", filename, e)
        return _fallback_metadata(filename, f'AI generation error: {str(e)}', str(e))


def _suggest_from_import(importer, import_map, filename):
    """Look filename up in previously imported metadata; return (metadata, found)."""
    if not (import_map and importer):
        return _fallback_metadata(filename, 'Import metadata not loaded'), False
    imported = importer.get_metadata_for_file(import_map, filename)
    if imported:
        return imported, True
    return _fallback_metadata(
        filename, f'No metadata found in imported file for {filename}'
    ), False


def _suggest_metadata(metadata_source, filename, filepath, file_type,
                      extractor, lookup, importer, import_map):
    """Produce suggested metadata for one file from the chosen source.

    Returns:
        (metadata, found) where found is True only when the Excel sheet or
        the imported file actually contained an entry for this file.
    """
    if metadata_source == 'excel' and lookup:
        return _suggest_from_excel(lookup, filename)
    if metadata_source == 'manual':
        # Suggest the filename as title; the user fills in the rest
        return _fallback_metadata(filename), False
    if metadata_source == 'ai':
        return _suggest_from_ai(extractor, filepath, filename, file_type), False
    if metadata_source == 'import':
        return _suggest_from_import(importer, import_map, filename)
    # Unknown source (or Excel lookup unavailable): nothing to suggest
    return {'title': '', 'subject': '', 'keywords': ''}, False


@app.route('/')
def index():
    """Main page."""
    return render_template('index.html')


@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle multiple file uploads and suggest metadata from the chosen source.

    Form fields:
        files: one or more uploaded files.
        metadata_source: 'excel' (default), 'manual', 'ai' or 'import'.
        import_session_id: id returned by /import-metadata (import source only).

    Returns:
        JSON with the new session id and one result entry per file; a file
        that could not be processed gets an 'error' entry instead of metadata.
    """
    if 'files' not in request.files:
        return jsonify({'error': 'No files provided'}), 400

    files = request.files.getlist('files')
    if not files or files[0].filename == '':
        return jsonify({'error': 'No files selected'}), 400

    # Get metadata source choice (excel, manual, ai, import)
    metadata_source = request.form.get('metadata_source', 'excel')
    import_session_id = request.form.get('import_session_id', '')  # For import source

    # Resolve the import source before creating a session so that a rejected
    # request does not leave an empty session behind.
    import_map = None
    importer = None
    if metadata_source == 'import':
        if not import_session_id or import_session_id not in imported_metadata:
            return jsonify({'error': 'Please import a metadata file first using the Import button'}), 400
        import_map = imported_metadata[import_session_id]
        importer = MetadataImporter()

    # Get metadata lookup (only if using Excel source)
    lookup = None
    if metadata_source == 'excel':
        try:
            lookup = get_metadata_lookup()
        except Exception as e:
            logger.error("Excel metadata lookup unavailable: %s", e)
            return jsonify({'error': f'Excel metadata lookup unavailable: {str(e)}'}), 500

    session_id = str(next(_session_ids))
    sessions[session_id] = {
        'files': [],
        'metadata_source': metadata_source,
        'import_session_id': import_session_id,
    }

    results = []
    for file in files:
        try:
            # Save uploaded file
            filename = safe_filename(file.filename)
            filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(filepath)

            # Detect file type
            file_type = FileDetector.detect_file_type(filepath)
            if file_type == FileType.UNSUPPORTED:
                results.append({'filename': filename, 'error': 'Unsupported file type'})
                continue

            # Get extractor for this file type
            extractor = extractors.get(file_type)
            if not extractor:
                results.append({'filename': filename, 'error': 'No extractor available'})
                continue

            # Read current metadata from file
            old_metadata = extractor.read_metadata(filepath)

            # Generate metadata based on chosen source
            new_metadata, source_found = _suggest_metadata(
                metadata_source, filename, filepath, file_type,
                extractor, lookup, importer, import_map,
            )

            file_info = {
                'success': True,
                'filename': filename,
                'file_type': file_type.value,
                'current_metadata': old_metadata,
                'suggested_metadata': new_metadata,
                'filepath': filepath,
                'metadata_source': metadata_source,
                # Also set when the entry came from an imported file
                'excel_found': source_found,
            }
            results.append(file_info)
            sessions[session_id]['files'].append(file_info)

        except Exception as e:
            # One bad file must not abort the whole batch
            logger.error("Failed to process upload %s: %s", file.filename, e)
            results.append({'filename': file.filename, 'error': str(e)})

    return jsonify({
        'success': True,
        'session_id': session_id,
        'files': results,
    })


@app.route('/update', methods=['POST'])
def update_metadata():
    """Update file metadata from Excel and save to chosen location.

    JSON body:
        filepath: path of a previously uploaded file (as returned by /upload).
        output_dir: optional existing directory to copy the file into before
            updating; when missing or not a directory the upload is updated
            in place.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    filepath = data.get('filepath')
    output_dir = data.get('output_dir', '')  # User-selected output directory

    # The path comes from the client: only accept files that live in the
    # upload folder so arbitrary files on the host cannot be rewritten.
    if (not isinstance(filepath, str) or not filepath
            or not os.path.exists(filepath)
            or not _is_in_upload_folder(filepath)):
        return jsonify({'error': 'File not found'}), 404

    try:
        # Detect file type
        file_type = FileDetector.detect_file_type(filepath)
        if file_type == FileType.UNSUPPORTED:
            return jsonify({'error': 'Unsupported file type'}), 400

        # Get updater
        updater = updaters.get(file_type)
        if not updater:
            return jsonify({'error': 'No updater available for this file type'}), 400

        # Lookup metadata from Excel
        filename = Path(filepath).name
        excel_data = get_metadata_lookup().lookup_by_filename(filename)
        if not excel_data:
            return jsonify({'error': f'No metadata found in Excel for {filename}'}), 400

        new_metadata = {
            'title': excel_data.get('title', ''),
            'subject': excel_data.get('description', ''),  # External Description/Alt Text
            'keywords': '',
        }

        # Copy file to output directory if specified
        if output_dir and os.path.isdir(output_dir):
            target_file = os.path.join(output_dir, filename)
            shutil.copy2(filepath, target_file)
        else:
            target_file = filepath

        # Update the file metadata WITHOUT changing filename
        success = updater.update_metadata(target_file, new_metadata, backup=False)
        if not success:
            return jsonify({'error': 'Failed to update metadata'}), 500

        # Verify update
        verified = updater.verify_metadata(target_file, new_metadata)

        return jsonify({
            'success': True,
            'message': 'Metadata updated successfully',
            'verified': verified,
            'output_path': target_file,
            'metadata': new_metadata,
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500


def _clean_text(value, limit):
    """Coerce a JSON field to a stripped string of at most limit characters."""
    if value is None:
        return ''
    return str(value).strip()[:limit]


@app.route('/update-manual', methods=['POST'])
def update_manual_metadata():
    """Update file with manually entered metadata.

    JSON body:
        session_id: id returned by /upload.
        file_index: zero-based index of the file within that session.
        title, subject, keywords: metadata values (truncated to 200/300/500
            characters respectively).
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    session_id = data.get('session_id')
    file_index = data.get('file_index')

    # Validate and sanitize metadata
    custom_metadata = {
        'title': _clean_text(data.get('title'), 200),
        'subject': _clean_text(data.get('subject'), 300),
        'keywords': _clean_text(data.get('keywords'), 500),
    }

    # Validate session
    if not isinstance(session_id, str) or session_id not in sessions:
        return jsonify({'error': 'Invalid or expired session'}), 400

    # Validate file index: must be a real integer inside the list bounds
    # (negative values would silently address files from the end).
    session_files = sessions[session_id]['files']
    if (isinstance(file_index, bool) or not isinstance(file_index, int)
            or not 0 <= file_index < len(session_files)):
        return jsonify({'error': 'Invalid file index'}), 400

    try:
        # Get file info from session
        file_info = session_files[file_index]
        filepath = file_info.get('filepath')

        if not filepath or not os.path.exists(filepath):
            return jsonify({'error': 'File not found'}), 404

        # Detect file type
        file_type = FileDetector.detect_file_type(filepath)
        if file_type == FileType.UNSUPPORTED:
            return jsonify({'error': 'Unsupported file type'}), 400

        # Get updater for this file type
        updater = updaters.get(file_type)
        if not updater:
            return jsonify({'error': 'No updater available for this file type'}), 400

        # Update metadata
        success = updater.update_metadata(filepath, custom_metadata, backup=True)
        if not success:
            return jsonify({'error': 'Failed to update metadata'}), 500

        # Update session with new metadata
        file_info['suggested_metadata'] = custom_metadata

        # Verify update
        verified = updater.verify_metadata(filepath, custom_metadata)

        return jsonify({
            'status': 'success',
            'success': True,  # same flag the other endpoints return
            'message': 'Metadata updated successfully',
            'verified': verified,
            'metadata': custom_metadata,
        })

    except Exception as e:
        return jsonify({'error': f'Error updating metadata: {str(e)}'}), 500


@app.route('/download/<path:filename>')
def download_file(filename):
    """Download processed file.

    The name is sanitized with safe_filename, so the request can only address
    files stored directly in the upload folder.
    """
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_filename(filename))
    if os.path.isfile(filepath):
        return send_file(filepath, as_attachment=True)
    return jsonify({'error': 'File not found'}), 404


@app.route('/import-metadata', methods=['POST'])
def import_metadata():
    """Import metadata from external file (CSV, Excel, JSON).

    Returns:
        JSON with an import_session_id to pass to /upload, plus the
        statistics reported by the importer.
    """
    if 'import_file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    file = request.files['import_file']
    if file.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    import_filename = safe_filename(file.filename)
    file_ext = Path(import_filename).suffix.lower()
    if file_ext not in SUPPORTED_IMPORT_EXTENSIONS:
        return jsonify({'error': f'Unsupported file format: {file_ext}. Supported: .csv, .xlsx, .xls, .json'}), 400

    temp_path = None
    try:
        # Save under a unique temporary name so an uploaded file with the same
        # name is neither overwritten nor removed by the cleanup below.
        fd, temp_path = tempfile.mkstemp(
            suffix=file_ext, prefix='import_', dir=app.config['UPLOAD_FOLDER']
        )
        os.close(fd)
        file.save(temp_path)

        # Import based on file type
        importer = MetadataImporter()
        if file_ext == '.csv':
            metadata_map = importer.import_from_csv(temp_path)
        elif file_ext in ('.xlsx', '.xls'):
            metadata_map = importer.import_from_excel(temp_path)
        else:
            metadata_map = importer.import_from_json(temp_path)

        # Validate import
        stats = importer.validate_import(metadata_map)

        # Store in global dict with unique session ID
        import_session_id = f"import_{next(_import_ids)}"
        imported_metadata[import_session_id] = metadata_map

        return jsonify({
            'success': True,
            'import_session_id': import_session_id,
            'stats': stats,
            'message': f'Imported {stats["total_records"]} metadata records from {import_filename}',
        })

    except Exception as e:
        logger.error("Import failed: %s", e)
        return jsonify({'error': f'Import failed: {str(e)}'}), 500

    finally:
        # Clean up temp file on every path, including failures
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass


@app.route('/stats')
def get_stats():
    """Get Excel metadata statistics."""
    try:
        stats = get_metadata_lookup().get_stats()
        return jsonify({'success': True, 'stats': stats})
    except Exception as e:
        return jsonify({'error': str(e)}), 500


def open_browser():
    """Open browser after short delay."""
    sleep(1.5)  # give the server a moment to start listening
    webbrowser.open(BROWSER_URL)


def main():
    """Print a dependency report, open the browser and run the Flask server."""
    print("=" * 60)
    print(f"{Config.APP_NAME} v{Config.APP_VERSION} - Web Interface")
    print("=" * 60)

    # Check dependencies
    print("\n🔍 Checking dependencies...")

    # Check Excel file
    if not EXCEL_PATH.exists():
        print(f"⚠️ Warning: Excel file not found at {EXCEL_PATH}")
        print(" Excel metadata lookup will not be available")
        print(" Please ensure the Excel file is in the project root")
    else:
        print(f"✓ Excel file found: {EXCEL_PATH.name}")

    # Check OpenAI API key (optional)
    if Config.OPENAI_API_KEY:
        print("✓ OpenAI API key configured (AI metadata generation available)")
    else:
        print("ℹ️ OpenAI API key not configured (AI generation disabled)")

    # Check ExifTool (optional)
    if Config.check_exiftool():
        print("✓ ExifTool available for enhanced metadata operations")
    else:
        print("ℹ️ ExifTool not installed (using Python libraries)")

    print("\nMetadata sources available:")
    print(" • Excel lookup (Celum ID mapping)")
    if Config.OPENAI_API_KEY:
        print(" • AI generation (OpenAI)")
    print(" • Manual entry")
    print(" • File import (CSV/Excel/JSON)")

    print("\nStarting server...")
    print(f"Opening browser at {BROWSER_URL}")
    print("\nPress Ctrl+C to stop the server")
    print("=" * 60)

    # Open browser in background
    threading.Thread(target=open_browser, daemon=True).start()

    # Run Flask app
    app.run(debug=False, port=PORT, host=HOST)


if __name__ == '__main__':
    main()