- Updated application name to "Oliver Metadata Tool" - Updated version to 3.0.0 - Added App Info constants to config.py (APP_NAME, APP_VERSION, APP_DESCRIPTION) - Updated web interface (title, header, footer) - Updated README with new branding and description - Added AI configuration settings to config.py - Added ExifTool check method to config.py Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
283 lines
9.2 KiB
Python
283 lines
9.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Oliver Metadata Tool - Web Interface
|
|
Universal metadata creation and management tool for files.
|
|
Flask-based web app for local or server deployment.
|
|
Supports multiple metadata sources: Excel, AI, manual entry, and file import.
|
|
"""
|
|
|
|
from flask import Flask, render_template, request, jsonify, send_file
|
|
from werkzeug.utils import secure_filename # noqa: F401 - kept as fallback
|
|
from pathlib import Path
|
|
import os
|
|
import tempfile
|
|
import threading
|
|
import webbrowser
|
|
from time import sleep
|
|
import shutil
|
|
import unicodedata
|
|
|
|
from src.file_detector import FileDetector, FileType
|
|
from src.excel_metadata_lookup import ExcelMetadataLookup
|
|
|
|
def safe_filename(filename):
    """Sanitize an uploaded filename while preserving Unicode characters.

    Unlike an ASCII-only sanitizer, this keeps non-ASCII text (Chinese,
    Japanese, Korean, accented letters, ...) intact and only removes what
    is unsafe in a single path component.

    Args:
        filename: The client-supplied name. ``None`` or an empty string is
            accepted and yields the default name.

    Returns:
        An NFC-normalized name with path separators replaced by ``_``,
        control characters (including NUL and newlines) removed, and
        leading/trailing dots and spaces stripped. Returns
        ``'unnamed_file'`` when nothing usable is left.
    """
    # Uploads without a name would otherwise crash unicodedata.normalize().
    if not filename:
        return 'unnamed_file'

    # Normalize unicode so equivalent names compare equal.
    filename = unicodedata.normalize('NFC', filename)

    # Neutralize path separators so the result is always a single component.
    filename = filename.replace('/', '_').replace('\\', '_')

    # Drop every control character (category "Cc"): NUL, newlines, tabs, DEL...
    filename = ''.join(
        ch for ch in filename if unicodedata.category(ch) != 'Cc'
    )

    # Remove leading/trailing dots and spaces ("..", hidden files, padding).
    filename = filename.strip('. ')

    # If nothing is left, fall back to the default name.
    return filename or 'unnamed_file'
|
|
from src.extractors.pdf_extractor import PDFExtractor
|
|
from src.extractors.image_extractor import ImageExtractor
|
|
from src.extractors.office_extractor import OfficeExtractor
|
|
from src.extractors.video_extractor import VideoExtractor
|
|
from src.updaters.pdf_updater import PDFUpdater
|
|
from src.updaters.image_updater import ImageUpdater
|
|
from src.updaters.office_updater import OfficeUpdater
|
|
from src.updaters.video_updater import VideoUpdater
|
|
|
|
# Flask application object. Request bodies are capped at 500MB and uploads
# are written to a temporary directory created when this module is loaded.
app = Flask(__name__)
app.config.update(
    MAX_CONTENT_LENGTH=500 * 1024 * 1024,  # 500MB max file size
    UPLOAD_FOLDER=tempfile.mkdtemp(),
)
|
|
|
|
# Workbook used for metadata lookup; resolved relative to this module's directory.
EXCEL_PATH = Path(__file__).parent.joinpath(
    "Celum ID to Adobe Asset Path Mapping Spreadsheet (1).xlsx"
)

# Cached lookup instance. Starts as None and is filled in on first use by
# get_metadata_lookup().
metadata_lookup = None
|
|
|
|
# Handler registries keyed by the FileType reported by FileDetector.
# The three Office types each get their own handler instance, exactly as if
# they were listed one by one.
_OFFICE_FILE_TYPES = (
    FileType.OFFICE_DOC,
    FileType.OFFICE_SHEET,
    FileType.OFFICE_PRESENTATION,
)

# Readers: used to fetch the metadata currently stored in a file.
extractors = {
    FileType.PDF: PDFExtractor(),
    FileType.IMAGE: ImageExtractor(),
    **{office_type: OfficeExtractor() for office_type in _OFFICE_FILE_TYPES},
    FileType.VIDEO: VideoExtractor(),
}

# Writers: used to store new metadata in a file.
updaters = {
    FileType.PDF: PDFUpdater(),
    FileType.IMAGE: ImageUpdater(),
    **{office_type: OfficeUpdater() for office_type in _OFFICE_FILE_TYPES},
    FileType.VIDEO: VideoUpdater(),
}

# In-memory store of file processing sessions, keyed by session id.
sessions = {}
|
|
|
|
def get_metadata_lookup():
    """Return the shared ExcelMetadataLookup, building it on first call.

    The instance is constructed from ``EXCEL_PATH`` and cached in the
    module-level ``metadata_lookup`` variable so later calls reuse it.
    """
    global metadata_lookup
    if metadata_lookup is not None:
        return metadata_lookup

    # First use: build the lookup and remember it for later calls.
    metadata_lookup = ExcelMetadataLookup(str(EXCEL_PATH))
    return metadata_lookup
|
|
|
|
@app.route('/')
def index():
    """Render and return the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page
|
|
|
|
def _suggest_metadata(excel_data, filename):
    """Build the suggested metadata dict from an Excel row, or a filename-based fallback."""
    if excel_data:
        # Use Excel data for metadata
        return {
            'title': excel_data.get('title', ''),
            'subject': excel_data.get('description', ''),  # External Description/Alt Text
            'keywords': '',  # Not used from Excel
        }

    # No Excel data found - use filename as fallback
    return {
        'title': Path(filename).stem,
        'subject': f'No metadata found in Excel for {filename}',
        'keywords': '',
    }


@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle multiple file uploads and metadata lookup from Excel.

    Expects a multipart form with one or more parts named ``files``. Each
    named upload is saved into ``app.config['UPLOAD_FOLDER']``, its type is
    detected, its current metadata is read, and suggested metadata is
    looked up in the Excel workbook by filename.

    Returns:
        A JSON response ``{'success': True, 'session_id': ..., 'files': [...]}``
        where each entry in ``files`` is either a full file-info dict or
        ``{'filename': ..., 'error': ...}`` for a file that could not be
        processed. Returns a JSON error with status 400 when no (named)
        files were sent, and status 500 when the Excel lookup cannot be
        created.
    """
    if 'files' not in request.files:
        return jsonify({'error': 'No files provided'}), 400

    # Ignore form parts without a filename (e.g. an empty file input).
    files = [f for f in request.files.getlist('files') if f.filename]
    if not files:
        return jsonify({'error': 'No files selected'}), 400

    # Build the lookup before allocating a session so that a missing or
    # unreadable workbook produces a JSON error (as /stats does) instead of
    # an unhandled exception and a dangling empty session.
    try:
        lookup = get_metadata_lookup()
    except Exception as e:
        return jsonify({'error': str(e)}), 500

    results = []
    session_id = str(len(sessions) + 1)
    session_files = []
    sessions[session_id] = {'files': session_files}

    for file in files:
        # Per-file boundary: one bad file must not abort the whole batch.
        try:
            # Save uploaded file
            filename = safe_filename(file.filename)
            filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(filepath)

            # Detect file type
            file_type = FileDetector.detect_file_type(filepath)
            if file_type == FileType.UNSUPPORTED:
                results.append({
                    'filename': filename,
                    'error': 'Unsupported file type',
                })
                continue

            # Get extractor for this file type
            extractor = extractors.get(file_type)
            if not extractor:
                results.append({
                    'filename': filename,
                    'error': 'No extractor available',
                })
                continue

            # Read current metadata from file
            old_metadata = extractor.read_metadata(filepath)

            # Lookup metadata from Excel by filename
            excel_data = lookup.lookup_by_filename(filename)

            file_info = {
                'success': True,
                'filename': filename,
                'file_type': file_type.value,
                'current_metadata': old_metadata,
                'suggested_metadata': _suggest_metadata(excel_data, filename),
                'filepath': filepath,
                # Same truthiness test that selects the Excel branch above, so
                # the flag always agrees with the suggested metadata.
                'excel_found': bool(excel_data),
            }

            results.append(file_info)
            session_files.append(file_info)

        except Exception as e:
            results.append({
                'filename': file.filename,
                'error': str(e),
            })

    return jsonify({
        'success': True,
        'session_id': session_id,
        'files': results,
    })
|
|
|
|
@app.route('/update', methods=['POST'])
def update_metadata():
    """Update file metadata from Excel and save to chosen location.

    Expects a JSON object with:
        filepath: Path of a previously uploaded file (as returned by
            ``/upload``). Must be located inside the upload folder.
        output_dir: Optional existing directory. When given, the file is
            copied there first and the copy is updated; otherwise the
            uploaded file is updated in place.

    Returns:
        On success, JSON with ``success``, ``message``, ``verified`` (result
        of the updater's verification), ``output_path`` and the written
        ``metadata``. JSON errors use status 400 (bad payload, unsupported
        type, no updater, no Excel entry), 404 (file missing or outside the
        upload folder) or 500 (update failed or unexpected error).
    """
    # silent=True: a missing or malformed JSON body becomes None instead of
    # raising, so the client always gets a JSON error back.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Invalid JSON payload'}), 400

    filepath = data.get('filepath')
    output_dir = data.get('output_dir', '')  # User-selected output directory

    # Reject non-string paths explicitly: os.path.exists() would treat an
    # integer as an open file descriptor.
    if not isinstance(filepath, str) or not filepath or not os.path.exists(filepath):
        return jsonify({'error': 'File not found'}), 404
    if not isinstance(output_dir, str):
        return jsonify({'error': 'Invalid output directory'}), 400

    # Only files inside the upload folder may be modified; the path comes
    # from the client and must not reach arbitrary files on this machine.
    upload_root = Path(app.config['UPLOAD_FOLDER']).resolve()
    if upload_root not in Path(filepath).resolve().parents:
        return jsonify({'error': 'File not found'}), 404

    try:
        # Detect file type
        file_type = FileDetector.detect_file_type(filepath)
        if file_type == FileType.UNSUPPORTED:
            return jsonify({'error': 'Unsupported file type'}), 400

        # Get updater
        updater = updaters.get(file_type)
        if not updater:
            return jsonify({'error': 'No updater available for this file type'}), 400

        # Lookup metadata from Excel
        filename = Path(filepath).name
        lookup = get_metadata_lookup()
        excel_data = lookup.lookup_by_filename(filename)
        if not excel_data:
            return jsonify({'error': f'No metadata found in Excel for {filename}'}), 400

        new_metadata = {
            'title': excel_data.get('title', ''),
            'subject': excel_data.get('description', ''),  # External Description/Alt Text
            'keywords': '',
        }

        # Copy file to output directory if specified
        target_file = filepath
        if output_dir and os.path.isdir(output_dir):
            output_path = os.path.join(output_dir, filename)
            # Copying a file onto itself raises shutil.SameFileError; in that
            # case the file is already where it should be.
            already_in_place = (
                os.path.exists(output_path)
                and os.path.samefile(filepath, output_path)
            )
            if not already_in_place:
                shutil.copy2(filepath, output_path)
            target_file = output_path

        # Update the file metadata WITHOUT changing filename
        success = updater.update_metadata(target_file, new_metadata, backup=False)
        if not success:
            return jsonify({'error': 'Failed to update metadata'}), 500

        # Verify update
        verified = updater.verify_metadata(target_file, new_metadata)

        return jsonify({
            'success': True,
            'message': 'Metadata updated successfully',
            'verified': verified,
            'output_path': target_file,
            'metadata': new_metadata,
        })

    except Exception as e:
        # Route-level boundary: report unexpected failures as JSON.
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/download/<filename>')
def download_file(filename):
    """Send a file from the upload folder as an attachment.

    The requested name is passed through ``safe_filename`` before it is
    joined to the upload folder. Responds with a JSON 404 error when no
    such path exists.
    """
    stored_name = safe_filename(filename)
    candidate = os.path.join(app.config['UPLOAD_FOLDER'], stored_name)

    if not os.path.exists(candidate):
        return jsonify({'error': 'File not found'}), 404

    return send_file(candidate, as_attachment=True)
|
|
|
|
@app.route('/stats')
def get_stats():
    """Return the Excel lookup's statistics as JSON.

    Responds with ``{'success': True, 'stats': ...}``; any exception raised
    while obtaining or serializing the statistics is reported as a JSON
    error with status 500.
    """
    try:
        payload = {
            'success': True,
            'stats': get_metadata_lookup().get_stats(),
        }
        return jsonify(payload)
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
def open_browser():
    """Wait briefly, then open the app's local URL in the default browser."""
    startup_delay = 1.5  # seconds to wait before opening the page
    url = 'http://localhost:5001'

    sleep(startup_delay)
    webbrowser.open(url)
|
|
|
|
if __name__ == '__main__':
    # Script entry point: print a startup banner, schedule the browser to
    # open, then run the Flask server bound to the loopback interface only.
    host = '127.0.0.1'
    port = 5001  # must match the URL used by open_browser()
    rule = "=" * 60

    print(rule)
    # Banner uses the application name from the module docstring.
    print("Oliver Metadata Tool - Web Interface")
    print(rule)
    print("\nMetadata source: Excel file (Celum ID mapping)")
    print(f"Excel file: {EXCEL_PATH.name}")
    print("\nStarting server...")
    print(f"Opening browser at http://localhost:{port}")
    print("\nPress Ctrl+C to stop the server")
    print(rule)

    # Open browser in background; daemon thread so it never blocks shutdown.
    threading.Thread(target=open_browser, daemon=True).start()

    # Run Flask app (blocks until the server is stopped).
    app.run(debug=False, port=port, host=host)
|