Implemented manual metadata editing UI:
- Added editable input fields for title (200 chars), subject (300 chars), keywords (500 chars)
- Character counters with warning/danger indicators at 90%/100%
- Real-time validation with visual feedback
- Save and Reset buttons for each file
- Individual file metadata updates via /update-manual endpoint

Implemented multiple metadata sources:
- Added metadata source selector dropdown (Excel, Manual, AI, Import)
- Modified /upload endpoint to handle different metadata sources
- Excel lookup: existing functionality (fastest)
- Manual entry: empty fields for user input
- AI generation: placeholder for Phase 2.3
- Import: placeholder for Phase 2.4

Technical improvements:
- Session-based metadata storage for persistence
- Graceful success/error feedback with visual indicators
- Sanitized metadata input with length limits
- Backup creation before updates

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
412 lines
14 KiB
Python
412 lines
14 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Oliver Metadata Tool - Web Interface
|
||
Universal metadata creation and management tool for files.
|
||
Flask-based web app for local or server deployment.
|
||
Supports multiple metadata sources: Excel, AI, manual entry, and file import.
|
||
"""
|
||
|
||
from flask import Flask, render_template, request, jsonify, send_file
|
||
from werkzeug.utils import secure_filename # noqa: F401 - kept as fallback
|
||
from pathlib import Path
|
||
import os
|
||
import tempfile
|
||
import threading
|
||
import webbrowser
|
||
from time import sleep
|
||
import shutil
|
||
import unicodedata
|
||
|
||
from src.file_detector import FileDetector, FileType
|
||
from src.excel_metadata_lookup import ExcelMetadataLookup
|
||
from src.config import Config
|
||
|
||
def safe_filename(filename):
    """Sanitize a client-supplied filename while preserving Unicode text.

    Non-ASCII characters (Chinese, Japanese, Korean, ...) are kept as-is;
    only characters that are unsafe in a single path component are touched.

    Args:
        filename: The name to sanitize. ``None`` and ``''`` are treated as
            "no name given".

    Returns:
        The NFC-normalized name with ``/`` and ``\\`` replaced by ``_``,
        control characters (NUL, CR, LF, tab, ...) removed, and leading and
        trailing dots/spaces stripped. ``'unnamed_file'`` is returned when
        nothing usable remains.
    """
    if not filename:
        return 'unnamed_file'

    # Normalize so that composed and decomposed spellings map to one name.
    filename = unicodedata.normalize('NFC', filename)

    # Neutralize both kinds of path separator so the result is always a
    # single path component.
    filename = filename.replace('/', '_').replace('\\', '_')

    # Drop NUL and every other control character (Unicode category "Cc");
    # they are never meaningful in a file name and break logs and headers.
    filename = ''.join(
        ch for ch in filename if unicodedata.category(ch) != 'Cc'
    )

    # Leading/trailing dots and spaces would yield hidden or awkward names
    # (and turn ".." into an empty string, handled just below).
    filename = filename.strip('. ')

    return filename or 'unnamed_file'
|
||
from src.extractors.pdf_extractor import PDFExtractor
|
||
from src.extractors.image_extractor import ImageExtractor
|
||
from src.extractors.office_extractor import OfficeExtractor
|
||
from src.extractors.video_extractor import VideoExtractor
|
||
from src.updaters.pdf_updater import PDFUpdater
|
||
from src.updaters.image_updater import ImageUpdater
|
||
from src.updaters.office_updater import OfficeUpdater
|
||
from src.updaters.video_updater import VideoUpdater
|
||
|
||
# Flask application object. Request bodies are capped at 500 MB and uploads
# are written to a temporary directory created when this module is imported.
app = Flask(__name__)
app.config.update(
    MAX_CONTENT_LENGTH=500 * 1024 * 1024,
    UPLOAD_FOLDER=tempfile.mkdtemp(),
)

# Spreadsheet consulted by the Excel metadata source; it is expected to sit
# in the same directory as this module.
EXCEL_PATH = Path(__file__).with_name(
    "Celum ID to Adobe Asset Path Mapping Spreadsheet (1).xlsx"
)

# Shared ExcelMetadataLookup instance; stays None until it is first needed.
metadata_lookup = None
|
||
|
||
# Metadata readers keyed by detected file type. Every key gets its own
# instance, including the three Office flavours.
extractors = {
    kind: make_extractor()
    for kind, make_extractor in (
        (FileType.PDF, PDFExtractor),
        (FileType.IMAGE, ImageExtractor),
        (FileType.OFFICE_DOC, OfficeExtractor),
        (FileType.OFFICE_SHEET, OfficeExtractor),
        (FileType.OFFICE_PRESENTATION, OfficeExtractor),
        (FileType.VIDEO, VideoExtractor),
    )
}

# Metadata writers keyed by detected file type, mirroring ``extractors``.
updaters = {
    kind: make_updater()
    for kind, make_updater in (
        (FileType.PDF, PDFUpdater),
        (FileType.IMAGE, ImageUpdater),
        (FileType.OFFICE_DOC, OfficeUpdater),
        (FileType.OFFICE_SHEET, OfficeUpdater),
        (FileType.OFFICE_PRESENTATION, OfficeUpdater),
        (FileType.VIDEO, VideoUpdater),
    )
}

# In-memory registry of upload sessions, keyed by session id.
sessions = dict()
|
||
|
||
def get_metadata_lookup():
    """Return the shared Excel metadata lookup, building it on first use.

    The instance is stored in the module-level ``metadata_lookup`` so later
    calls reuse it instead of constructing a new one from ``EXCEL_PATH``.
    """
    global metadata_lookup
    if metadata_lookup is not None:
        return metadata_lookup

    metadata_lookup = ExcelMetadataLookup(str(EXCEL_PATH))
    return metadata_lookup
|
||
|
||
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
||
|
||
# Subject text suggested for the sources that do not consult Excel.
# 'ai' and 'import' are placeholders until those features exist.
_PLACEHOLDER_SUBJECTS = {
    'manual': '',
    'ai': 'AI generation not yet implemented',
    'import': 'Import feature not yet implemented',
}


def _suggest_metadata(metadata_source, filename, lookup):
    """Build the suggested metadata for one uploaded file.

    Args:
        metadata_source: Source chosen by the client ('excel', 'manual',
            'ai' or 'import'); any other value yields empty metadata.
        filename: Sanitized name of the uploaded file.
        lookup: Excel lookup object, or ``None`` when Excel is not in use.

    Returns:
        A ``(metadata, excel_found)`` tuple. ``metadata`` has the keys
        'title', 'subject' and 'keywords'; ``excel_found`` is True only
        when the Excel lookup returned data for ``filename``.
    """
    stem = Path(filename).stem

    if metadata_source == 'excel' and lookup:
        excel_data = lookup.lookup_by_filename(filename)
        if excel_data:
            return {
                'title': excel_data.get('title', ''),
                'subject': excel_data.get('description', ''),
                'keywords': ''
            }, True
        # Nothing in the spreadsheet: fall back to the file name as title.
        return {
            'title': stem,
            'subject': f'No metadata found in Excel for {filename}',
            'keywords': ''
        }, False

    if metadata_source in _PLACEHOLDER_SUBJECTS:
        return {
            'title': stem,  # Suggest the file name as a starting point
            'subject': _PLACEHOLDER_SUBJECTS[metadata_source],
            'keywords': ''
        }, False

    # Unrecognized source: return empty fields.
    return {'title': '', 'subject': '', 'keywords': ''}, False


@app.route('/upload', methods=['POST'])
def upload_file():
    """Receive uploaded files and suggest metadata from the chosen source.

    Expects a multipart form with one or more ``files`` entries and an
    optional ``metadata_source`` field (default ``'excel'``). Each file is
    saved into the upload folder, its type is detected, its current
    metadata is read, and suggested metadata is produced for the source.
    Successfully processed files are recorded in a new entry of
    ``sessions``.

    Returns:
        JSON ``{'success': True, 'session_id': ..., 'files': [...]}`` where
        each list item is either the file's info dict or
        ``{'filename': ..., 'error': ...}``. Responds with HTTP 400 when no
        files were sent and HTTP 500 when the Excel lookup cannot be
        created.
    """
    if 'files' not in request.files:
        return jsonify({'error': 'No files provided'}), 400

    files = request.files.getlist('files')
    if not files or files[0].filename == '':
        return jsonify({'error': 'No files selected'}), 400

    # Metadata source choice (excel, manual, ai, import)
    metadata_source = request.form.get('metadata_source', 'excel')

    # Build the Excel lookup (only needed for the Excel source) before a
    # session is registered, and report a failure as JSON like /stats does
    # instead of letting the exception escape the view.
    lookup = None
    if metadata_source == 'excel':
        try:
            lookup = get_metadata_lookup()
        except Exception as e:
            return jsonify(
                {'error': f'Excel metadata lookup unavailable: {e}'}
            ), 500

    results = []
    session_id = str(len(sessions) + 1)
    sessions[session_id] = {'files': [], 'metadata_source': metadata_source}

    for file in files:
        try:
            # Save the upload under a sanitized name
            filename = safe_filename(file.filename)
            filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(filepath)

            file_type = FileDetector.detect_file_type(filepath)
            if file_type == FileType.UNSUPPORTED:
                results.append({
                    'filename': filename,
                    'error': 'Unsupported file type'
                })
                continue

            extractor = extractors.get(file_type)
            if not extractor:
                results.append({
                    'filename': filename,
                    'error': 'No extractor available'
                })
                continue

            # Metadata currently stored in the file
            old_metadata = extractor.read_metadata(filepath)

            new_metadata, excel_found = _suggest_metadata(
                metadata_source, filename, lookup
            )

            file_info = {
                'success': True,
                'filename': filename,
                'file_type': file_type.value,
                'current_metadata': old_metadata,
                'suggested_metadata': new_metadata,
                'filepath': filepath,
                'metadata_source': metadata_source,
                'excel_found': excel_found
            }

            results.append(file_info)
            # NOTE(review): failed files are listed in the response but not
            # stored here, so a position in the response list may differ
            # from the index into the session's file list - confirm which
            # one the client sends to /update-manual.
            sessions[session_id]['files'].append(file_info)

        except Exception as e:
            # One bad file must not abort the whole batch.
            results.append({
                'filename': file.filename,
                'error': str(e)
            })

    return jsonify({
        'success': True,
        'session_id': session_id,
        'files': results
    })
|
||
|
||
@app.route('/update', methods=['POST'])
def update_metadata():
    """Write Excel-derived metadata into a file, optionally into a copy.

    Expects a JSON object with:
        filepath: Path of the file to update.
        output_dir: Optional existing directory. When given, the file is
            copied there (same file name) and the copy is updated;
            otherwise the file is updated in place.

    The metadata always comes from the Excel lookup keyed by file name.

    Returns:
        JSON with 'success', 'message', 'verified', 'output_path' and
        'metadata' on success. Errors are ``{'error': ...}`` with HTTP 400
        (bad body, unsupported type, no updater, no Excel entry), 404
        (file missing) or 500 (update failed or an exception occurred).
    """
    # silent=True yields None instead of raising on a missing/invalid body,
    # so malformed requests get the same JSON error shape as everything else.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Invalid or missing JSON body'}), 400

    filepath = data.get('filepath')
    output_dir = data.get('output_dir', '')  # User-selected output directory

    # NOTE(review): filepath is taken from the client as-is, so any file
    # readable by the server process can be targeted - consider limiting it
    # to the upload folder once client usage is confirmed.
    # The isinstance check matters: os.path functions interpret an int as an
    # open file descriptor rather than a path.
    if not isinstance(filepath, str) or not filepath \
            or not os.path.isfile(filepath):
        return jsonify({'error': 'File not found'}), 404

    try:
        file_type = FileDetector.detect_file_type(filepath)

        if file_type == FileType.UNSUPPORTED:
            return jsonify({'error': 'Unsupported file type'}), 400

        updater = updaters.get(file_type)

        if not updater:
            return jsonify({'error': 'No updater available for this file type'}), 400

        # Lookup metadata from Excel by file name
        filename = Path(filepath).name
        lookup = get_metadata_lookup()
        excel_data = lookup.lookup_by_filename(filename)

        if not excel_data:
            return jsonify({'error': f'No metadata found in Excel for {filename}'}), 400

        new_metadata = {
            'title': excel_data.get('title', ''),
            'subject': excel_data.get('description', ''),  # External Description/Alt Text
            'keywords': ''
        }

        # Copy the file to the output directory if a valid one was given
        if isinstance(output_dir, str) and output_dir \
                and os.path.isdir(output_dir):
            output_path = os.path.join(output_dir, filename)
            # shutil.copy2 raises SameFileError when source and destination
            # are the same file (output_dir is the file's own directory);
            # in that case there is nothing to copy.
            already_there = (
                os.path.exists(output_path)
                and os.path.samefile(filepath, output_path)
            )
            if not already_there:
                shutil.copy2(filepath, output_path)
            target_file = output_path
        else:
            target_file = filepath

        # Update the file metadata WITHOUT changing the file name
        success = updater.update_metadata(target_file, new_metadata, backup=False)

        if not success:
            return jsonify({'error': 'Failed to update metadata'}), 500

        # Let the updater check the written metadata; the result is
        # reported to the client as 'verified'.
        verified = updater.verify_metadata(target_file, new_metadata)

        return jsonify({
            'success': True,
            'message': 'Metadata updated successfully',
            'verified': verified,
            'output_path': target_file,
            'metadata': new_metadata
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||
|
||
def _clean_metadata_field(value, limit):
    """Return *value* as stripped text cut to at most *limit* characters."""
    if value is None:
        return ''
    return str(value).strip()[:limit]


@app.route('/update-manual', methods=['POST'])
def update_manual_metadata():
    """Write manually entered metadata into a previously uploaded file.

    Expects a JSON object with:
        session_id: Id returned by the upload endpoint.
        file_index: Zero-based index into that session's file list.
        title, subject, keywords: Optional text, trimmed and truncated to
            200, 300 and 500 characters respectively.

    On success the session entry's 'suggested_metadata' is replaced by the
    submitted values.

    Returns:
        JSON with 'success', 'status', 'message', 'verified' and
        'metadata' on success. Errors are ``{'error': ...}`` with HTTP 400
        (bad body, session, index, type or updater), 404 (file missing) or
        500 (update failed or an exception occurred).
    """
    # silent=True yields None instead of raising on a missing/invalid body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Invalid or missing JSON body'}), 400

    session_id = data.get('session_id')
    file_index = data.get('file_index')

    # Sanitize metadata; null or non-string values are coerced instead of
    # crashing on .strip().
    custom_metadata = {
        'title': _clean_metadata_field(data.get('title'), 200),
        'subject': _clean_metadata_field(data.get('subject'), 300),
        'keywords': _clean_metadata_field(data.get('keywords'), 500)
    }

    # Validate session (ids are stored as strings)
    if not isinstance(session_id, str) or session_id not in sessions:
        return jsonify({'error': 'Invalid or expired session'}), 400

    session_files = sessions[session_id]['files']

    # Validate file index: it must be a real integer inside the list.
    # Negative values are rejected because Python would otherwise index
    # from the end and silently update a different file.
    index_ok = (
        isinstance(file_index, int)
        and not isinstance(file_index, bool)
        and 0 <= file_index < len(session_files)
    )
    if not index_ok:
        return jsonify({'error': 'Invalid file index'}), 400

    try:
        file_info = session_files[file_index]
        filepath = file_info.get('filepath')

        if not filepath or not os.path.exists(filepath):
            return jsonify({'error': 'File not found'}), 404

        file_type = FileDetector.detect_file_type(filepath)

        if file_type == FileType.UNSUPPORTED:
            return jsonify({'error': 'Unsupported file type'}), 400

        updater = updaters.get(file_type)

        if not updater:
            return jsonify({'error': 'No updater available for this file type'}), 400

        # Update metadata, asking the updater for a backup
        success = updater.update_metadata(filepath, custom_metadata, backup=True)

        if not success:
            return jsonify({'error': 'Failed to update metadata'}), 500

        # Remember what was written so the session reflects the file
        file_info['suggested_metadata'] = custom_metadata

        verified = updater.verify_metadata(filepath, custom_metadata)

        return jsonify({
            # 'success' matches the other endpoints; 'status' is kept for
            # clients that already read it.
            'success': True,
            'status': 'success',
            'message': 'Metadata updated successfully',
            'verified': verified,
            'metadata': custom_metadata
        })

    except Exception as e:
        return jsonify({'error': f'Error updating metadata: {str(e)}'}), 500
|
||
|
||
@app.route('/download/<filename>')
def download_file(filename):
    """Send a file from the upload folder as an attachment.

    The requested name is passed through ``safe_filename`` before it is
    joined to the upload folder. Responds with a JSON error and HTTP 404
    when no such path exists.
    """
    target = os.path.join(app.config['UPLOAD_FOLDER'], safe_filename(filename))
    if not os.path.exists(target):
        return jsonify({'error': 'File not found'}), 404
    return send_file(target, as_attachment=True)
|
||
|
||
@app.route('/stats')
def get_stats():
    """Report statistics from the Excel metadata lookup as JSON.

    Returns ``{'success': True, 'stats': ...}``; any exception raised
    while obtaining or serializing the statistics is reported as
    ``{'error': ...}`` with HTTP 500.
    """
    try:
        excel_stats = get_metadata_lookup().get_stats()
        payload = {
            'success': True,
            'stats': excel_stats
        }
        return jsonify(payload)
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
|
||
|
||
def open_browser(url='http://localhost:5001', delay=1.5):
    """Open *url* in the default web browser after a short delay.

    Args:
        url: Address to open. Defaults to the address the server is
            started on in this script.
        delay: Seconds to wait before opening, giving the server time to
            start listening.
    """
    sleep(delay)
    webbrowser.open(url)
|
||
|
||
if __name__ == '__main__':
    # Startup banner
    divider = "="*60
    print(divider)
    print(f"{Config.APP_NAME} v{Config.APP_VERSION} - Web Interface")
    print(divider)

    # Report which optional pieces are present
    print("\n🔍 Checking dependencies...")

    # Excel spreadsheet used by the Excel metadata source
    if EXCEL_PATH.exists():
        print(f"✓ Excel file found: {EXCEL_PATH.name}")
    else:
        print(f"⚠️ Warning: Excel file not found at {EXCEL_PATH}")
        print(" Excel metadata lookup will not be available")
        print(" Please ensure the Excel file is in the project root")

    # OpenAI API key (optional)
    print(
        "✓ OpenAI API key configured (AI metadata generation available)"
        if Config.OPENAI_API_KEY
        else "ℹ️ OpenAI API key not configured (AI generation disabled)"
    )

    # ExifTool (optional)
    print(
        "✓ ExifTool available for enhanced metadata operations"
        if Config.check_exiftool()
        else "ℹ️ ExifTool not installed (using Python libraries)"
    )

    # Summary of metadata sources; the AI line is shown only with a key
    print("\nMetadata sources available:")
    print(" • Excel lookup (Celum ID mapping)")
    if Config.OPENAI_API_KEY:
        print(" • AI generation (OpenAI)")
    print(" • Manual entry")
    print(" • File import (CSV/Excel/JSON)")

    print("\nStarting server...")
    print("Opening browser at http://localhost:5001")
    print("\nPress Ctrl+C to stop the server")
    print(divider)

    # Launch the browser from a daemon thread so it does not block startup
    browser_thread = threading.Thread(target=open_browser, daemon=True)
    browser_thread.start()

    # Serve on the loopback interface only
    app.run(debug=False, port=5001, host='127.0.0.1')
|