solventum-image-metadata/web_app.py
SamoilenkoVadym 7db62e06da Phase 1.1: Rebrand to Oliver Metadata Tool v3.0
- Updated application name to "Oliver Metadata Tool"
- Updated version to 3.0.0
- Added App Info constants to config.py (APP_NAME, APP_VERSION, APP_DESCRIPTION)
- Updated web interface (title, header, footer)
- Updated README with new branding and description
- Added AI configuration settings to config.py
- Added ExifTool check method to config.py

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-25 15:15:26 +00:00

283 lines
9.2 KiB
Python

#!/usr/bin/env python3
"""
Oliver Metadata Tool - Web Interface
Universal metadata creation and management tool for files.
Flask-based web app for local or server deployment.
Supports multiple metadata sources: Excel, AI, manual entry, and file import.
"""
from flask import Flask, render_template, request, jsonify, send_file
from werkzeug.utils import secure_filename # noqa: F401 - kept as fallback
from pathlib import Path
import os
import tempfile
import threading
import webbrowser
from time import sleep
import shutil
import unicodedata
from src.file_detector import FileDetector, FileType
from src.excel_metadata_lookup import ExcelMetadataLookup
def safe_filename(filename):
    """Sanitize a client-supplied filename while preserving Unicode characters.

    Non-ASCII names (Chinese, Japanese, Korean, ...) are kept. The name is
    NFC-normalized, path separators are replaced with underscores, all
    control characters (NUL, CR, LF, tab, ...) are dropped, and leading or
    trailing dots and spaces are stripped so the result can never be ``.``
    or ``..``.

    Args:
        filename: The raw filename; may be ``None`` or empty.

    Returns:
        The sanitized filename, or ``'unnamed_file'`` when nothing usable
        remains.
    """
    # A missing or empty name would otherwise crash in unicodedata.normalize().
    if not filename:
        return 'unnamed_file'

    # Normalize unicode so visually identical names compare equal.
    normalized = unicodedata.normalize('NFC', filename)

    # Replace path separators and drop every control character (category
    # "Cc"), not only the NUL byte, so newlines/tabs never reach the
    # filesystem path or the JSON responses built from this name.
    cleaned = ''.join(
        '_' if ch in '/\\' else ch
        for ch in normalized
        if unicodedata.category(ch) != 'Cc'
    )

    # Remove leading/trailing dots and spaces; fall back to a default name.
    cleaned = cleaned.strip('. ')
    return cleaned or 'unnamed_file'
from src.extractors.pdf_extractor import PDFExtractor
from src.extractors.image_extractor import ImageExtractor
from src.extractors.office_extractor import OfficeExtractor
from src.extractors.video_extractor import VideoExtractor
from src.updaters.pdf_updater import PDFUpdater
from src.updaters.image_updater import ImageUpdater
from src.updaters.office_updater import OfficeUpdater
from src.updaters.video_updater import VideoUpdater
# Flask application object; uploads are capped in size and written to a
# fresh temporary directory created when this module is loaded.
app = Flask(__name__)
app.config.update(
    MAX_CONTENT_LENGTH=500 * 1024 * 1024,  # 500MB max file size
    UPLOAD_FOLDER=tempfile.mkdtemp(),
)

# Spreadsheet used as the metadata source; expected next to this module.
EXCEL_PATH = Path(__file__).parent / "Celum ID to Adobe Asset Path Mapping Spreadsheet (1).xlsx"

# Lazily created ExcelMetadataLookup instance (see get_metadata_lookup()).
metadata_lookup = None

# The three Office file types use the same extractor/updater classes; each
# type still gets its own instance.
_OFFICE_TYPES = (
    FileType.OFFICE_DOC,
    FileType.OFFICE_SHEET,
    FileType.OFFICE_PRESENTATION,
)

# File type -> object used to read metadata from a file of that type.
extractors = {
    FileType.PDF: PDFExtractor(),
    FileType.IMAGE: ImageExtractor(),
    **{office_type: OfficeExtractor() for office_type in _OFFICE_TYPES},
    FileType.VIDEO: VideoExtractor(),
}

# File type -> object used to write metadata into a file of that type.
updaters = {
    FileType.PDF: PDFUpdater(),
    FileType.IMAGE: ImageUpdater(),
    **{office_type: OfficeUpdater() for office_type in _OFFICE_TYPES},
    FileType.VIDEO: VideoUpdater(),
}

# Upload sessions keyed by session id; each value is {'files': [...]}.
sessions = {}
def get_metadata_lookup():
    """Return the shared metadata lookup, constructing it on first use.

    The instance is cached in the module-level ``metadata_lookup`` so later
    calls return the same object.
    """
    global metadata_lookup
    if metadata_lookup is not None:
        return metadata_lookup

    # First call: build the lookup from the configured Excel path.
    metadata_lookup = ExcelMetadataLookup(str(EXCEL_PATH))
    return metadata_lookup
@app.route('/')
def index():
    """Serve the main page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
def _suggest_metadata(excel_data, filename):
    """Build suggested metadata from an Excel row, or a filename-based fallback."""
    if excel_data:
        # Use Excel data for metadata
        return {
            'title': excel_data.get('title', ''),
            'subject': excel_data.get('description', ''),  # External Description/Alt Text
            'keywords': '',  # Not used from Excel
        }
    # No Excel data found - use filename as fallback
    return {
        'title': Path(filename).stem,
        'subject': f'No metadata found in Excel for {filename}',
        'keywords': '',
    }


def _inspect_upload(file, lookup):
    """Save one uploaded file and return its result dict (success info or error)."""
    # Save uploaded file under its sanitized name
    filename = safe_filename(file.filename)
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(filepath)

    # Detect file type
    file_type = FileDetector.detect_file_type(filepath)
    if file_type == FileType.UNSUPPORTED:
        return {'filename': filename, 'error': 'Unsupported file type'}

    # Get extractor for this file type
    extractor = extractors.get(file_type)
    if not extractor:
        return {'filename': filename, 'error': 'No extractor available'}

    # Read current metadata from file, then look the file up in Excel by name
    old_metadata = extractor.read_metadata(filepath)
    excel_data = lookup.lookup_by_filename(filename)

    return {
        'success': True,
        'filename': filename,
        'file_type': file_type.value,
        'current_metadata': old_metadata,
        'suggested_metadata': _suggest_metadata(excel_data, filename),
        'filepath': filepath,
        'excel_found': excel_data is not None,
    }


@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle multiple file uploads and metadata lookup from Excel.

    Reads the ``files`` multipart field, saves each named part into the
    upload folder, reads its current metadata and suggests new metadata
    from the Excel lookup (falling back to the filename when no row is
    found).

    Returns:
        JSON ``{'success': True, 'session_id': ..., 'files': [...]}`` where
        each entry is either a success record or ``{'filename', 'error'}``;
        ``{'error': ...}`` with status 400 when no files were sent, or 500
        when the metadata lookup cannot be created.
    """
    if 'files' not in request.files:
        return jsonify({'error': 'No files provided'}), 400

    # Ignore empty parts (a form submitted with no file selected sends one)
    # wherever they appear, instead of inspecting only the first entry.
    files = [part for part in request.files.getlist('files') if part.filename]
    if not files:
        return jsonify({'error': 'No files selected'}), 400

    # Get metadata lookup. Failures are reported as JSON, matching the other
    # routes, and happen before a session is registered.
    try:
        lookup = get_metadata_lookup()
    except Exception as e:
        return jsonify({'error': str(e)}), 500

    session_id = str(len(sessions) + 1)
    sessions[session_id] = {'files': []}

    results = []
    for file in files:
        # One bad file must not abort the whole batch.
        try:
            info = _inspect_upload(file, lookup)
        except Exception as e:
            info = {'filename': file.filename, 'error': str(e)}
        results.append(info)
        # Only successfully inspected files are remembered in the session.
        if info.get('success'):
            sessions[session_id]['files'].append(info)

    return jsonify({
        'success': True,
        'session_id': session_id,
        'files': results
    })
@app.route('/update', methods=['POST'])
def update_metadata():
    """Update file metadata from Excel and save to chosen location.

    Expects a JSON object body with:
        filepath: Path of a previously uploaded file (the ``filepath`` value
            returned by ``/upload``). It must resolve to a location inside
            the upload folder.
        output_dir: Optional existing directory. When given, the file is
            copied there and the copy is updated; otherwise the uploaded
            file is updated in place.

    Returns:
        JSON ``{'success', 'message', 'verified', 'output_path', 'metadata'}``
        on success. Otherwise ``{'error': ...}`` with status 400 (malformed
        body, unsupported type, no updater, no Excel match), 403 (path
        outside the upload folder), 404 (file missing) or 500 (update failed
        or unexpected error).
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so the client always gets a JSON error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    filepath = data.get('filepath')
    output_dir = data.get('output_dir', '')  # User-selected output directory

    if not filepath or not isinstance(filepath, str):
        return jsonify({'error': 'File not found'}), 404

    # The path comes from the client: only files inside the upload folder
    # may be modified. Resolve symlinks and ".." before comparing.
    try:
        upload_root = os.path.realpath(app.config['UPLOAD_FOLDER'])
        resolved_path = os.path.realpath(filepath)
        inside_uploads = os.path.commonpath([upload_root, resolved_path]) == upload_root
    except ValueError:
        # Raised e.g. for paths on different drives or with embedded NULs.
        inside_uploads = False
    if not inside_uploads:
        return jsonify({'error': 'File path is outside the upload folder'}), 403

    if not os.path.exists(filepath):
        return jsonify({'error': 'File not found'}), 404

    try:
        # Detect file type
        file_type = FileDetector.detect_file_type(filepath)
        if file_type == FileType.UNSUPPORTED:
            return jsonify({'error': 'Unsupported file type'}), 400

        # Get updater
        updater = updaters.get(file_type)
        if not updater:
            return jsonify({'error': 'No updater available for this file type'}), 400

        # Lookup metadata from Excel
        filename = Path(filepath).name
        lookup = get_metadata_lookup()
        excel_data = lookup.lookup_by_filename(filename)
        if not excel_data:
            return jsonify({'error': f'No metadata found in Excel for {filename}'}), 400

        new_metadata = {
            'title': excel_data.get('title', ''),
            'subject': excel_data.get('description', ''),  # External Description/Alt Text
            'keywords': ''
        }

        # Copy file to output directory if specified.
        # NOTE(review): output_dir is client-chosen and not restricted;
        # acceptable for local use, reconsider for server deployment.
        if isinstance(output_dir, str) and output_dir and os.path.isdir(output_dir):
            output_path = os.path.join(output_dir, filename)
            # Skip the copy when the destination is the source itself;
            # shutil.copy2 would raise SameFileError.
            if os.path.realpath(output_path) != resolved_path:
                shutil.copy2(filepath, output_path)
            target_file = output_path
        else:
            target_file = filepath

        # Update the file metadata WITHOUT changing filename
        success = updater.update_metadata(target_file, new_metadata, backup=False)
        if not success:
            return jsonify({'error': 'Failed to update metadata'}), 500

        # Verify update
        verified = updater.verify_metadata(target_file, new_metadata)

        return jsonify({
            'success': True,
            'message': 'Metadata updated successfully',
            'verified': verified,
            'output_path': target_file,
            'metadata': new_metadata
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500
@app.route('/download/<filename>')
def download_file(filename):
    """Send a file from the upload folder as an attachment.

    The requested name is sanitized with ``safe_filename`` before it is
    joined to the upload folder. Responds with a JSON error and status 404
    when no such path exists.
    """
    stored_name = safe_filename(filename)
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], stored_name)

    if not os.path.exists(filepath):
        return jsonify({'error': 'File not found'}), 404

    return send_file(filepath, as_attachment=True)
@app.route('/stats')
def get_stats():
    """Return the Excel metadata statistics as JSON.

    Responds with ``{'success': True, 'stats': ...}``; any exception is
    reported as ``{'error': ...}`` with status 500.
    """
    try:
        stats = get_metadata_lookup().get_stats()
        payload = {'success': True, 'stats': stats}
        return jsonify(payload)
    except Exception as e:
        return jsonify({'error': str(e)}), 500
def open_browser():
    """Wait briefly, then open the app's local URL in the default browser."""
    startup_delay_seconds = 1.5  # presumably gives the server time to start
    app_url = 'http://localhost:5001'

    sleep(startup_delay_seconds)
    webbrowser.open(app_url)
if __name__ == '__main__':
    # Startup banner, printed one line at a time.
    # NOTE(review): the banner still says "Universal Metadata Tool" while the
    # module docstring says "Oliver Metadata Tool" - confirm intended branding.
    rule = "=" * 60
    banner_lines = [
        rule,
        "Universal Metadata Tool - Web Interface",
        rule,
        "\nMetadata source: Excel file (Celum ID mapping)",
        f"Excel file: {EXCEL_PATH.name}",
        "\nStarting server...",
        "Opening browser at http://localhost:5001",
        "\nPress Ctrl+C to stop the server",
        rule,
    ]
    for banner_line in banner_lines:
        print(banner_line)

    # Open browser in background; the daemon thread will not block shutdown.
    browser_thread = threading.Thread(target=open_browser, daemon=True)
    browser_thread.start()

    # Run Flask app (blocks until the server is stopped).
    app.run(debug=False, port=5001, host='127.0.0.1')