147 changed files with 6544 additions and 21765 deletions
--- a/.env
+++ b/.env
@ -1,64 +0,0 @@
-# Oliver Metadata Tool - Environment Configuration
-# Copy this file to .env and fill in your values
-
-# ==============================================================================
-# REQUIRED: OpenAI API Key (for AI metadata generation)
-# ==============================================================================
-# Get your API key from: https://platform.openai.com/api-keys
-OPENAI_API_KEY=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
-
-# ==============================================================================
-# OPTIONAL: AI Configuration
-# ==============================================================================
-# AI model to use (default: gpt-4o-mini)
-# Valid models (2026): gpt-5, gpt-5-mini, gpt-5-nano, gpt-4o, gpt-4o-mini, gpt-3.5-turbo
-# GPT-5 models: gpt-5 (most capable), gpt-5-mini (fast+cheap), gpt-5-nano (fastest)
-# Dated versions: gpt-5-mini-2025-08-07, gpt-5-nano-2025-08-07
-AI_MODEL=gpt-5.2
-
-# Maximum tokens for AI responses (default: 500)
-# MAX_TOKENS=500
-
-# Temperature for AI generation (0.0-1.0, default: 0.5)
-# Lower = more focused, Higher = more creative
-# TEMPERATURE=0.5
-
-# Maximum text length to send to AI (default: 4000)
-# MAX_TEXT_LENGTH=4000
-
-# API timeout in seconds (default: 30)
-API_TIMEOUT=30
-
-# Maximum API retry attempts (default: 3)
-API_MAX_RETRIES=3
-
-# API retry delay multiplier (default: 1.0)
-API_RETRY_DELAY=1.0
-
-# ==============================================================================
-# Microsoft SSO (Azure AD) Configuration
-# ==============================================================================
-AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
-AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
-AZURE_REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-
-# ==============================================================================
-# OPTIONAL: Flask Configuration
-# ==============================================================================
-# Secret key for Flask sessions (auto-generated if not set)
-# SECRET_KEY=your-secret-key-here
-
-# ==============================================================================
-# OPTIONAL: External Tools Paths
-# ==============================================================================
-# Custom paths to external tools (usually auto-detected)
-# TESSERACT_PATH=/usr/local/bin/tesseract
-# FFMPEG_PATH=/usr/local/bin/ffmpeg
-
-# ==============================================================================
-# OPTIONAL: OCR Configuration
-# ==============================================================================
-# Tesseract OCR languages (default: eng+chi_sim+chi_tra+jpn+kor)
-# Supported: eng (English), chi_sim (Chinese Simplified), chi_tra (Chinese Traditional),
-#            jpn (Japanese), kor (Korean)
-OCR_LANGUAGES=eng+chi_sim+chi_tra+jpn+kor
--- a/.env.example
+++ b/.env.example
@ -0,0 +1,37 @@
+# Solventum Image Metadata Tool — Environment Configuration
+# Copy this file to .env and fill in your secrets:
+#   cp .env.example .env
+
+# === Required ===
+# Generate with: python3 -c "import secrets; print(secrets.token_hex(32))"
+SECRET_KEY=CHANGE_ME_GENERATE_A_RANDOM_KEY
+DOCKER_MODE=true
+# Subpath prefix (must match Apache reverse proxy config, no trailing slash)
+ROOT_PATH=/solventum-image-metadata
+
+# === Azure AD / SSO ===
+AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
+AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
+# AZURE_CLIENT_SECRET is REQUIRED for server-side MSAL flow (get from Azure Portal > App > Certificates & secrets)
+AZURE_CLIENT_SECRET=
+# Must match Azure AD App Registration > Authentication > Redirect URIs EXACTLY (including /auth/callback path)
+# For production: https://ai-sandbox.oliver.solutions/solventum-image-metadata/auth/callback
+# For local dev: http://localhost:5001/auth/callback
+REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/auth/callback
+
+# Optional: Multi-tenant support - comma-separated list of allowed tenant IDs
+# Leave empty to allow any organizational tenant (after Azure Portal configuration)
+# Example: tenant-id-1,tenant-id-2,tenant-id-3
+ALLOWED_TENANT_IDS=
+
+# === OpenAI (optional — for AI metadata generation) ===
+OPENAI_API_KEY=
+
+# === Admin ===
+# This email will be auto-created as admin on first startup (SSO login)
+SUPERADMIN_EMAIL=vadymsamoilenko@oliver.agency
+
+# === Options ===
+ENABLE_TEST_USER=false
+HTTPS_ONLY=true
+DEBUG=false
--- a/.env.fastapi.example
+++ b/.env.fastapi.example
@ -1,80 +0,0 @@
-# Oliver Metadata Tool - FastAPI Backend Configuration
-# Copy this file to .env and configure your values
-
-# ======================
-# Database Configuration
-# ======================
-
-# SQLite (default - simpler for migration)
-DATABASE_URL=sqlite+aiosqlite:///./data/oliver_metadata.db
-
-# PostgreSQL (optional - for production)
-# DATABASE_URL=postgresql+asyncpg://oliver:YOUR_PASSWORD@localhost:5432/oliver_metadata
-# DB_PASSWORD=changeme
-
-# ======================
-# Redis Configuration
-# ======================
-REDIS_URL=redis://localhost:6379/0
-
-# ======================
-# Security
-# ======================
-
-# Secret key for JWT tokens (CHANGE IN PRODUCTION!)
-# Generate with: python -c "import secrets; print(secrets.token_hex(32))"
-SECRET_KEY=your-secret-key-change-in-production
-
-# ======================
-# OpenAI API (for AI metadata generation)
-# ======================
-
-# Required for AI metadata generation
-OPENAI_API_KEY=your-openai-api-key-here
-
-# Optional AI configuration
-AI_MODEL=gpt-4o-mini
-MAX_TOKENS=500
-TEMPERATURE=0.5
-
-# ======================
-# Microsoft SSO (optional)
-# ======================
-
-# Production values for ai-sandbox.oliver.solutions
-AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
-AZURE_CLIENT_SECRET=
-AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
-REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-
-# Local development:
-# REDIRECT_URI=http://localhost:5001/auth/microsoft/callback
-
-# ======================
-# Application Settings
-# ======================
-
-# Backend port (default: 5001 - same as old Flask for Azure AD compatibility)
-BACKEND_PORT=5001
-
-# Upload directory (default: ./uploads)
-UPLOAD_DIR=./uploads
-
-# Frontend URL for CORS (optional)
-# Production: full URL with path
-FRONTEND_URL=https://ai-sandbox.oliver.solutions/solventum-image-metadata
-# Local dev:
-# FRONTEND_URL=http://localhost:3000
-
-# Debug mode (true/false)
-DEBUG=false
-
-# ======================
-# Tesseract OCR (optional)
-# ======================
-# TESSERACT_PATH=/usr/bin/tesseract
-
-# ======================
-# FFmpeg (optional)
-# ======================
-# FFMPEG_PATH=/usr/bin/ffmpeg
--- a/.env.production
+++ b/.env.production
@ -1,17 +0,0 @@
-# Production Environment - Copy to .env on server
-DATABASE_URL=sqlite+aiosqlite:///./data/oliver_metadata.db
-REDIS_URL=redis://redis:6379/0
-SECRET_KEY=CHANGE-THIS
-OPENAI_API_KEY=
-OPENAI_MODEL=gpt-5.2
-OPENAI_API_BASE=https://api.openai.com/v1
-MAX_TOKENS=500
-TEMPERATURE=0.5
-AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
-AZURE_CLIENT_SECRET=
-AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
-REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-BACKEND_PORT=5001
-UPLOAD_DIR=/app/uploads
-DEBUG=false
-FRONTEND_URL=https://ai-sandbox.oliver.solutions/solventum-image-metadata
--- a/.gitignore
+++ b/.gitignore
@ -60,9 +60,9 @@ ENV/
 __pycache__/
 *.pyc

-# Environment variables (removed - .env files now committed to git)
-# .env
-# .env.local
+# Environment variables
+.env
+.env.local

 # Excel files with data
 *.xlsx
--- a/CLEANUP-COMMANDS.md
+++ b/CLEANUP-COMMANDS.md
@ -1,167 +0,0 @@
-# Server Cleanup Commands
-
-Before deploying a new version, you can use these commands to completely clean up old builds and free disk space.
-
-## 🧹 Complete Cleanup (Nuclear Option)
-
-Run these commands on the Ubuntu server **before** running `deploy.sh`:
-
-```bash
-# Navigate to project directory
-cd /opt/solventum-image-metadata
-
-# Stop all running containers
-sudo docker-compose -f docker-compose.fastapi.yml down --remove-orphans
-
-# Remove ALL Oliver Metadata related containers (including stopped ones)
-sudo docker ps -a | grep -E "oliver|solventum-image-metadata" | awk '{print $1}' | xargs -r sudo docker rm -f
-
-# Remove ALL Oliver Metadata related images
-sudo docker images | grep -E "oliver|solventum-image-metadata" | awk '{print $3}' | xargs -r sudo docker rmi -f
-
-# Remove ALL Oliver Metadata related volumes (⚠️ WARNING: This deletes database data!)
-sudo docker volume ls | grep oliver | awk '{print $2}' | xargs -r sudo docker volume rm
-
-# Clean Docker build cache
-sudo docker builder prune -af
-
-# Remove dangling images
-sudo docker image prune -af
-
-# Remove unused networks
-sudo docker network prune -f
-
-# Remove stopped containers
-sudo docker container prune -f
-```
-
-## 🗑️ Safe Cleanup (Keeps Database & Uploads)
-
-If you want to keep your database and uploaded files:
-
-```bash
-cd /opt/solventum-image-metadata
-
-# Stop containers
-sudo docker-compose -f docker-compose.fastapi.yml down
-
-# Remove only old images (not volumes)
-sudo docker images | grep -E "oliver|solventum-image-metadata" | awk '{print $3}' | xargs -r sudo docker rmi -f
-
-# Clean build cache (keep last 24 hours)
-sudo docker builder prune -f --filter "until=24h"
-
-# Clean system
-sudo docker system prune -f
-```
-
-## 📊 Check Disk Space
-
-```bash
-# Before cleanup
-df -h /var/lib/docker
-
-# Check Docker disk usage
-sudo docker system df
-
-# After cleanup
-sudo docker system df
-```
-
-## 🔍 Verify Cleanup
-
-```bash
-# Should return no Oliver containers
-sudo docker ps -a | grep -E "oliver|solventum"
-
-# Should return no Oliver images
-sudo docker images | grep -E "oliver|solventum"
-
-# List remaining volumes (should see redis-data if you kept volumes)
-sudo docker volume ls | grep oliver
-```
-
-## 🚀 Full Deployment Workflow
-
-Complete workflow for a fresh deployment:
-
-```bash
-# 1. Navigate to project
-cd /opt/solventum-image-metadata
-
-# 2. OPTIONAL: Backup database (recommended)
-sudo cp backend/data/oliver_metadata.db backend/data/oliver_metadata.db.backup-$(date +%Y%m%d-%H%M%S)
-
-# 3. Run safe cleanup
-sudo docker-compose -f docker-compose.fastapi.yml down
-sudo docker images | grep -E "oliver|solventum" | awk '{print $3}' | xargs -r sudo docker rmi -f
-sudo docker system prune -f
-
-# 4. Run deployment script (includes git pull)
-sudo ./deploy.sh
-```
-
-## ⚠️ WARNING: Data Loss Commands
-
-These commands will **PERMANENTLY DELETE** your data:
-
-```bash
-# Delete database (cannot be recovered unless backed up)
-sudo rm -rf /opt/solventum-image-metadata/backend/data/oliver_metadata.db
-
-# Delete all uploads (cannot be recovered)
-sudo rm -rf /opt/solventum-image-metadata/backend/uploads/*
-
-# Delete all volumes (includes Redis data)
-sudo docker volume rm $(sudo docker volume ls | grep oliver | awk '{print $2}')
-
-# Delete all frontend files
-sudo rm -rf /var/www/html/solventum-image-metadata/*
-```
-
-## 🔧 Troubleshooting
-
-### "Device or resource busy" error
-
-If you get errors removing images/containers:
-
-```bash
-# Force stop all Docker processes
-sudo systemctl stop docker
-sudo systemctl start docker
-
-# Then retry cleanup
-sudo docker system prune -af --volumes
-```
-
-### "Cannot remove container" error
-
-```bash
-# Find and kill process
-sudo docker ps -a | grep oliver
-sudo docker rm -f <container_id>
-
-# If still stuck, restart Docker
-sudo systemctl restart docker
-```
-
-### Check what's using disk space
-
-```bash
-# Largest Docker images
-sudo docker images --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}" | sort -k 3 -h
-
-# Disk usage by container
-sudo docker ps -s
-
-# Build cache size
-sudo docker builder du
-```
-
-## 📝 Notes
-
- The `deploy.sh` script now includes automatic cleanup
- Old images are removed automatically during deployment
- Build cache is preserved for faster builds (24 hour window)
- Database and uploads are preserved unless explicitly deleted
- Frontend files in `/var/www/html/` are backed up to `/tmp/` during deployment
--- a/DEPLOYMENT-CHECKLIST.md
+++ b/DEPLOYMENT-CHECKLIST.md
@ -1,142 +0,0 @@
-# Deployment Checklist - Oliver Metadata Tool v4.0
-
-## ✅ Pre-Deployment
-
-### 1. Backend .env Configuration
-```bash
-cd /opt/solventum-image-metadata
-sudo cp .env.production .env
-sudo nano .env
-```
-
-**Required variables:**
-```env
-SECRET_KEY=<generate-with-python-secrets>
-OPENAI_API_KEY=sk-...
-AZURE_CLIENT_SECRET=<your-secret>
-```
-
-**Verify Azure AD settings:**
-```env
-AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
-AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
-REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-```
-
-### 2. Apache Configuration
-
-Add to `/etc/apache2/sites-available/solventum-image-metadata.conf`:
-
-```apache
-# Frontend - static files
-Alias /solventum-image-metadata /var/www/html/solventum-image-metadata
-
-<Directory /var/www/html/solventum-image-metadata>
-    Options -Indexes +FollowSymLinks
-    AllowOverride All
-    Require all granted
-
-    RewriteEngine On
-    RewriteBase /solventum-image-metadata
-    RewriteCond %{REQUEST_FILENAME} !-f
-    RewriteCond %{REQUEST_FILENAME} !-d
-    RewriteCond %{REQUEST_URI} !^/solventum-image-metadata/api/
-    RewriteRule ^ /solventum-image-metadata/index.html [L]
-</Directory>
-
-# Backend API
-ProxyPass /solventum-image-metadata/api/ http://localhost:5001/
-ProxyPassReverse /solventum-image-metadata/api/ http://localhost:5001/
-ProxyTimeout 600
-```
-
-Enable modules:
-```bash
-sudo a2enmod rewrite alias proxy proxy_http
-sudo apache2ctl configtest
-sudo systemctl reload apache2
-```
-
-## ✅ Deployment
-
-```bash
-cd /opt/solventum-image-metadata
-git pull origin main
-sudo ./deploy.sh
-```
-
-## ✅ Verification
-
-### 1. Check Backend
-```bash
-curl http://localhost:5001/health
-# Expected: {"status":"healthy"}
-```
-
-### 2. Check Frontend
-```bash
-curl https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-# Expected: HTML with React app
-```
-
-### 3. Check API through Apache
-```bash
-curl https://ai-sandbox.oliver.solutions/solventum-image-metadata/api/health
-# Expected: {"status":"healthy"}
-```
-
-### 4. Test SSO
-1. Go to: https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-2. Click "Login with Microsoft"
-3. Should redirect to Azure AD
-4. After login, should return to dashboard
-
-### 5. Test File Upload
-1. Login to dashboard
-2. Select "Manual Entry" or "AI Generation"
-3. Drag & drop a PDF file
-4. Edit metadata (title, subject, keywords)
-5. Click "Save Metadata"
-6. Download file
-7. Verify: `exiftool downloaded.pdf`
-
-## 📊 Final Status
-
- [ ] Backend running on port 5001
- [ ] Redis running in Docker
- [ ] Frontend deployed to /var/www/html/solventum-image-metadata
- [ ] Apache configured with Alias and ProxyPass
- [ ] .env configured with all secrets
- [ ] SSO redirect to Azure AD working
- [ ] SSO callback to dashboard working
- [ ] File upload working
- [ ] Metadata editing working
- [ ] Download working
-
-## 🆘 Troubleshooting
-
-### Backend not starting
-```bash
-docker logs oliver-backend --tail 100
-```
-
-### Frontend 404
-```bash
-ls -la /var/www/html/solventum-image-metadata/
-# Should contain: index.html, assets/, etc.
-```
-
-### SSO redirect loop
-```bash
-# Check .env REDIRECT_URI matches Azure AD exactly
-grep REDIRECT_URI /opt/solventum-image-metadata/.env
-# Must be: https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-```
-
-### API 404 errors
-```bash
-# Check Apache proxy
-sudo apache2ctl -S | grep solventum
-# Check backend is running
-curl http://localhost:5001/docs
-```
--- a/DEPLOYMENT.md
+++ b/DEPLOYMENT.md
@ -1,402 +0,0 @@
-# Production Deployment Guide
-
-## Server: Ubuntu + Apache
-
-Production deployment на https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-
-## Prerequisites
-
-### 1. Install System Dependencies
-
-```bash
-# Update system
-sudo apt update && sudo apt upgrade -y
-
-# Install Docker
-curl -fsSL https://get.docker.com | sh
-sudo usermod -aG docker $USER
-
-# Install Docker Compose
-sudo apt install docker-compose-plugin
-
-# Install Node.js 18+
-curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
-sudo apt install -y nodejs
-
-# Verify versions
-docker --version
-docker compose version
-node --version
-npm --version
-```
-
-### 2. Configure Apache
-
-```bash
-# Enable required modules
-sudo a2enmod proxy proxy_http headers rewrite ssl
-
-# Copy Apache config
-sudo cp /opt/solventum-image-metadata/apache-config.conf \
-        /etc/apache2/sites-available/solventum-image-metadata.conf
-
-# Enable site
-sudo a2ensite solventum-image-metadata
-
-# Test config
-sudo apache2ctl configtest
-
-# Reload Apache
-sudo systemctl reload apache2
-```
-
-### 3. Setup SSL (Let's Encrypt)
-
-```bash
-# Install Certbot
-sudo apt install certbot python3-certbot-apache
-
-# Get certificate
-sudo certbot --apache -d ai-sandbox.oliver.solutions
-
-# Auto-renewal
-sudo systemctl enable certbot.timer
-```
-
-## Initial Deployment
-
-### 1. Clone Repository
-
-```bash
-# Clone to /opt
-cd /opt
-sudo git clone <repository-url> solventum-image-metadata
-cd solventum-image-metadata
-```
-
-### 2. Configure Environment
-
-```bash
-# Copy environment template
-sudo cp .env.fastapi.example .env
-
-# Edit configuration
-sudo nano .env
-```
-
-**Required variables:**
-```env
-SECRET_KEY=<generate-with-python-secrets>
-OPENAI_API_KEY=sk-...
-AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
-AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
-AZURE_REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/api/auth/microsoft/callback
-```
-
-**Generate SECRET_KEY:**
-```bash
-python3 -c "import secrets; print(secrets.token_hex(32))"
-```
-
-### 3. Create Required Directories
-
-```bash
-# Create data directories
-sudo mkdir -p /opt/solventum-image-metadata/backend/{data,uploads,output/templates}
-sudo mkdir -p /var/www/html/solventum-image-metadata
-
-# Set permissions
-sudo chown -R www-data:www-data /var/www/html/solventum-image-metadata
-sudo chown -R $USER:$USER /opt/solventum-image-metadata/backend
-```
-
-### 4. Initial Deploy
-
-```bash
-cd /opt/solventum-image-metadata
-sudo ./deploy.sh
-```
-
-## Updates / Re-deployment
-
-```bash
-# 1. Pull latest code (as normal user with git access)
-cd /opt/solventum-image-metadata
-git pull origin main
-
-# 2. Run deployment script (as root)
-sudo ./deploy.sh
-```
-
-The script is **idempotent** - safe to run multiple times.
-
-## What the Deploy Script Does
-
-1. ✅ Pre-flight checks (Docker, Node, permissions)
-2. ✅ Validates environment variables
-3. ✅ Builds Docker containers (with cache)
-4. ✅ Stops old containers gracefully
-5. ✅ Starts new containers (Redis + Backend)
-6. ✅ Waits for Redis to be ready
-7. ✅ Initializes database (first run only)
-8. ✅ Installs frontend dependencies (npm ci)
-9. ✅ Builds frontend (Vite production build)
-10. ✅ Backs up existing frontend files
-11. ✅ Deploys new frontend to /var/www/html/
-12. ✅ Sets correct permissions (www-data)
-13. ✅ Health checks (backend + Redis)
-14. ✅ Cleanup old Docker images
-
-## Verification
-
-### 1. Check Services
-
-```bash
-# Docker containers
-docker ps
-
-# Backend logs
-docker logs oliver-backend
-
-# Redis logs
-docker logs oliver-redis
-```
-
-### 2. Test Endpoints
-
-```bash
-# Backend health
-curl http://localhost:8000/health
-
-# API docs
-curl http://localhost:8000/docs
-
-# Frontend (through Apache)
-curl https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-```
-
-### 3. Test Full Flow
-
-1. Open https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-2. Click "Login with Microsoft" (should redirect to Azure AD)
-3. After SSO, should redirect back to dashboard
-4. Upload a test file
-5. Edit metadata
-6. Download file
-7. Verify metadata: `exiftool downloaded_file.pdf`
-
-## Troubleshooting
-
-### Backend not starting
-
-```bash
-# Check backend logs
-docker logs oliver-backend --tail 100
-
-# Check if port 8000 is already in use
-sudo lsof -i :8000
-
-# Restart backend
-docker restart oliver-backend
-```
-
-### Redis connection error
-
-```bash
-# Check Redis
-docker exec oliver-redis redis-cli ping
-# Should return: PONG
-
-# Check Redis logs
-docker logs oliver-redis
-
-# Restart Redis
-docker restart oliver-redis
-```
-
-### Frontend 404 errors
-
-```bash
-# Check Apache config
-sudo apache2ctl configtest
-
-# Check file permissions
-ls -la /var/www/html/solventum-image-metadata/
-
-# Should be owned by www-data
-sudo chown -R www-data:www-data /var/www/html/solventum-image-metadata/
-
-# Check Apache error log
-sudo tail -f /var/log/apache2/solventum-image-metadata-error.log
-```
-
-### API proxy errors
-
-```bash
-# Check if proxy modules enabled
-apache2ctl -M | grep proxy
-
-# Should see:
-#  proxy_module (shared)
-#  proxy_http_module (shared)
-
-# Enable if missing
-sudo a2enmod proxy proxy_http
-
-# Restart Apache
-sudo systemctl restart apache2
-```
-
-### SSO redirect loop
-
-```bash
-# Verify REDIRECT_URI in .env matches Apache config
-grep AZURE_REDIRECT_URI /opt/solventum-image-metadata/.env
-
-# Should be:
-# AZURE_REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/api/auth/microsoft/callback
-
-# Check Azure AD app registration
-# Redirect URI must match exactly (including /api/ prefix)
-```
-
-### Database locked
-
-```bash
-# Check if multiple backends running
-docker ps | grep oliver-backend
-
-# Stop all and restart
-docker stop oliver-backend
-docker start oliver-backend
-```
-
-## Rollback
-
-If deployment fails and you need to rollback:
-
-```bash
-# 1. Stop new containers
-docker-compose -f docker-compose.fastapi.yml down
-
-# 2. Restore frontend from backup
-sudo rm -rf /var/www/html/solventum-image-metadata/*
-sudo cp -r /tmp/oliver-metadata-backup-TIMESTAMP/* /var/www/html/solventum-image-metadata/
-
-# 3. Start old Flask app (if available)
-docker-compose -f docker-compose.yml up -d
-
-# 4. Check logs
-docker logs oliver-metadata-tool
-```
-
-## Maintenance
-
-### Regular Tasks
-
-**Daily:**
- Monitor disk space: `df -h`
- Check Docker logs: `docker logs oliver-backend --tail 100`
-
-**Weekly:**
- Cleanup old uploads: Files older than 7 days auto-deleted
- Check Redis memory: `docker exec oliver-redis redis-cli info memory`
-
-**Monthly:**
- Update system packages: `sudo apt update && sudo apt upgrade`
- Renew SSL certificate (auto with certbot)
- Review logs for errors
-
-### Backup Strategy
-
-**Database:**
-```bash
-# Backup SQLite database
-sudo cp /opt/solventum-image-metadata/backend/data/oliver_metadata.db \
-       /opt/backups/oliver_metadata_$(date +%Y%m%d).db
-
-# Automated daily backup (crontab)
-0 2 * * * cp /opt/solventum-image-metadata/backend/data/oliver_metadata.db /opt/backups/oliver_metadata_$(date +\%Y\%m\%d).db
-```
-
-**Uploads:**
-```bash
-# Backup uploads directory
-sudo tar -czf /opt/backups/uploads_$(date +%Y%m%d).tar.gz \
-             /opt/solventum-image-metadata/backend/uploads/
-```
-
-**Redis (if critical data):**
-```bash
-# Redis snapshot (runs automatically with AOF enabled)
-docker exec oliver-redis redis-cli BGSAVE
-
-# Copy RDB file
-docker cp oliver-redis:/data/dump.rdb /opt/backups/redis_$(date +%Y%m%d).rdb
-```
-
-## Monitoring
-
-### Health Checks
-
-```bash
-# Backend
-curl http://localhost:8000/health
-
-# Redis
-docker exec oliver-redis redis-cli ping
-
-# Frontend
-curl https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-```
-
-### Logs
-
-```bash
-# Backend logs
-docker logs oliver-backend -f
-
-# Redis logs
-docker logs oliver-redis -f
-
-# Apache logs
-sudo tail -f /var/log/apache2/solventum-image-metadata-access.log
-sudo tail -f /var/log/apache2/solventum-image-metadata-error.log
-```
-
-### Performance
-
-```bash
-# Docker stats
-docker stats oliver-backend oliver-redis
-
-# Disk usage
-du -sh /opt/solventum-image-metadata/backend/uploads/
-
-# Redis memory
-docker exec oliver-redis redis-cli info memory | grep used_memory_human
-```
-
-## Security Checklist
-
- [x] SSL enabled (HTTPS)
- [x] SECRET_KEY is random (not default)
- [x] OPENAI_API_KEY secured in .env
- [x] Azure AD credentials secured
- [x] File permissions set to www-data
- [x] Database not publicly accessible
- [x] Redis not exposed externally
- [x] CORS restricted to frontend domain
- [x] Apache security headers enabled
- [x] Regular backups configured
-
-## Support
-
- **API Documentation**: http://localhost:8000/docs
- **Deployment Script**: `/opt/solventum-image-metadata/deploy.sh`
- **Logs Directory**: `/var/log/apache2/`
- **Application Logs**: `docker logs oliver-backend`
-
---
-
-Last updated: 2026-02-09
--- a/20
+++ b/20
@ -19,8 +19,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    tesseract-ocr-kor \
    # Poppler for PDF to image conversion
    poppler-utils \
-    # FFmpeg for video processing (optional)
+    # FFmpeg for video processing
    ffmpeg \
+    # curl for health check
+    curl \
    # Build dependencies
    gcc \
    && rm -rf /var/lib/apt/lists/*
@ -38,19 +40,25 @@ RUN pip install --no-cache-dir -r requirements.txt
 COPY . .

 # Create necessary directories
-RUN mkdir -p /app/uploads /app/output /app/data /app/templates
+RUN mkdir -p /app/uploads /app/output /app/data /app/templates_saved

 # Set environment variables
 ENV PYTHONUNBUFFERED=1
 ENV DOCKER_MODE=true
-ENV FLASK_APP=web_app.py

 # Expose port
 EXPOSE 5001

 # Health check
 HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
-    CMD python -c "import requests; requests.get('http://localhost:5001/login', timeout=5)" || exit 1
+    CMD curl -sf http://localhost:5001/login || exit 1

-# Run application with gunicorn (production WSGI server)
-CMD ["gunicorn", "--bind", "0.0.0.0:5001", "--workers", "2", "--timeout", "120", "web_app:app"]
+# Run application with gunicorn + uvicorn workers
+CMD ["gunicorn", "app.main:app", \
+     "--worker-class", "uvicorn.workers.UvicornWorker", \
+     "--workers", "2", \
+     "--bind", "0.0.0.0:5001", \
+     "--timeout", "120", \
+     "--graceful-timeout", "30", \
+     "--access-logfile", "-", \
+     "--error-logfile", "-"]
--- a/PRODUCTION-DEPLOY.md
+++ b/PRODUCTION-DEPLOY.md
@ -1,264 +0,0 @@
-# Production Deployment Guide
-
-Quick guide for deploying Oliver Metadata Tool v4.0 to Ubuntu server.
-
-## 📋 Prerequisites
-
-1. **Server Setup:**
-   - Ubuntu 20.04+ server
-   - Docker & Docker Compose installed
-   - Node.js 18+ & npm installed
-   - Apache/Nginx configured as reverse proxy
-
-2. **Required Files:**
-   - `.env` file in project root with production values
-   - Apache/Nginx config for reverse proxy
-
-3. **Repository Location:**
-   - Clone to: `/opt/solventum-image-metadata/`
-   - Frontend serves from: `/var/www/html/solventum-image-metadata/`
-
-## 🚀 Quick Deployment
-
-### First-Time Setup
-
-```bash
-# 1. Clone repository
-cd /opt
-sudo git clone <repository-url> solventum-image-metadata
-cd solventum-image-metadata
-
-# 2. Create .env file
-sudo cp .env.production .env
-sudo nano .env  # Edit with production values
-
-# 3. Configure frontend volume in docker-compose
-sudo nano docker-compose.fastapi.yml
-# Comment out line 69: - ./frontend/dist:/app/frontend/dist:ro
-
-# 4. Run deployment
-sudo ./deploy.sh
-```
-
-### Subsequent Updates
-
-```bash
-# Just run the deploy script - it handles everything!
-cd /opt/solventum-image-metadata
-sudo ./deploy.sh
-```
-
-The script automatically:
- ✅ Pulls latest code from git
- ✅ Cleans old Docker images
- ✅ Builds new containers
- ✅ Initializes database (first run only)
- ✅ Builds React frontend
- ✅ Deploys frontend to `/var/www/html/`
- ✅ Runs health checks
-
-## 🧹 Clean Deployment (Remove Old Builds)
-
-If you need to completely clean up before deploying:
-
-```bash
-cd /opt/solventum-image-metadata
-
-# Option 1: Quick cleanup (recommended)
-sudo docker-compose -f docker-compose.fastapi.yml down
-sudo docker images | grep -E "oliver|solventum" | awk '{print $3}' | xargs -r sudo docker rmi -f
-sudo docker system prune -f
-
-# Option 2: Nuclear cleanup (see CLEANUP-COMMANDS.md)
-# Use only if you want to delete everything including database
-```
-
-Then run `sudo ./deploy.sh`
-
-## ⚙️ Configuration Files
-
-### `.env` File (Production)
-
-Required environment variables:
-
-```bash
-# OpenAI (required for AI features)
-OPENAI_API_KEY=sk-proj-...
-AI_MODEL=gpt-5.2
-
-# Azure AD SSO
-AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
-AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
-AZURE_CLIENT_SECRET=your-secret-here
-REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-
-# Security
-SECRET_KEY=your-production-secret-key-here
-
-# Backend
-BACKEND_PORT=5001
-DEBUG=false
-```
-
-### Apache Virtual Host Example
-
-```apache
-<Location /solventum-image-metadata/api>
-    ProxyPass http://localhost:5001
-    ProxyPassReverse http://localhost:5001
-</Location>
-
-<Location /solventum-image-metadata/auth>
-    ProxyPass http://localhost:5001/auth
-    ProxyPassReverse http://localhost:5001/auth
-</Location>
-
-# Serve frontend static files
-Alias /solventum-image-metadata /var/www/html/solventum-image-metadata
-<Directory /var/www/html/solventum-image-metadata>
-    Options -Indexes +FollowSymLinks
-    AllowOverride None
-    Require all granted
-
-    # React Router support
-    RewriteEngine On
-    RewriteBase /solventum-image-metadata/
-    RewriteRule ^index\.html$ - [L]
-    RewriteCond %{REQUEST_FILENAME} !-f
-    RewriteCond %{REQUEST_FILENAME} !-d
-    RewriteRule . /solventum-image-metadata/index.html [L]
-</Directory>
-```
-
-## 🔍 Post-Deployment Verification
-
-```bash
-# 1. Check Docker containers
-sudo docker ps | grep oliver
-
-# 2. Check backend health
-curl http://localhost:5001/health
-
-# 3. Check API docs
-curl http://localhost:5001/docs
-
-# 4. Check frontend files
-ls -lh /var/www/html/solventum-image-metadata/
-
-# 5. View logs
-cd /opt/solventum-image-metadata
-sudo docker-compose -f docker-compose.fastapi.yml logs -f backend
-```
-
-## 🔧 Useful Commands
-
-```bash
-# View deployment logs
-cd /opt/solventum-image-metadata
-sudo docker-compose -f docker-compose.fastapi.yml logs -f
-
-# Restart backend only
-sudo docker-compose -f docker-compose.fastapi.yml restart backend
-
-# Stop all services
-sudo docker-compose -f docker-compose.fastapi.yml down
-
-# Start services
-sudo docker-compose -f docker-compose.fastapi.yml up -d
-
-# Access Redis CLI
-sudo docker exec -it oliver-redis redis-cli
-
-# Check database
-sudo ls -lh /opt/solventum-image-metadata/backend/data/
-
-# Backup database
-sudo cp backend/data/oliver_metadata.db backend/data/oliver_metadata.db.backup-$(date +%Y%m%d)
-```
-
-## 🚨 Troubleshooting
-
-### Deployment fails with "Git pull failed"
-
-```bash
-cd /opt/solventum-image-metadata
-sudo git status
-sudo git stash  # If uncommitted changes
-sudo git pull origin main
-sudo ./deploy.sh
-```
-
-### Backend health check fails
-
-```bash
-# Check logs
-sudo docker-compose -f docker-compose.fastapi.yml logs backend
-
-# Common issues:
-# 1. OPENAI_API_KEY not set
-# 2. Redis not running
-# 3. Port 5001 already in use
-```
-
-### Frontend not loading
-
-```bash
-# Check files exist
-ls -lh /var/www/html/solventum-image-metadata/
-
-# Check permissions
-sudo chown -R www-data:www-data /var/www/html/solventum-image-metadata/
-sudo chmod -R 755 /var/www/html/solventum-image-metadata/
-
-# Check Apache config
-sudo apache2ctl -t
-sudo systemctl reload apache2
-```
-
-### "Docker build failed"
-
-```bash
-# Clean Docker completely
-sudo docker system prune -af --volumes
-sudo systemctl restart docker
-sudo ./deploy.sh
-```
-
-## 📊 Monitoring
-
-### Check disk space
-
-```bash
-# Docker disk usage
-sudo docker system df
-
-# Project disk usage
-du -sh /opt/solventum-image-metadata
-du -sh /var/www/html/solventum-image-metadata
-```
-
-### Check logs
-
-```bash
-# Backend logs (last 100 lines)
-cd /opt/solventum-image-metadata
-sudo docker-compose -f docker-compose.fastapi.yml logs --tail=100 backend
-
-# Follow logs in real-time
-sudo docker-compose -f docker-compose.fastapi.yml logs -f
-```
-
-## 🔒 Security Notes
-
-1. **Never commit .env files** with secrets to git
-2. **Use strong SECRET_KEY** in production
-3. **Backup database regularly** before updates
-4. **Use HTTPS** for production (configure in Apache/Nginx)
-5. **Review CORS settings** in backend/app/main.py if needed
-
-## 📞 Support
-
-For issues:
-1. Check logs: `docker-compose logs`
-2. Review [CLEANUP-COMMANDS.md](CLEANUP-COMMANDS.md) for cleanup options
-3. See [DEPLOYMENT-CHECKLIST.md](DEPLOYMENT-CHECKLIST.md) for detailed steps
--- a/README-FASTAPI.md
+++ b/README-FASTAPI.md
@ -1,398 +0,0 @@
-# Oliver Metadata Tool - FastAPI Backend
-
-Complete FastAPI backend migration from Flask with Redis sessions, JWT authentication, and full API.
-
-## ✅ What's Complete
-
-### Backend (100%)
- ✅ FastAPI app with async I/O
- ✅ Redis session storage (solves session loss problem!)
- ✅ JWT authentication (access + refresh tokens)
- ✅ Microsoft SSO support
- ✅ File upload/download with persistent storage
- ✅ All metadata sources: AI, Excel, Import, Manual, Templates
- ✅ All processors copied from Flask (100% working as-is)
- ✅ SQLAlchemy async database
- ✅ Docker Compose setup
-
-### API Endpoints (17 total)
- Auth: `/auth/login`, `/auth/logout`, `/auth/token/refresh`, `/auth/register`
- Files: `/files/upload`, `/files/{file_id}/download`, `/files/download-batch`
- Metadata: `/metadata/{file_id}`, `/metadata/batch-update`
- Templates: `/templates/` (list, create, get, delete, preview)
-
-## 🚀 Quick Start
-
-### Option 1: Docker Compose (Recommended)
-
-```bash
-# 1. Copy environment file
-cp .env.fastapi.example .env
-
-# 2. Edit .env and add your OpenAI API key
-nano .env
-
-# 3. Start services
-docker-compose -f docker-compose.fastapi.yml up -d
-
-# 4. Check logs
-docker-compose -f docker-compose.fastapi.yml logs -f backend
-
-# 5. Access API
-open http://localhost:8000/docs
-```
-
-### Option 2: Local Development
-
-```bash
-# 1. Install Redis
-brew install redis  # macOS
-# or: sudo apt-get install redis-server  # Linux
-
-# 2. Start Redis
-redis-server
-
-# 3. Create virtual environment
-cd backend
-python3 -m venv venv
-source venv/bin/activate
-
-# 4. Install dependencies
-pip install -r requirements.txt
-
-# 5. Copy environment file
-cp ../.env.fastapi.example ../.env
-
-# 6. Edit .env
-nano ../.env
-
-# 7. Run backend
-python -m app.main
-
-# 8. Access API
-open http://localhost:8000/docs
-```
-
-## 📝 Configuration
-
-### Required Environment Variables
-
-```env
-# OpenAI API key (required for AI metadata generation)
-OPENAI_API_KEY=sk-...
-
-# Secret key for JWT tokens (generate new one!)
-SECRET_KEY=$(python -c "import secrets; print(secrets.token_hex(32))")
-
-# Redis URL
-REDIS_URL=redis://localhost:6379/0
-```
-
-### Optional Environment Variables
-
-```env
-# Database (default: SQLite)
-DATABASE_URL=sqlite+aiosqlite:///./data/oliver_metadata.db
-
-# Microsoft SSO
-AZURE_CLIENT_ID=...
-AZURE_CLIENT_SECRET=...
-AZURE_TENANT_ID=...
-
-# Frontend URL for CORS
-FRONTEND_URL=http://localhost:3000
-```
-
-## 🧪 Testing the API
-
-### 1. Create a Test User
-
-```bash
-curl -X POST http://localhost:8000/auth/register \
-  -H "Content-Type: application/json" \
-  -d '{"username": "testuser", "password": "testpass"}'
-```
-
-### 2. Login and Get Tokens
-
-```bash
-curl -X POST http://localhost:8000/auth/login \
-  -H "Content-Type: application/json" \
-  -d '{"username": "testuser", "password": "testpass"}'
-```
-
-Response:
-```json
-{
-  "access_token": "eyJ...",
-  "refresh_token": "eyJ...",
-  "token_type": "bearer",
-  "expires_in": 1800,
-  "user": {...}
-}
-```
-
-### 3. Upload Files
-
-```bash
-# Save access token
-TOKEN="your-access-token-here"
-
-# Upload file with AI metadata
-curl -X POST http://localhost:8000/files/upload \
-  -H "Authorization: Bearer $TOKEN" \
-  -F "files=@test.pdf" \
-  -F "metadata_source=ai"
-```
-
-### 4. Update Metadata
-
-```bash
-curl -X PUT http://localhost:8000/metadata/FILE_ID \
-  -H "Authorization: Bearer $TOKEN" \
-  -H "Content-Type: application/json" \
-  -d '{
-    "session_id": "SESSION_ID",
-    "file_index": 0,
-    "metadata": {
-      "title": "Updated Title",
-      "subject": "Updated Subject",
-      "keywords": "test, metadata"
-    }
-  }'
-```
-
-### 5. Download File
-
-```bash
-curl -X GET http://localhost:8000/files/FILE_ID/download \
-  -H "Authorization: Bearer $TOKEN" \
-  --output downloaded_file.pdf
-```
-
-## 📚 Interactive API Documentation
-
-FastAPI provides automatic interactive API docs:
-
- **Swagger UI**: http://localhost:8000/docs
- **ReDoc**: http://localhost:8000/redoc
-
-You can test all endpoints directly in the browser!
-
-## 🔧 Architecture
-
-### Session Management (CRITICAL FIX)
-
-**Before (Flask):**
- In-memory dict: `sessions = {}`
- Lost on restart ❌
-
-**After (FastAPI):**
- Redis with TTL
- Persistent across restarts ✅
- User sessions: 7 days
- File sessions: 1 hour
- Auto-cleanup
-
-### Authentication Flow
-
-1. Login → JWT access token (30 min) + refresh token (7 days)
-2. Refresh token stored in Redis
-3. Frontend sends: `Authorization: Bearer <access_token>`
-4. Token expired? → Use refresh token to get new access token
-5. Logout → Delete session from Redis
-
-### File Processing Flow
-
-1. Upload files → Save to `uploads/{user_id}/{YYYYMMDD}/`
-2. Create session in Redis with file info
-3. Generate metadata (AI/Excel/Import/Manual/Template)
-4. User reviews/edits metadata
-5. Update file with metadata
-6. Download processed file
-7. Cleanup (automatic after 7 days)
-
-## 🐳 Docker Services
-
-### Running Services
-
-```bash
-# Start all services
-docker-compose -f docker-compose.fastapi.yml up -d
-
-# View logs
-docker-compose -f docker-compose.fastapi.yml logs -f
-
-# Stop services
-docker-compose -f docker-compose.fastapi.yml down
-
-# Rebuild backend
-docker-compose -f docker-compose.fastapi.yml build backend
-docker-compose -f docker-compose.fastapi.yml up -d backend
-```
-
-### Service URLs
-
- **Backend API**: http://localhost:8000
- **API Docs**: http://localhost:8000/docs
- **Redis**: localhost:6379
- **PostgreSQL**: localhost:5432 (optional)
-
-## 🗄️ Database
-
-### SQLite (Default)
-
-Location: `backend/data/oliver_metadata.db`
-
-**Pros:**
- Simple, no setup
- Good for single server
- Easy migration from Flask
-
-**Cons:**
- No concurrent writes
- Not for multi-server deployment
-
-### PostgreSQL (Optional)
-
-**Pros:**
- Better performance
- Concurrent connections
- Multi-server support
-
-**To enable:**
-
-```yaml
-# docker-compose.fastapi.yml
-environment:
-  DATABASE_URL: postgresql+asyncpg://oliver:${DB_PASSWORD}@postgres:5432/oliver_metadata
-```
-
-## 📦 What's Reused from Flask
-
-These components are **100% unchanged**:
-
- `backend/app/processors/extractors/` - All file extractors
- `backend/app/processors/updaters/` - All file updaters
- `backend/app/processors/metadata_analyzer.py` - AI generation
- `backend/app/processors/excel_metadata_lookup.py` - Excel lookup
- `backend/app/processors/template_manager.py` - Templates
- `backend/app/processors/config.py` - Configuration
-
-**Zero modifications needed** - they work perfectly with FastAPI!
-
-## 🔒 Security
-
-### Production Checklist
-
- [ ] Change `SECRET_KEY` to random 64-char string
- [ ] Enable HTTPS (set `REDIRECT_URI` to https://)
- [ ] Restrict CORS origins in `main.py`
- [ ] Set `DEBUG=false` in production
- [ ] Use PostgreSQL instead of SQLite for multi-server
- [ ] Enable Redis password: `redis://user:password@host:6379/0`
- [ ] Regular backups of database and uploads
- [ ] Monitor Redis memory usage
-
-## 🐛 Troubleshooting
-
-### Redis Connection Error
-
-```bash
-# Check if Redis is running
-redis-cli ping
-# Should return: PONG
-
-# If not running:
-redis-server
-```
-
-### Database Lock Error
-
-```bash
-# SQLite only - check if another process is using DB
-lsof backend/data/oliver_metadata.db
-
-# If stuck, delete and restart:
-rm backend/data/oliver_metadata.db
-docker-compose -f docker-compose.fastapi.yml restart backend
-```
-
-### Import Errors
-
-```bash
-# Check if all dependencies installed
-cd backend
-pip list | grep fastapi
-pip list | grep redis
-
-# If missing:
-pip install -r requirements.txt
-```
-
-### File Upload 413 Error
-
-```bash
-# Increase max file size in main.py or nginx.conf
-# Default: 500MB (configured in processors/config.py)
-```
-
-## 📈 Monitoring
-
-### Check Redis Sessions
-
-```bash
-# Connect to Redis
-redis-cli
-
-# List all session keys
-KEYS *session*
-
-# Get session data
-GET file_session:SESSION_ID
-
-# Check memory usage
-INFO memory
-```
-
-### Check Storage
-
-```bash
-# Get storage stats
-curl http://localhost:8000/files/stats \
-  -H "Authorization: Bearer $TOKEN"
-```
-
-### Check Logs
-
-```bash
-# Docker logs
-docker-compose -f docker-compose.fastapi.yml logs -f backend
-
-# Or if running locally
-# Logs printed to console
-```
-
-## 🚧 What's Next (Frontend)
-
-To complete the migration:
-
-1. Create React frontend (see plan in `.claude/plans/`)
-2. Implement file upload UI with drag-drop
-3. Metadata editor components
-4. Template management UI
-5. Import/Excel mapping modals
-
-Backend is **100% ready** for frontend integration!
-
-## 📞 Support
-
- **API Documentation**: http://localhost:8000/docs
- **Migration Plan**: `.claude/plans/radiant-snacking-chipmunk.md`
- **Memory**: `.claude/projects/.../memory/MEMORY.md`
-
---
-
-**Status**: ✅ Backend Complete | ⏳ Frontend Pending
-
-Generated with Claude Code by Anthropic
--- a/README-FULLSTACK.md
+++ b/README-FULLSTACK.md
@ -1,368 +0,0 @@
-# Oliver Metadata Tool v4.0 - Complete Migration
-
-**🎉 COMPLETE!** Full migration from Flask to FastAPI + React SPA.
-
-## ✅ Project Status: 100% Complete
-
-### Backend (✅ Done)
- FastAPI async API with 17 endpoints
- Redis persistent session storage
- JWT authentication + Microsoft SSO
- All file processors (100% reused from Flask)
- Docker Compose ready
-
-### Frontend (✅ Done)
- React 18 + TypeScript + Vite
- Zustand state management
- Axios API client with auth interceptors
- Drag-drop file upload
- Metadata editor with validation
- Responsive design with Tailwind CSS
-
-## 🚀 Quick Start (Full Stack)
-
-### Prerequisites
- Docker & Docker Compose
- Node.js 18+ (for local dev)
- OpenAI API key
-
-### Option 1: Docker Compose (Recommended)
-
-```bash
-# 1. Set up environment
-cp .env.fastapi.example .env
-nano .env  # Add OPENAI_API_KEY
-
-# 2. Start backend + Redis
-docker-compose -f docker-compose.fastapi.yml up -d
-
-# 3. Install frontend dependencies
-cd frontend
-npm install
-
-# 4. Start frontend dev server
-npm run dev
-
-# 5. Open browser
-open http://localhost:3000
-```
-
-### Option 2: Local Development
-
-**Terminal 1 - Backend:**
-```bash
-# Start Redis
-redis-server
-
-# Start backend
-cd backend
-python3 -m venv venv
-source venv/bin/activate
-pip install -r requirements.txt
-python -m app.main
-```
-
-**Terminal 2 - Frontend:**
-```bash
-cd frontend
-npm install
-npm run dev
-```
-
-**Terminal 3 - Test:**
-```bash
-# Register test user
-curl -X POST http://localhost:8000/auth/register \
-  -H "Content-Type: application/json" \
-  -d '{"username": "test", "password": "test123"}'
-
-# Open app
-open http://localhost:3000
-```
-
-## 📦 Architecture
-
-```
-┌─────────────────────────────────────────────┐
-│         React Frontend (Port 3000)          │
-│  - Drag-drop upload                         │
-│  - Metadata editor                          │
-│  - File list & batch operations             │
-└─────────────────┬───────────────────────────┘
-                  │ Axios API Client
-                  │ JWT Tokens
-┌─────────────────▼───────────────────────────┐
-│       FastAPI Backend (Port 8000)           │
-│  - JWT Auth + SSO                           │
-│  - File upload/download                     │
-│  - Metadata generation (AI/Excel/Import)    │
-│  - Template management                      │
-└─────────────────┬──────────┬────────────────┘
-                  │          │
-         ┌────────▼───┐   ┌──▼──────────┐
-         │   Redis    │   │  SQLite/    │
-         │  Sessions  │   │  Postgres   │
-         └────────────┘   └─────────────┘
-```
-
-## 🎯 Key Features
-
-### Solved Problems
-
-| Problem | Before (Flask) | After (FastAPI + React) |
-|---------|---------------|------------------------|
-| **Sessions lost** | In-memory dict | Redis with TTL |
-| **Scalability** | Monolithic | Async FastAPI + SPA |
-| **File handling** | Temp files, no cleanup | Persistent + auto-cleanup |
-| **Frontend** | 2555-line Jinja templates | Modular React components |
-| **API** | Mixed HTML/JSON | Pure JSON REST API |
-
-### What Works
-
- ✅ Login with JWT tokens (30 min access, 7 day refresh)
- ✅ Microsoft SSO support
- ✅ Drag-drop file upload (up to 50 files)
- ✅ Metadata sources:
-  - Manual entry
-  - AI generation (OpenAI)
-  - Excel lookup
-  - CSV/JSON import (backend ready)
-  - Templates (backend ready)
- ✅ Metadata editor with character limits
- ✅ Batch download as ZIP
- ✅ Persistent storage (uploads/{user_id}/{date}/)
- ✅ Auto cleanup (7 days)
-
-## 📝 Environment Variables
-
-Create `.env` in project root:
-
-```env
-# Backend
-DATABASE_URL=sqlite+aiosqlite:///./data/oliver_metadata.db
-REDIS_URL=redis://localhost:6379/0
-SECRET_KEY=your-secret-key-here
-OPENAI_API_KEY=sk-...
-
-# Optional: Microsoft SSO
-AZURE_CLIENT_ID=
-AZURE_CLIENT_SECRET=
-AZURE_TENANT_ID=
-```
-
-Create `frontend/.env`:
-
-```env
-VITE_API_URL=/api
-```
-
-## 🧪 Testing the Application
-
-### 1. Register & Login
-```bash
-# Register
-curl -X POST http://localhost:8000/auth/register \
-  -H "Content-Type: application/json" \
-  -d '{"username": "test", "password": "test123"}'
-
-# Login via UI
-open http://localhost:3000/login
-# Username: test
-# Password: test123
-```
-
-### 2. Upload Files
-1. Select "Manual Entry" or "AI Generation"
-2. Drag & drop PDF/image files
-3. Wait for upload to complete
-4. Files appear in list below
-
-### 3. Edit Metadata
-1. Click "Edit Metadata" on any file
-2. Fill in Title (required), Subject, Keywords
-3. Character counters show limits
-4. Click "Save Metadata"
-5. File updated in backend
-
-### 4. Download
-1. Select files with checkboxes
-2. Click "Download Selected"
-3. ZIP file downloads automatically
-
-### 5. Process More
-1. Click "Process More Files"
-2. Session cleaned up
-3. Ready for new upload
-
-## 📚 API Documentation
-
-Interactive API docs available at:
- **Swagger UI**: http://localhost:8000/docs
- **ReDoc**: http://localhost:8000/redoc
-
-### Key Endpoints
-
-**Auth:**
- `POST /auth/login` - Login with username/password
- `POST /auth/register` - Register new user
- `POST /auth/token/refresh` - Refresh access token
- `POST /auth/logout` - Logout
- `GET /auth/me` - Get current user info
-
-**Files:**
- `POST /files/upload` - Upload files with metadata source
- `GET /files/{file_id}/download` - Download single file
- `POST /files/download-batch` - Download multiple as ZIP
- `DELETE /files/session/{session_id}` - Cleanup session
-
-**Metadata:**
- `PUT /metadata/{file_id}` - Update file metadata
- `POST /metadata/batch-update` - Update multiple files
-
-**Templates:**
- `GET /templates/` - List templates
- `POST /templates/` - Create template
- `GET /templates/{name}` - Get template
- `DELETE /templates/{name}` - Delete template
-
-## 🔧 Development
-
-### Frontend Development
-
-```bash
-cd frontend
-
-# Install dependencies
-npm install
-
-# Start dev server (hot reload)
-npm run dev
-
-# Build for production
-npm run build
-
-# Preview production build
-npm run preview
-```
-
-### Backend Development
-
-```bash
-cd backend
-
-# Install dependencies
-pip install -r requirements.txt
-
-# Run with auto-reload
-python -m app.main
-
-# Or use uvicorn directly
-uvicorn app.main:app --reload --port 8000
-```
-
-### Adding New Components
-
-Frontend components are in `frontend/src/components/`:
- `auth/` - Authentication components
- `files/` - File upload/list/item
- `metadata/` - Metadata editor (expandable)
- `common/` - Shared components (add here)
-
-## 🐳 Docker Production Deployment
-
-```bash
-# Build images
-docker-compose -f docker-compose.fastapi.yml build
-
-# Start production stack
-docker-compose -f docker-compose.fastapi.yml up -d
-
-# View logs
-docker-compose -f docker-compose.fastapi.yml logs -f
-
-# Stop
-docker-compose -f docker-compose.fastapi.yml down
-```
-
-## 📊 Project Statistics
-
-### Lines of Code
- Backend: ~3,500 lines (Python)
- Frontend: ~1,000 lines (TypeScript/TSX)
- **Total: ~4,500 lines** (vs 2,555 lines in Flask monolith)
-
-### Files Created
- Backend: 25 files
- Frontend: 20 files
- Docker/Config: 8 files
- **Total: 53 files**
-
-### Components
- React Components: 8 (Login, Dashboard, FileUpload, FileList, FileItem, etc.)
- API Endpoints: 17
- Services: 4 (file, metadata, auth, template)
- Stores: 2 (auth, files)
-
-## 🎓 What Was Learned
-
-### Architecture Improvements
-1. **Session persistence** - Redis solves restart problem
-2. **Async operations** - FastAPI handles concurrent requests better
-3. **Type safety** - TypeScript prevents frontend bugs
-4. **State management** - Zustand simplifies React state
-5. **API design** - Clean REST API separation
-
-### What Was Reused (100%)
- All file processors (extractors, updaters)
- Metadata analyzer (AI generation)
- Excel lookup logic
- Template manager
- Field mapper (for imports)
- Configuration system
-
-**Zero modifications** needed to existing business logic!
-
-## 🚧 Future Enhancements
-
-Optional features to add:
-
- [ ] Import CSV/Excel mapping modal (backend ready)
- [ ] Template creation UI (backend ready)
- [ ] Batch metadata editor (update all at once)
- [ ] File preview (PDF/image thumbnails)
- [ ] Search & filter uploaded files
- [ ] User management UI (admin)
- [ ] Statistics dashboard
- [ ] Custom fields UI
- [ ] Dark mode toggle
- [ ] Mobile responsive improvements
-
-## 📞 Support & Documentation
-
- **Backend API Docs**: http://localhost:8000/docs
- **Backend README**: `README-FASTAPI.md`
- **Migration Plan**: `.claude/plans/radiant-snacking-chipmunk.md`
- **Memory**: `.claude/projects/.../memory/MEMORY.md`
-
-## 🎉 Success Metrics
-
-| Metric | Before | After | Improvement |
-|--------|--------|-------|-------------|
-| Session persistence | ❌ Lost on restart | ✅ Redis 7-day TTL | ∞% |
-| Concurrent users | ~5 | ~50+ | 10x |
-| Response time | 500ms | <200ms | 2.5x faster |
-| File cleanup | Manual | Automatic (7 days) | ∞% |
-| Frontend maintainability | Low (2555-line template) | High (modular components) | Much better |
-| API documentation | None | Auto-generated | ✅ |
-| Type safety | Python only | Python + TypeScript | ✅ |
-
---
-
-**Status**: ✅ **COMPLETE - Ready for Production**
-
-**Migration Time**: ~2 days
-**Lines Changed**: 4,500+
-**Files Created**: 53
-**Bugs Fixed**: Session loss, scalability issues, file cleanup
-
-Generated by Claude Code (Anthropic)
--- a/README.md
+++ b/README.md
@ -1,56 +1,24 @@
-# Oliver Metadata Tool v4.0
+# Oliver Metadata Tool v3.1 Enterprise Edition

-**Universal metadata creation and management tool for all file types.**
-
-Create, import, and manage metadata from multiple sources with a modern React interface, FastAPI backend, persistent Redis sessions, and AI-powered metadata generation.
+Universal metadata creation and management tool for all file types. Create, import, and manage metadata from multiple sources with an intuitive web interface, user authentication, and AI-powered metadata generation.

 **Developer:** Vadym Samoilenko
 **License:** Corporate License - Oliver Marketing
-**Version:** 4.0 (FastAPI + React Edition)
+**Version:** 3.1 (Enterprise Edition)

 ---

-## 🚀 Quick Start
-
-### Production Deployment (Ubuntu Server)
-
-```bash
-# 1. Clone repository
-cd /opt
-sudo git clone https://bitbucket.org/zlalani/solventum-image-metadata.git
-cd solventum-image-metadata
-
-# 2. Configure environment
-sudo cp .env.production .env
-sudo nano .env  # Add your secrets
-
-# 3. Deploy
-sudo ./deploy.sh
-```
-
-**That's it!** The script automatically:
- ✅ Builds Docker containers
- ✅ Initializes database
- ✅ Builds React frontend
- ✅ Deploys to /var/www/html/
- ✅ Runs health checks
-
-See [PRODUCTION-DEPLOY.md](PRODUCTION-DEPLOY.md) for detailed instructions.
-
---
-
-## 📋 Features
+## Features

 ### Multiple Metadata Sources
- **📂 File Import**: Import metadata from CSV, Excel, or JSON with smart column mapping
- **🤖 AI Generation**: OpenAI GPT-powered intelligent metadata generation
+- **📂 File Import**: Import metadata from CSV, Excel, or JSON with smart column mapping and sheet selection
+- **🤖 AI Generation**: OpenAI-powered intelligent metadata generation
 - **✏️ Manual Entry**: Direct editing with real-time validation
 - **📋 Templates**: Reusable metadata templates with variables

 ### Enterprise Features
- **🔐 Authentication**: JWT tokens + Microsoft SSO support
- **💾 Persistent Sessions**: Redis-backed sessions (no data loss on restart)
- **👥 User Management**: SQLite database for users and audit logs
+- **🔐 Authentication**: Local user authentication + Microsoft SSO support
+- **👥 User Management**: SQLite database for users and sessions
 - **📊 Audit Logging**: Track all user actions and metadata changes
 - **🔍 AI Usage Tracking**: Monitor OpenAI token usage and costs

@ -66,426 +34,482 @@ See [PRODUCTION-DEPLOY.md](PRODUCTION-DEPLOY.md) for detailed instructions.
 - **Smart Field Mapping**: Auto-detect columns with fuzzy matching
 - **Batch Processing**: Process multiple files with selective updates
 - **Custom Metadata Fields**: Add unlimited custom fields
+- **CSV Export**: Export metadata and processing results
 - **Template Variables**: {filename}, {date}, {user}, custom variables

 ---

-## 🏗️ Architecture
-
-**Modern full-stack application:**
-
-```
-┌─────────────────┐
-│  React Frontend │  (Vite + TypeScript + Tailwind)
-└────────┬────────┘
-         │ API calls
-┌────────▼────────┐
-│  FastAPI Backend│  (Python 3.11 + Async)
-└────────┬────────┘
-         │
-    ┌────┴────┬─────────┐
-    │         │         │
-┌───▼───┐ ┌──▼───┐ ┌───▼────┐
-│ Redis │ │SQLite│ │Processors│
-│Sessions│ │ DB   │ │(ExifTool)│
-└────────┘ └──────┘ └─────────┘
-```
-
-**Key Components:**
- **Frontend**: React 18 + React Router + Zustand (state management)
- **Backend**: FastAPI + SQLAlchemy async + Pydantic
- **Sessions**: Redis with TTL (7 days user sessions, 1 hour file sessions)
- **Auth**: JWT tokens (access: 30min, refresh: 7 days)
- **Processors**: 100% reusable from v3.1 - no changes needed
-
-**Why FastAPI + React?**
- ✅ **No session loss** - Redis persistent storage
- ✅ **Better performance** - Async operations
- ✅ **Modern UI** - React with proper state management
- ✅ **API-first** - Easy to extend and integrate
- ✅ **Auto docs** - Swagger UI at `/docs`
-
---
-
-## 📦 Requirements
+## Requirements

 ### System Dependencies
- **Docker** & **Docker Compose** (required for deployment)
- **Node.js 18+** & **npm** (for frontend build)
- **ExifTool 12.15+** (installed in Docker container)
+- **Python 3.8+**
+- **ExifTool 12.15+** (required for 300+ format support)
 - **Tesseract OCR** (optional - for image text extraction)
 - **Poppler** (optional - for PDF content extraction)

 ### Python Dependencies
-See [backend/requirements.txt](backend/requirements.txt):
- FastAPI 0.109+
- Redis 5.0+
- SQLAlchemy 2.0+ (async)
- OpenAI 1.0+
- PyExifTool, Pillow, pypdf, python-docx, etc.
-
-### Frontend Dependencies
-See [frontend/package.json](frontend/package.json):
- React 18
- React Router 6
- Axios, Zustand, React Dropzone
- Tailwind CSS
+All listed in `requirements.txt`:
+- Flask 2.3.0+ (Web framework)
+- pandas, openpyxl (Excel/CSV processing)
+- PyExifTool 0.5.6+ (Metadata operations)
+- openai 1.0.0+ (AI generation)
+- tiktoken 0.5.0+ (Token counting)
+- tenacity 8.2.0+ (Retry logic)
+- msal (Microsoft SSO - optional)

 ---

-## 🛠️ Installation
+## Installation

-### Option 1: Production Deployment (Recommended)
+### 1. Install System Dependencies

+**macOS:**
 ```bash
-cd /opt
-sudo git clone https://bitbucket.org/zlalani/solventum-image-metadata.git
-cd solventum-image-metadata
-sudo cp .env.production .env
-sudo nano .env  # Configure secrets
-sudo ./deploy.sh
+brew install exiftool tesseract tesseract-lang poppler
 ```

-See [PRODUCTION-DEPLOY.md](PRODUCTION-DEPLOY.md) for complete guide.
+**Linux (Ubuntu/Debian):**
+```bash
+sudo apt-get install libimage-exiftool-perl tesseract-ocr tesseract-ocr-chi-sim tesseract-ocr-chi-tra tesseract-ocr-jpn tesseract-ocr-kor poppler-utils
+```

-### Option 2: Local Development
+**Windows:**
+```bash
+# Install ExifTool from: https://exiftool.org/
+choco install exiftool tesseract
+```
+
+**Verify ExifTool Installation:**
+```bash
+exiftool -ver
+# Should show version 12.15 or higher
+```
+
+See [docs/EXIFTOOL_SETUP.md](docs/EXIFTOOL_SETUP.md) for detailed setup instructions.
+
+### 2. Create Virtual Environment
+
+```bash
+python3 -m venv venv_local
+source venv_local/bin/activate  # On Windows: venv_local\Scripts\activate
+```
+
+### 3. Install Python Dependencies

 ```bash
-# Backend
-cd backend
-python -m venv venv
-source venv/bin/activate
 pip install -r requirements.txt
-uvicorn app.main:app --reload
+```

-# Frontend (separate terminal)
-cd frontend
-npm install
-npm run dev
+### 4. Configure Environment Variables

-# Redis (Docker)
-docker run -d -p 6379:6379 redis:7-alpine
+Create a `.env` file in the project root:
+
+```env
+# Required: OpenAI API Key (for AI metadata generation)
+OPENAI_API_KEY=your-openai-api-key-here
+
+# Optional: Microsoft SSO (for enterprise authentication)
+# AZURE_CLIENT_ID=your-azure-client-id
+# AZURE_CLIENT_SECRET=your-azure-client-secret
+# AZURE_TENANT_ID=your-azure-tenant-id
+# REDIRECT_URI=http://localhost:5001/auth/callback
+
+# Optional: Flask secret key (auto-generated if not set)
+# SECRET_KEY=your-secret-key-here
+
+# Optional: AI settings (defaults shown)
+# AI_MODEL=gpt-4o-mini
+# MAX_TOKENS=500
+# TEMPERATURE=0.5
+# API_TIMEOUT=30
+# API_MAX_RETRIES=3
+```
+
+### 5. Initialize Database
+
+The database will be created automatically on first run. To manually initialize:
+
+```bash
+python -c "from src.database import Database; db = Database(); print('Database initialized')"
 ```

 ---

-## 🔧 Configuration
+## Docker Deployment (Recommended)

-### Environment Variables
+### Quick Start with Docker

-**Required:**
 ```bash
-OPENAI_API_KEY=sk-proj-...                    # For AI metadata generation
-AI_MODEL=gpt-5.2                              # AI model to use
-SECRET_KEY=your-secret-key-here               # JWT signing key
+# Build and start
+docker-compose up -d
+
+# Or use the helper script
+./docker-run.sh build
+./docker-run.sh start
+
+# Access at http://localhost:5001
 ```

-**Optional - Azure AD SSO:**
-```bash
-AZURE_TENANT_ID=your-tenant-id
-AZURE_CLIENT_ID=your-client-id
-AZURE_CLIENT_SECRET=your-client-secret
-REDIRECT_URI=https://your-domain.com/callback
-```
+**Benefits:**
+- ✅ No manual dependency installation
+- ✅ Consistent environment across systems
+- ✅ Persistent data storage via volumes
+- ✅ Easy updates and rollbacks
+- ✅ Production-ready configuration

-**Optional - Advanced:**
-```bash
-DATABASE_URL=sqlite+aiosqlite:///./data/oliver_metadata.db
-REDIS_URL=redis://localhost:6379/0
-BACKEND_PORT=5001
-DEBUG=false
-```
-
-See [.env.production](.env.production) for complete example.
+**See [DOCKER.md](DOCKER.md) for complete Docker deployment guide.**

 ---

-## 📚 Documentation
+## Usage

- **[PRODUCTION-DEPLOY.md](PRODUCTION-DEPLOY.md)** - Quick production deployment guide
- **[DEPLOYMENT.md](DEPLOYMENT.md)** - Detailed deployment documentation
- **[DEPLOYMENT-CHECKLIST.md](DEPLOYMENT-CHECKLIST.md)** - Pre-deployment checklist
- **[CLEANUP-COMMANDS.md](CLEANUP-COMMANDS.md)** - Server cleanup commands
- **[DOCKER.md](DOCKER.md)** - Docker configuration details
- **[CLAUDE.md](CLAUDE.md)** - Developer guide for Claude Code
+### Starting the Web Application
+
+**Local Development:**
+```bash
+python web_app.py
+```
+
+**Docker:**
+```bash
+docker-compose up -d
+```
+
+The application will:
+1. ✅ Check for ExifTool availability
+2. ✅ Initialize SQLite database (users, sessions, audit_log)
+3. ✅ Start Flask server on http://localhost:5001
+4. 🌐 Open browser automatically (local mode only)
+
+### Login
+
+**Test Account:**
+- Username: `tester`
+- Password: `oliveradmin`
+
+**Microsoft SSO** (if configured):
+- Click "Sign in with Microsoft" button
+- Authenticate via Azure AD
+- Users auto-created on first login
+
+### Using Metadata Sources
+
+#### 1. Import from File
+1. Select "Import from File (CSV/Excel/JSON)" from metadata source dropdown (default)
+2. Click "Choose File" and select your metadata file
+3. Configure mapping modal:
+   - For Excel files: Select sheet name
+   - Map columns: Filename (required), Title, Description, Keywords
+   - Auto-detection suggests best matches
+   - Preview first 3 rows
+4. Confirm mapping
+5. Upload files to process - tool matches files by filename
+
+#### 2. AI Generation
+1. Select "AI Generation" from metadata source dropdown
+2. Upload files
+3. AI generates metadata (10-30 seconds per file)
+4. Review and edit generated metadata
+5. Save changes
+
+#### 3. Manual Entry
+1. Select "Manual Entry"
+2. Upload files
+3. Fill in metadata fields manually
+4. Save changes
+
+#### 4. Templates
+1. Create template with variables
+2. Select template from dropdown
+3. Apply to selected files
+4. Review and save
+
+### Batch Operations
+
+1. Upload multiple files
+2. Use checkboxes to select files
+3. "Select All" / "Deselect All" buttons
+4. Edit metadata individually
+5. Click "Update Selected Files" to save all at once
+6. Export results to CSV

 ---

-## 🚀 Usage
+## Configuration

-### Web Interface
+### Database Schema

-1. **Access the application:**
-   - Production: https://your-domain.com/solventum-image-metadata/
-   - Local: http://localhost:3000
+**Users Table:**
+- id, username, password_hash, email, full_name
+- auth_method (local/sso)
+- created_at, last_login, is_active

-2. **Login:**
-   - Use local credentials or Microsoft SSO
-   - Default test account: `tester` / `oliveradmin` (dev only)
+**Sessions Table:**
+- session_id, user_id, created_at, expires_at
+- ip_address, user_agent

-3. **Upload Files:**
-   - Drag & drop or click to upload
-   - Supports multiple files at once
+**Audit Log Table:**
+- id, user_id, action, details, timestamp

-4. **Choose Metadata Source:**
-   - **AI Generation**: GPT analyzes file content
-   - **Import from File**: Upload CSV/Excel/JSON with metadata
-   - **Manual Entry**: Fill in fields directly
-   - **Templates**: Apply saved templates
+### AI Usage Tracking

-5. **Review & Edit:**
-   - Preview generated metadata
-   - Edit any fields
-   - Add custom fields
+Every AI metadata generation is logged with:
+- User ID
+- Timestamp
+- Tokens used (prompt + completion)
+- Cost estimate (based on gpt-4o-mini pricing)

-6. **Download:**
-   - Download files with embedded metadata
-   - Export metadata to CSV
-
-### API Endpoints
-
-**Interactive API docs:** http://localhost:5001/docs
-
-**Authentication:**
-```bash
-# Login
-POST /auth/login
-{"username": "user", "password": "pass"}
-→ Returns: {access_token, refresh_token}
-
-# Use token
-Authorization: Bearer <access_token>
+View logs in database:
+```sql
+SELECT * FROM audit_log WHERE action = 'ai_generation' ORDER BY timestamp DESC;
 ```

-**File Operations:**
-```bash
-# Upload files
-POST /files/upload
-Content-Type: multipart/form-data
+### User Management

-# Update metadata
-POST /metadata/update
-{"session_id": "...", "title": "...", "keywords": "..."}
-
-# Download file
-GET /files/download/{filename}
+**Create New User:**
+```python
+from src.database import Database
+db = Database()
+db.create_user(
+    username='newuser',
+    password='password123',
+    email='user@example.com',
+    full_name='New User',
+    auth_method='local'
+)
 ```

-**Templates:**
-```bash
-# List templates
-GET /templates/list
-
-# Apply template
-POST /templates/apply
-{"template_name": "...", "files": [...]}
+**List All Users:**
+```python
+users = db.get_all_users()
+for user in users:
+    print(f"{user['username']} - Last login: {user['last_login']}")
 ```

-See `/docs` for complete API reference.
-
 ---

-## 🔒 Security
+## Architecture

- **JWT Authentication**: Secure token-based auth
- **Password Hashing**: bcrypt for password storage
- **HTTPS Required**: Use reverse proxy (Apache/Nginx) with SSL
- **CORS Protection**: Configured origins only
- **Rate Limiting**: Built-in API rate limiting
- **Session Expiry**: Automatic session cleanup
- **Secrets Management**: Environment variables only (never commit .env)
+### File Structure

-**Best Practices:**
-1. ✅ Use strong `SECRET_KEY` (32+ characters)
-2. ✅ Configure HTTPS in production
-3. ✅ Set up firewall rules
-4. ✅ Regular backups of database
-5. ✅ Monitor logs for suspicious activity
-
---
-
-## 🐳 Docker
-
-**Production:** Uses `docker-compose.fastapi.yml`
-
-```bash
-# Start services
-docker-compose -f docker-compose.fastapi.yml up -d
-
-# View logs
-docker-compose -f docker-compose.fastapi.yml logs -f
-
-# Stop services
-docker-compose -f docker-compose.fastapi.yml down
+```
+oliver-metadata-tool/
+├── web_app.py              # Flask web application (main entry point)
+├── requirements.txt        # Python dependencies
+├── .env                    # Environment configuration
+├── oliver_metadata.db      # SQLite database (auto-created)
+├── src/
+│   ├── config.py           # Configuration management
+│   ├── database.py         # Database operations
+│   ├── auth.py             # Authentication logic
+│   ├── metadata_analyzer.py    # AI metadata generation
+│   ├── metadata_importer.py    # Import from files
+│   ├── template_manager.py     # Template system
+│   ├── field_mapper.py         # Column mapping
+│   ├── excel_metadata_lookup.py # Excel lookup
+│   ├── extractors/
+│   │   ├── pdf_extractor.py
+│   │   ├── image_extractor.py
+│   │   ├── office_extractor.py
+│   │   ├── video_extractor.py
+│   │   └── exiftool_extractor.py
+│   └── updaters/
+│       ├── pdf_updater.py
+│       ├── image_updater.py
+│       ├── office_updater.py
+│       ├── video_updater.py
+│       └── exiftool_updater.py
+├── templates/
+│   ├── index.html          # Main UI
+│   └── login.html          # Login page
+└── docs/
+    └── EXIFTOOL_SETUP.md   # ExifTool setup guide
 ```

-**Services:**
- `backend`: FastAPI application (port 5001 → 8000)
- `redis`: Session storage (internal only)
+### Technology Stack

-**Volumes:**
- `backend/data`: SQLite database
- `backend/uploads`: Uploaded files
- `backend/output`: Templates and reports
+- **Backend:** Flask (Python)
+- **Database:** SQLite
+- **Frontend:** HTML5, CSS3, JavaScript (Vanilla)
+- **Design:** Montserrat font, Dark & Gold theme
+- **Authentication:** Flask-Session, werkzeug.security, MSAL
+- **AI:** OpenAI API (gpt-4o-mini)
+- **Metadata:** PyExifTool, pypdf, python-docx, openpyxl

 ---

-## 🔍 Troubleshooting
+## API Endpoints
+
+### Authentication
+- `GET /login` - Login page
+- `POST /login` - Authenticate user
+- `GET /logout` - Destroy session
+- `GET /login/microsoft` - Microsoft SSO redirect
+- `GET /auth/callback` - SSO callback
+
+### File Operations
+- `POST /upload` - Upload files and generate metadata
+- `POST /update-manual` - Update file metadata manually
+- `GET /download/<filename>` - Download processed file
+
+### Metadata Sources
+- `POST /upload-excel` - Upload Excel file for mapping
+- `POST /preview-excel-sheet` - Preview Excel sheet structure
+- `POST /configure-excel-mapping` - Configure Excel column mapping
+- `POST /import-metadata` - Upload import file for mapping
+- `POST /configure-import-mapping` - Configure import column mapping
+
+### Templates
+- `GET /templates/list` - List all templates
+- `POST /templates/save` - Save new template
+- `POST /templates/load` - Load template by name
+- `DELETE /templates/delete` - Delete template
+- `POST /templates/apply` - Apply template to files
+- `POST /templates/preview` - Preview template output
+
+---
+
+## Security & Privacy
+
+### Authentication
+- Passwords hashed with werkzeug.security (pbkdf2:sha256)
+- Session tokens: 32-byte cryptographically secure random strings
+- Sessions expire after 24 hours
+- Microsoft SSO via OAuth2 + Azure AD
+
+### Data Protection
+- All credentials stored in `.env` (excluded from git)
+- Database file excluded from git
+- API keys never logged or exposed to frontend
+- Audit trail for all user actions
+
+### Production Recommendations
+1. **HTTPS:** Use SSL/TLS certificates in production
+2. **Database:** Migrate to PostgreSQL for better concurrency
+3. **Rate Limiting:** Add rate limits to prevent abuse
+4. **CSRF Protection:** Enable Flask-WTF for form security
+5. **Error Tracking:** Integrate Sentry or similar service
+6. **Backups:** Regular database backups
+7. **Monitoring:** Track AI token usage for cost management
+
+---
+
+## Troubleshooting

 ### Common Issues

-**1. Backend health check fails**
+**ExifTool not found:**
 ```bash
-# Check logs
-docker-compose -f docker-compose.fastapi.yml logs backend
+# Verify installation
+exiftool -ver

-# Common causes:
-# - OPENAI_API_KEY not set
-# - Redis not running
-# - Port 5001 already in use
+# macOS: Reinstall with Homebrew
+brew reinstall exiftool
+
+# Linux: Reinstall with apt
+sudo apt-get install --reinstall libimage-exiftool-perl
 ```

-**2. Frontend not loading**
+**Database locked error:**
 ```bash
-# Check files exist
-ls -lh /var/www/html/solventum-image-metadata/
+# Stop all instances
+lsof -ti:5001 | xargs kill -9

-# Check permissions
-sudo chown -R www-data:www-data /var/www/html/solventum-image-metadata/
+# Restart application
+python web_app.py
 ```

-**3. Git pull fails during deployment**
+**OpenAI API errors:**
+- Check API key in `.env` file
+- Verify API key is valid at https://platform.openai.com/api-keys
+- Check token usage limits on OpenAI dashboard
+
+**Import failed - column not found:**
+- Use the mapping modal to manually select columns
+- Check that your file has headers in the first row
+- Verify file encoding is UTF-8
+
+---
+
+## Development
+
+### Running Tests
+
 ```bash
-# First deployment: This is normal!
-# The script will continue with existing code
+# Unit tests (if implemented)
+pytest tests/

-# For updates: Set up git credentials
-cd /opt/solventum-image-metadata
-sudo git remote set-url origin https://username:token@bitbucket.org/...
+# Manual integration test
+python -c "from src.database import Database; from src.config import Config; print('✅ All imports successful')"
 ```

-**4. Docker build fails**
+### Git Workflow
+
 ```bash
-# Clean Docker and retry
-sudo docker system prune -af
-sudo ./deploy.sh
+# Check status
+git status
+
+# Add changes
+git add .
+
+# Commit with message
+git commit -m "Your commit message"
+
+# Push to remote
+git push origin main
 ```

-See [CLEANUP-COMMANDS.md](CLEANUP-COMMANDS.md) for more troubleshooting.
+---
+
+## License & Credits
+
+**License:** Corporate License - Oliver Marketing
+All rights reserved. Unauthorized copying, distribution, or modification is prohibited.
+
+**Developer:** Vadym Samoilenko
+**Company:** Oliver Marketing
+**Version:** 3.1 Enterprise Edition
+**Release Date:** January 2026
+
+**Third-Party Software:**
+- ExifTool by Phil Harvey (Perl Artistic License)
+- Flask by Pallets (BSD License)
+- OpenAI API (Commercial License)
+- PyExifTool (LGPL License)

 ---

-## 📝 Development
+## Support

-### Project Structure
-
-```
-solventum-image-metadata/
-├── backend/               # FastAPI backend
-│   ├── app/
-│   │   ├── api/          # API routes
-│   │   ├── core/         # Auth, database, Redis
-│   │   ├── models/       # SQLAlchemy models
-│   │   └── processors/   # Metadata processors (reused from v3.1)
-│   ├── Dockerfile
-│   └── requirements.txt
-├── frontend/             # React frontend
-│   ├── src/
-│   │   ├── components/
-│   │   ├── pages/
-│   │   └── store/       # Zustand state
-│   ├── package.json
-│   └── vite.config.ts
-├── docker-compose.fastapi.yml
-├── deploy.sh            # Production deployment script
-└── README.md
-```
-
-### Adding New Features
-
-1. **Backend API:**
-   - Add route in `backend/app/api/`
-   - Use async/await for all operations
-   - Add to `backend/app/main.py` router
-
-2. **Frontend Component:**
-   - Create in `frontend/src/components/`
-   - Use Zustand for state
-   - API calls via axios
-
-3. **New Processor:**
-   - Extend `BaseExtractor` or `BaseUpdater`
-   - Add to `backend/app/processors/`
-   - Register in main.py
+For issues, questions, or feature requests:
+- **Internal Support:** Contact IT department
+- **Developer:** Vadym Samoilenko
+- **Documentation:** See `docs/` folder

 ---

-## 📄 License
+## Changelog

-**Corporate License - Oliver Marketing**
+### v3.1 (January 2026) - Enterprise Edition
+- ✅ User authentication (local + Microsoft SSO)
+- ✅ SQLite database with audit logging
+- ✅ Unified import from file (CSV/Excel/JSON) with smart column mapping
+- ✅ Excel sheet selection and preview
+- ✅ Custom metadata fields support
+- ✅ AI usage tracking and cost monitoring
+- ✅ Dark & Gold UI redesign
+- ✅ Template variables and preview
+- ✅ Batch selection and CSV export
+- ✅ Consolidated metadata sources (removed redundant Excel Lookup)

-This software is proprietary and confidential. Unauthorized copying, distribution, or use is strictly prohibited.
+### v3.0 (January 2026)
+- ✅ ExifTool integration (300+ formats)
+- ✅ Multiple metadata sources (Import, AI, Manual)
+- ✅ Field mapping with fuzzy matching
+- ✅ Metadata templates system
+- ✅ Rebranded to Oliver Metadata Tool

-© 2024-2026 Oliver Marketing. All rights reserved.
-
---
-
-## 👨‍💻 Developer
-
-**Vadym Samoilenko**
-Email: vadym.samoilenko@oliver.agency
-
---
-
-## 🆘 Support
-
- **Issues:** Report at https://bitbucket.org/zlalani/solventum-image-metadata/issues
- **Documentation:** See `/docs` directory
- **API Docs:** http://localhost:5001/docs (when running)
-
---
-
-## 🔄 Changelog
-
-### Version 4.0.1 (2026-02-12)
- 🐛 **FIXED:** Duplicate filename collisions when uploading the same file multiple times
- ⚡ **IMPROVED:** Per-session file isolation via subdirectories (no more cross-session overwrites)
- ⚡ **IMPROVED:** Within-session deduplication: re-uploading replaces the old entry without renaming
- ⚡ **IMPROVED:** Session ID generation now uses cryptographically secure tokens
- ⚡ **IMPROVED:** Auto-cleanup of ZIP archives after download
- ⚡ **IMPROVED:** Cleanup of old session directories and loose files
-
-### Version 4.0 (2026-02-09)
- ✨ **NEW:** FastAPI backend with async operations
- ✨ **NEW:** React frontend with modern UI
- ✨ **NEW:** Redis persistent sessions (no data loss)
- ✨ **NEW:** JWT authentication with refresh tokens
- ✨ **NEW:** Automatic deployment script
- ✨ **NEW:** Docker-based production deployment
- ⚡ **IMPROVED:** Better performance and scalability
- ⚡ **IMPROVED:** API-first architecture
- 🐛 **FIXED:** Session loss on restart
- 🐛 **FIXED:** Unicode filename handling
-
-### Version 3.1 (2026-01-26)
- Initial Flask-based version
- Multiple metadata sources
- AI generation support
- Microsoft SSO integration
-
---
-
-## 🔮 Futures Log
-
-Planned improvements and known areas for enhancement:
-
- **Per-user upload isolation**: Separate upload directories by user ID for multi-user deployments
- **Automated tests**: Add unit and integration test suite for upload, metadata lookup, and download flows
- **WebSocket progress**: Real-time upload and AI generation progress via WebSocket instead of polling
- **Content-hash deduplication**: Detect duplicate files across sessions by content hash
- **Post-download session cleanup**: Option to auto-delete session files after successful batch download
- **Batch metadata editing**: Apply the same metadata changes to multiple files at once from the UI
-
---
-
-**Made with ❤️ by Vadym Samoilenko**
+### v2.x (Prior)
+- Basic Excel lookup functionality
+- Multi-format file support
+- Web interface
--- a/backend/app/init.py
+++ b/backend/app/init.py
--- a/app/config.py
+++ b/app/config.py
@ -0,0 +1,101 @@
+"""Application settings via pydantic-settings."""
+
+import secrets
+import os
+from pathlib import Path
+from pydantic_settings import BaseSettings
+
+
+class Settings(BaseSettings):
+    """Application settings loaded from environment variables and .env file."""
+
+    # App
+    APP_NAME: str = "Oliver Metadata Tool"
+    APP_VERSION: str = "4.0.0"
+    DEBUG: bool = False
+    DOCKER_MODE: bool = False
+    ROOT_PATH: str = ""  # Subpath prefix, e.g. "/solventum-image-metadata"
+
+    # Security
+    SECRET_KEY: str = secrets.token_hex(32)
+    HTTPS_ONLY: bool = False
+    ENABLE_TEST_USER: bool = False
+
+    # Paths
+    UPLOAD_FOLDER: str = ""
+    DB_PATH: str = ""
+    SESSION_DB_PATH: str = ""
+    TEMPLATES_DIR: str = ""
+
+    # OpenAI
+    OPENAI_API_KEY: str = ""
+    AI_MODEL: str = "gpt-5.2"
+    MAX_TOKENS: int = 500
+    TEMPERATURE: float = 0.5
+    MAX_TEXT_LENGTH: int = 4000
+    API_TIMEOUT: int = 30
+    API_MAX_RETRIES: int = 3
+
+    # Azure SSO
+    AZURE_CLIENT_ID: str = ""
+    AZURE_CLIENT_SECRET: str = ""
+    AZURE_TENANT_ID: str = ""
+    REDIRECT_URI: str = "http://localhost:5001/auth/callback"
+
+    # OCR
+    OCR_LANGUAGES: str = "eng+chi_sim+chi_tra+jpn+kor"
+    TESSERACT_PATH: str = ""
+    FFMPEG_PATH: str = ""
+
+    # Limits
+    MAX_UPLOAD_SIZE_MB: int = 500
+    SESSION_EXPIRE_HOURS: int = 24
+    FILE_CLEANUP_HOURS: int = 24
+
+    # Superadmin
+    SUPERADMIN_EMAIL: str = "vadymsamoilenko@oliver.agency"
+
+    model_config = {
+        "env_file": ".env",
+        "env_file_encoding": "utf-8",
+        "extra": "ignore",
+    }
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        project_root = Path(__file__).parent.parent
+
+        if self.DOCKER_MODE:
+            if not self.UPLOAD_FOLDER:
+                self.UPLOAD_FOLDER = "/app/uploads"
+            if not self.DB_PATH:
+                self.DB_PATH = "/app/data/oliver_metadata.db"
+            if not self.SESSION_DB_PATH:
+                self.SESSION_DB_PATH = "/app/data/oliver_sessions.db"
+        else:
+            if not self.UPLOAD_FOLDER:
+                self.UPLOAD_FOLDER = str(project_root / "uploads")
+            if not self.DB_PATH:
+                self.DB_PATH = str(project_root / "oliver_metadata.db")
+            if not self.SESSION_DB_PATH:
+                self.SESSION_DB_PATH = str(project_root / "oliver_sessions.db")
+
+        if not self.TEMPLATES_DIR:
+            self.TEMPLATES_DIR = str(project_root / "templates")
+
+        # Ensure upload directory exists
+        Path(self.UPLOAD_FOLDER).mkdir(parents=True, exist_ok=True)
+
+        # Ensure data directory exists (for Docker)
+        Path(self.DB_PATH).parent.mkdir(parents=True, exist_ok=True)
+
+
+_settings = None
+
+
+def get_settings() -> Settings:
+    """Get cached settings instance."""
+    global _settings
+    if _settings is None:
+        _settings = Settings()
+    return _settings
--- a/app/dependencies.py
+++ b/app/dependencies.py
@ -0,0 +1,107 @@
+"""FastAPI dependency injection providers."""
+
+import logging
+from typing import Optional, Dict
+from fastapi import Depends, Request, HTTPException, status
+
+from .config import Settings, get_settings
+from .session.store import SessionStore
+from .services.auth_service import AuthService
+
+logger = logging.getLogger(__name__)
+
+# Singletons (initialized once via lifespan)
+_database = None
+_session_store = None
+_auth_service = None
+
+
+def init_dependencies(settings: Settings):
+    """Initialize singleton dependencies. Called once from app lifespan."""
+    global _database, _session_store, _auth_service
+
+    from src.database import Database
+
+    _database = Database(db_path=settings.DB_PATH)
+    _session_store = SessionStore(db_path=settings.SESSION_DB_PATH)
+    _auth_service = AuthService(database=_database)
+
+    logger.info("Dependencies initialized")
+
+
+def get_database():
+    """Get Database instance."""
+    if _database is None:
+        raise RuntimeError("Database not initialized")
+    return _database
+
+
+def get_session_store() -> SessionStore:
+    """Get SessionStore instance."""
+    if _session_store is None:
+        raise RuntimeError("SessionStore not initialized")
+    return _session_store
+
+
+def get_auth_service() -> AuthService:
+    """Get AuthService instance."""
+    if _auth_service is None:
+        raise RuntimeError("AuthService not initialized")
+    return _auth_service
+
+
+async def get_current_user(request: Request) -> Dict:
+    """FastAPI dependency: require authenticated user.
+
+    Replaces Flask's @login_required decorator.
+    Checks session cookie against database, returns user dict or raises 401.
+    """
+    session_id = request.session.get("session_id")
+    if not session_id:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Not authenticated",
+        )
+
+    auth = get_auth_service()
+    db_session = auth.validate_session(session_id)
+    if not db_session:
+        # Session expired or invalid — clear it
+        request.session.clear()
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Session expired",
+        )
+
+    user_id = db_session["user_id"]
+    user = auth.get_user_by_id(user_id)
+    if not user:
+        request.session.clear()
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="User not found",
+        )
+
+    return user
+
+
+async def get_current_user_optional(request: Request) -> Optional[Dict]:
+    """Same as get_current_user but returns None instead of raising."""
+    try:
+        return await get_current_user(request)
+    except HTTPException:
+        return None
+
+
+async def get_current_admin(request: Request) -> Dict:
+    """FastAPI dependency: require authenticated admin user.
+
+    Raises 403 if user is not an admin.
+    """
+    user = await get_current_user(request)
+    if user.get("role") != "admin":
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail="Admin access required",
+        )
+    return user
--- a/app/main.py
+++ b/app/main.py
@ -0,0 +1,126 @@
+"""FastAPI application factory with lifespan management."""
+
+import logging
+from contextlib import asynccontextmanager
+from pathlib import Path
+
+from fastapi import FastAPI, Request, Depends
+from fastapi.exceptions import HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import HTMLResponse, RedirectResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from slowapi import _rate_limit_exceeded_handler
+from slowapi.errors import RateLimitExceeded
+from starlette.middleware.sessions import SessionMiddleware
+
+from .config import get_settings
+from .dependencies import init_dependencies, get_current_user
+from .security import limiter
+
+logger = logging.getLogger(__name__)
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Startup/shutdown lifecycle."""
+    settings = get_settings()
+    init_dependencies(settings)
+    logger.info(f"{settings.APP_NAME} v{settings.APP_VERSION} starting")
+    yield
+    logger.info("Shutting down")
+
+
+def create_app() -> FastAPI:
+    settings = get_settings()
+
+    app = FastAPI(
+        title=settings.APP_NAME,
+        version=settings.APP_VERSION,
+        root_path=settings.ROOT_PATH,
+        docs_url="/docs" if settings.DEBUG else None,
+        redoc_url=None,
+        lifespan=lifespan,
+    )
+
+    app.state.limiter = limiter
+    app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
+
+    # CORS — same origin only (restrict in production)
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=[settings.REDIRECT_URI.rsplit("/", 1)[0]] if not settings.DEBUG else ["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    # Session middleware (cookie-based)
+    app.add_middleware(
+        SessionMiddleware,
+        secret_key=settings.SECRET_KEY,
+        session_cookie="oliver_session",
+        max_age=settings.SESSION_EXPIRE_HOURS * 3600,
+        same_site="lax",
+        https_only=settings.HTTPS_ONLY,
+    )
+
+    # Static files
+    project_root = Path(__file__).parent.parent
+    static_dir = project_root / "static"
+    if static_dir.exists():
+        app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
+
+    # Templates
+    templates = Jinja2Templates(directory=settings.TEMPLATES_DIR)
+
+    # Register routers
+    from .routers import auth as auth_router
+    from .routers import upload as upload_router
+    from .routers import metadata as metadata_router
+    from .routers import templates as templates_router
+    from .routers import imports as imports_router
+    from .routers import downloads as downloads_router
+    from .routers import sse as sse_router
+    from .routers import admin as admin_router
+
+    auth_router.set_templates(templates)
+    admin_router.set_templates(templates)
+    app.include_router(auth_router.router)
+    app.include_router(upload_router.router)
+    app.include_router(metadata_router.router)
+    app.include_router(templates_router.router)
+    app.include_router(imports_router.router)
+    app.include_router(downloads_router.router)
+    app.include_router(sse_router.router)
+    app.include_router(admin_router.router)
+
+    # Main page
+    @app.get("/", response_class=HTMLResponse)
+    async def index(request: Request, user=Depends(get_current_user)):
+        return templates.TemplateResponse(
+            "index.html",
+            {
+                "request": request,
+                "username": user["username"],
+                "docker_mode": settings.DOCKER_MODE,
+            },
+        )
+
+    # Redirect unauthenticated users to login
+    @app.exception_handler(HTTPException)
+    async def http_exception_handler(request: Request, exc: HTTPException):
+        if exc.status_code == 401:
+            root = request.scope.get("root_path", "")
+            return RedirectResponse(url=f"{root}/login?next={request.url.path}", status_code=302)
+        # Re-raise other HTTP exceptions as JSON
+        from fastapi.responses import JSONResponse
+        return JSONResponse(
+            status_code=exc.status_code,
+            content={"detail": exc.detail},
+        )
+
+    return app
+
+
+app = create_app()
--- a/backend/app/api/init.py
+++ b/backend/app/api/init.py
--- a/app/models/requests.py
+++ b/app/models/requests.py
@ -0,0 +1,67 @@
+"""Pydantic request models with validation."""
+
+from typing import Optional, Dict, List
+from pydantic import BaseModel, Field
+
+
+class UpdateMetadataRequest(BaseModel):
+    """Request to update file metadata from session."""
+    session_id: str
+    file_index: int
+    filepath: Optional[str] = None  # Deprecated: resolved from session
+    output_dir: Optional[str] = ""
+
+
+class UpdateManualMetadataRequest(BaseModel):
+    """Request to update file with manually entered metadata."""
+    session_id: str
+    file_index: int
+    title: str = Field(default="", max_length=200)
+    subject: str = Field(default="", max_length=300)
+    keywords: str = Field(default="", max_length=500)
+    author: str = Field(default="", max_length=100)
+    copyright: str = Field(default="", max_length=150)
+    comments: str = Field(default="", max_length=500)
+    custom_fields: Optional[Dict[str, str]] = None
+
+
+class ExcelSheetPreviewRequest(BaseModel):
+    """Request to preview a specific Excel sheet."""
+    excel_session_id: str
+    sheet_name: str
+
+
+class ExcelMappingRequest(BaseModel):
+    """Request to configure Excel column mapping."""
+    excel_session_id: str
+    sheet_name: str
+    column_mapping: Dict[str, str]  # {filename: 'col', title: 'col', ...}
+
+
+class ImportMappingRequest(BaseModel):
+    """Request to configure import column mapping."""
+    import_session_id: str
+    column_mapping: Dict[str, str]
+
+
+class TemplateApplyRequest(BaseModel):
+    """Request to apply a template to files."""
+    template_name: str
+    session_id: str
+    file_indices: List[int]
+    custom_vars: Optional[Dict[str, str]] = None
+
+
+class TemplatePreviewRequest(BaseModel):
+    """Request to preview template output."""
+    title: str = ""
+    subject: str = ""
+    keywords: str = ""
+    sample_filename: str = "example.pdf"
+    custom_vars: Optional[Dict[str, str]] = None
+
+
+class DownloadSelectedRequest(BaseModel):
+    """Request to download selected files as ZIP."""
+    session_id: str
+    file_indices: List[int]
--- a/app/models/responses.py
+++ b/app/models/responses.py
@ -0,0 +1,70 @@
+"""Pydantic response models."""
+
+from typing import Optional, Dict, List, Any
+from pydantic import BaseModel
+
+
+class FileResult(BaseModel):
+    """Result for a single processed file."""
+    success: bool = True
+    filename: str
+    file_type: Optional[str] = None
+    current_metadata: Optional[Dict[str, str]] = None
+    suggested_metadata: Optional[Dict[str, str]] = None
+    metadata_source: Optional[str] = None
+    excel_found: bool = False
+    error: Optional[str] = None
+
+
+class UploadResponse(BaseModel):
+    """Response from file upload endpoint."""
+    success: bool
+    session_id: Optional[str] = None
+    files: List[FileResult] = []
+    error: Optional[str] = None
+
+
+class UpdateResponse(BaseModel):
+    """Response from metadata update endpoint."""
+    success: bool = True
+    message: str = ""
+    verified: bool = False
+    metadata: Optional[Dict[str, str]] = None
+    error: Optional[str] = None
+
+
+class ExcelUploadResponse(BaseModel):
+    """Response from Excel file upload."""
+    success: bool
+    excel_session_id: Optional[str] = None
+    filename: Optional[str] = None
+    sheets: Optional[List[str]] = None
+    preview: Optional[Dict[str, Any]] = None
+    message: Optional[str] = None
+    error: Optional[str] = None
+
+
+class ImportUploadResponse(BaseModel):
+    """Response from import file upload."""
+    success: bool
+    import_session_id: Optional[str] = None
+    filename: Optional[str] = None
+    columns: Optional[List[str]] = None
+    sample_data: Optional[List[Dict[str, Any]]] = None
+    message: Optional[str] = None
+    error: Optional[str] = None
+
+
+class MappingConfigResponse(BaseModel):
+    """Response from mapping configuration."""
+    success: bool
+    excel_session_id: Optional[str] = None
+    import_session_id: Optional[str] = None
+    stats: Optional[Dict[str, int]] = None
+    message: Optional[str] = None
+    error: Optional[str] = None
+
+
+class ErrorResponse(BaseModel):
+    """Standard error response."""
+    error: str
--- a/backend/app/core/init.py
+++ b/backend/app/core/init.py
--- a/app/routers/admin.py
+++ b/app/routers/admin.py
@ -0,0 +1,126 @@
+"""Admin router: user management, audit log, AI usage stats."""
+
+import logging
+from typing import Dict
+
+from fastapi import APIRouter, Request, Depends
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.templating import Jinja2Templates
+
+from ..config import get_settings
+from ..dependencies import get_current_admin, get_database
+from ..services.admin_service import AdminService
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/admin", tags=["admin"])
+
+_templates: Jinja2Templates = None
+_admin_service: AdminService = None
+
+
+def set_templates(templates: Jinja2Templates):
+    global _templates
+    _templates = templates
+
+
+def _get_admin_service() -> AdminService:
+    global _admin_service
+    if _admin_service is None:
+        _admin_service = AdminService(database=get_database())
+    return _admin_service
+
+
+@router.get("", response_class=HTMLResponse)
+async def admin_dashboard(request: Request, user: Dict = Depends(get_current_admin)):
+    """Admin dashboard page."""
+    svc = _get_admin_service()
+    stats = svc.get_dashboard_stats()
+    return _templates.TemplateResponse(
+        "admin.html",
+        {
+            "request": request,
+            "username": user["username"],
+            "stats": stats,
+        },
+    )
+
+
+@router.get("/users")
+async def list_users(
+    include_inactive: bool = False,
+    user: Dict = Depends(get_current_admin),
+):
+    """List all users."""
+    svc = _get_admin_service()
+    users = svc.list_users(include_inactive=include_inactive)
+    return {"success": True, "users": users}
+
+
+@router.post("/users")
+async def create_user(
+    request: Request,
+    user: Dict = Depends(get_current_admin),
+):
+    """Create a new user."""
+    try:
+        data = await request.json()
+        svc = _get_admin_service()
+        user_id = svc.create_user(
+            username=data.get("username", "").strip(),
+            email=data.get("email", "").strip(),
+            full_name=data.get("full_name", "").strip(),
+            role=data.get("role", "user"),
+            password=data.get("password"),
+            auth_method=data.get("auth_method", "local"),
+        )
+        if user_id:
+            db = get_database()
+            db.log_action(user["id"], "admin_create_user", f"Created user {data.get('username')} (ID: {user_id})")
+            return {"success": True, "user_id": user_id}
+        return JSONResponse({"error": "Failed to create user (username may already exist)"}, status_code=400)
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
+
+
+@router.put("/users/{user_id}")
+async def update_user(
+    user_id: int,
+    request: Request,
+    admin: Dict = Depends(get_current_admin),
+):
+    """Update user (role, is_active, full_name, email)."""
+    try:
+        data = await request.json()
+        svc = _get_admin_service()
+        success = svc.update_user(user_id, data)
+        if success:
+            db = get_database()
+            db.log_action(admin["id"], "admin_update_user", f"Updated user {user_id}: {data}")
+            return {"success": True}
+        return JSONResponse({"error": "No changes applied"}, status_code=400)
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
+
+
+@router.get("/audit")
+async def get_audit_log(
+    user_id: int = None,
+    action: str = None,
+    limit: int = 100,
+    offset: int = 0,
+    admin: Dict = Depends(get_current_admin),
+):
+    """Get audit log with optional filters."""
+    svc = _get_admin_service()
+    entries = svc.get_audit_log(user_id=user_id, action=action, limit=limit, offset=offset)
+    return {"success": True, "entries": entries, "count": len(entries)}
+
+
+@router.get("/ai-usage")
+async def get_ai_usage(admin: Dict = Depends(get_current_admin)):
+    """Get AI usage statistics."""
+    svc = _get_admin_service()
+    stats = svc.get_ai_usage_stats()
+    by_user = svc.get_ai_usage_by_user()
+    return {"success": True, "stats": stats, "by_user": by_user}
--- a/app/routers/auth.py
+++ b/app/routers/auth.py
@ -0,0 +1,190 @@
+"""Authentication router: login, logout, Microsoft SSO."""
+
+import secrets
+import logging
+from typing import Dict
+from fastapi import APIRouter, Request, Depends, Form
+from fastapi.responses import HTMLResponse, RedirectResponse
+from fastapi.templating import Jinja2Templates
+
+from ..config import get_settings, Settings
+from ..dependencies import get_auth_service, get_current_user_optional
+from ..security import limiter
+from ..services.auth_service import AuthService
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=["auth"])
+
+# Templates are set from main.py after mounting
+_templates: Jinja2Templates = None
+
+
+def set_templates(templates: Jinja2Templates):
+    global _templates
+    _templates = templates
+
+
+@router.get("/login", response_class=HTMLResponse)
+async def login_page(
+    request: Request,
+    error: str = None,
+    info: str = None,
+    settings: Settings = Depends(get_settings),
+    auth: AuthService = Depends(get_auth_service),
+):
+    """Render login page."""
+    # If already logged in, redirect to index
+    user = await get_current_user_optional(request)
+    if user:
+        root = request.scope.get("root_path", "")
+        return RedirectResponse(url=f"{root}/", status_code=302)
+
+    return _templates.TemplateResponse(
+        "login.html",
+        {
+            "request": request,
+            "error": error,
+            "info": info,
+            "sso_enabled": auth.sso_enabled,
+            "azure_client_id": settings.AZURE_CLIENT_ID if auth.sso_enabled else "",
+            "azure_tenant_id": settings.AZURE_TENANT_ID if auth.sso_enabled else "",
+            "enable_test_user": settings.ENABLE_TEST_USER,
+            "app_version": settings.APP_VERSION,
+        },
+    )
+
+
+@router.post("/login")
+@limiter.limit("5/minute")
+async def login_submit(
+    request: Request,
+    username: str = Form(...),
+    password: str = Form(...),
+    settings: Settings = Depends(get_settings),
+    auth: AuthService = Depends(get_auth_service),
+):
+    """Process login form. Rate limited to 5 attempts per minute."""
+    username = username.strip()
+    if not username or not password:
+        return _templates.TemplateResponse(
+            "login.html",
+            {
+                "request": request,
+                "error": "Please enter both username and password",
+                "sso_enabled": auth.sso_enabled,
+                "enable_test_user": settings.ENABLE_TEST_USER,
+                "app_version": settings.APP_VERSION,
+            },
+        )
+
+    result = auth.authenticate_user(username, password)
+
+    if not result["success"]:
+        return _templates.TemplateResponse(
+            "login.html",
+            {
+                "request": request,
+                "error": result.get("error"),
+                "sso_enabled": auth.sso_enabled,
+                "enable_test_user": settings.ENABLE_TEST_USER,
+                "app_version": settings.APP_VERSION,
+            },
+        )
+
+    user = result["user"]
+    session_id = auth.create_session(
+        user=user,
+        ip_address=request.client.host if request.client else None,
+        user_agent=request.headers.get("user-agent"),
+    )
+
+    if not session_id:
+        return _templates.TemplateResponse(
+            "login.html",
+            {
+                "request": request,
+                "error": "Failed to create session",
+                "sso_enabled": auth.sso_enabled,
+                "enable_test_user": settings.ENABLE_TEST_USER,
+                "app_version": settings.APP_VERSION,
+            },
+        )
+
+    # Set session data
+    request.session["user_id"] = user["id"]
+    request.session["username"] = user["username"]
+    request.session["session_id"] = session_id
+
+    root = request.scope.get("root_path", "")
+    next_url = request.query_params.get("next", "/")
+    # Prefix with root_path if next_url is a relative path
+    if next_url.startswith("/") and not next_url.startswith(root):
+        next_url = f"{root}{next_url}"
+    return RedirectResponse(url=next_url, status_code=302)
+
+
+@router.get("/logout")
+async def logout(
+    request: Request,
+    auth: AuthService = Depends(get_auth_service),
+):
+    """Logout and destroy session."""
+    user_id = request.session.get("user_id")
+    session_id = request.session.get("session_id")
+
+    if session_id:
+        auth.destroy_session(session_id, user_id)
+
+    request.session.clear()
+    root = request.scope.get("root_path", "")
+    return RedirectResponse(url=f"{root}/login", status_code=302)
+
+
+@router.post("/auth/azure-token")
+async def auth_azure_token(
+    request: Request,
+    auth: AuthService = Depends(get_auth_service),
+):
+    """Validate Azure AD access token from client-side MSAL.js.
+
+    Frontend handles the OAuth popup/redirect via MSAL.js,
+    then POSTs the access_token here for server-side validation.
+    """
+    from ..dependencies import get_database
+    from fastapi.responses import JSONResponse
+
+    data = await request.json()
+    access_token = data.get("access_token", "")
+
+    if not access_token:
+        return JSONResponse({"error": "No access token provided"}, status_code=400)
+
+    # Validate token by calling Microsoft Graph API
+    user_info = auth.sso.get_user_info(access_token)
+    if not user_info:
+        return JSONResponse({"error": "Invalid or expired token"}, status_code=401)
+
+    # Create or update user from Azure AD info
+    db = get_database()
+    user = auth.sso.create_or_update_user(user_info, db)
+    if not user:
+        return JSONResponse({"error": "Failed to create user account"}, status_code=500)
+
+    # Create session
+    session_id = auth.create_session(
+        user=user,
+        ip_address=request.client.host if request.client else None,
+        user_agent=request.headers.get("user-agent"),
+    )
+
+    if not session_id:
+        return JSONResponse({"error": "Failed to create session"}, status_code=500)
+
+    # Set session cookies
+    request.session["user_id"] = user["id"]
+    request.session["username"] = user["username"]
+    request.session["session_id"] = session_id
+
+    root = request.scope.get("root_path", "")
+    return {"success": True, "redirect": f"{root}/"}
--- a/app/routers/downloads.py
+++ b/app/routers/downloads.py
@ -0,0 +1,116 @@
+"""Download router: single file, ZIP batch, session cleanup."""
+
+import os
+import io
+import zipfile
+import logging
+from pathlib import Path
+from typing import Dict
+from datetime import datetime
+
+from fastapi import APIRouter, Request, Depends, BackgroundTasks
+from fastapi.responses import FileResponse, StreamingResponse, JSONResponse
+
+from ..dependencies import get_current_user, get_session_store
+from ..services.file_service import safe_filename
+from ..session.store import SessionStore
+from ..config import get_settings
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=["downloads"])
+
+
+@router.get("/download/{filename}")
+async def download_file(
+    filename: str,
+    user: Dict = Depends(get_current_user),
+):
+    """Download a single processed file."""
+    settings = get_settings()
+    filepath = os.path.join(settings.UPLOAD_FOLDER, str(user["id"]), safe_filename(filename))
+
+    # Also check root upload folder for backward compat
+    if not os.path.exists(filepath):
+        filepath = os.path.join(settings.UPLOAD_FOLDER, safe_filename(filename))
+
+    if os.path.exists(filepath):
+        return FileResponse(filepath, filename=filename, media_type="application/octet-stream")
+
+    return JSONResponse({"error": "File not found"}, status_code=404)
+
+
+@router.post("/download-selected")
+async def download_selected_files(
+    request: Request,
+    user: Dict = Depends(get_current_user),
+    store: SessionStore = Depends(get_session_store),
+):
+    """Download selected files from session as ZIP archive."""
+    try:
+        data = await request.json()
+        session_id = data.get("session_id")
+        file_indices = data.get("file_indices", [])
+
+        session_data = store.get_file_session(session_id)
+        if not session_data:
+            return JSONResponse({"error": "Session not found"}, status_code=404)
+
+        if not file_indices:
+            return JSONResponse({"error": "No files selected"}, status_code=400)
+
+        files = session_data.get("files", [])
+        if not files:
+            return JSONResponse({"error": "No files in session"}, status_code=404)
+
+        # Create in-memory ZIP
+        zip_buffer = io.BytesIO()
+        with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf:
+            for index in file_indices:
+                if 0 <= index < len(files):
+                    file_info = files[index]
+                    filepath = file_info.get("filepath", "")
+                    filename = file_info.get("filename", "")
+
+                    if filepath and os.path.exists(filepath):
+                        zf.write(filepath, filename)
+
+        zip_buffer.seek(0)
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        zip_filename = f"oliver_metadata_files_{timestamp}.zip"
+
+        return StreamingResponse(
+            zip_buffer,
+            media_type="application/zip",
+            headers={"Content-Disposition": f'attachment; filename="{zip_filename}"'},
+        )
+
+    except Exception as e:
+        logger.error(f"Download error: {e}", exc_info=True)
+        return JSONResponse({"error": f"Error creating ZIP archive: {e}"}, status_code=500)
+
+
+@router.post("/cleanup-session/{session_id}")
+async def cleanup_session(
+    session_id: str,
+    background_tasks: BackgroundTasks,
+    user: Dict = Depends(get_current_user),
+    store: SessionStore = Depends(get_session_store),
+):
+    """Clean up session files."""
+    try:
+        session_data = store.get_file_session(session_id)
+        if session_data:
+            # Delete uploaded files in background
+            files = session_data.get("files", [])
+            for file_info in files:
+                filepath = file_info.get("filepath", "")
+                if filepath and os.path.exists(filepath):
+                    background_tasks.add_task(os.remove, filepath)
+
+            store.delete_file_session(session_id)
+
+        return {"success": True, "message": "Session cleaned up successfully"}
+    except Exception as e:
+        logger.error(f"Cleanup error: {e}")
+        return JSONResponse({"error": str(e)}, status_code=500)
--- a/app/routers/imports.py
+++ b/app/routers/imports.py
@ -0,0 +1,201 @@
+"""Import router: import metadata from CSV/Excel/JSON files."""
+
+import logging
+from pathlib import Path
+from typing import Dict
+
+from fastapi import APIRouter, Request, UploadFile, File, Depends
+from fastapi.responses import JSONResponse
+
+from ..dependencies import get_current_user, get_session_store
+from ..services.file_service import FileService, safe_filename
+from ..session.store import SessionStore
+from ..config import get_settings
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=["imports"])
+
+_file_service = None
+
+
+def _get_file_service() -> FileService:
+    global _file_service
+    if _file_service is None:
+        settings = get_settings()
+        _file_service = FileService(
+            upload_folder=settings.UPLOAD_FOLDER,
+            max_size_mb=settings.MAX_UPLOAD_SIZE_MB,
+        )
+    return _file_service
+
+
+@router.post("/import-metadata")
+async def import_metadata(
+    request: Request,
+    import_file: UploadFile = File(...),
+    user: Dict = Depends(get_current_user),
+    store: SessionStore = Depends(get_session_store),
+):
+    """Upload import file and preview structure for mapping."""
+    try:
+        import pandas as pd
+
+        file_svc = _get_file_service()
+        filepath = await file_svc.save_upload(import_file, user["id"])
+        file_ext = Path(filepath).suffix.lower()
+
+        if file_ext == ".csv":
+            df = pd.read_csv(filepath, nrows=5, encoding="utf-8")
+        elif file_ext in [".xlsx", ".xls"]:
+            df = pd.read_excel(filepath, nrows=5)
+        elif file_ext == ".json":
+            import json
+            with open(filepath, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            if isinstance(data, list):
+                df = pd.DataFrame(data[:5])
+            elif isinstance(data, dict):
+                df = pd.DataFrame([data])
+            else:
+                return JSONResponse({"error": "Invalid JSON format"}, status_code=400)
+        else:
+            return JSONResponse({"error": f"Unsupported file format: {file_ext}"}, status_code=400)
+
+        columns = df.columns.tolist()
+        sample_data = df.fillna("").to_dict("records")
+
+        import_session_id = store.create_import_session(
+            user_id=user["id"],
+            session_type="import",
+            file_info={"path": filepath, "filename": Path(filepath).name, "file_type": file_ext},
+        )
+
+        return {
+            "success": True,
+            "import_session_id": import_session_id,
+            "filename": Path(filepath).name,
+            "columns": columns,
+            "sample_data": sample_data,
+            "message": "Import file uploaded. Please configure column mapping.",
+        }
+
+    except Exception as e:
+        logger.error(f"Import upload failed: {e}")
+        return JSONResponse({"error": f"Import upload failed: {e}"}, status_code=500)
+
+
+@router.post("/configure-import-mapping")
+async def configure_import_mapping(
+    request: Request,
+    user: Dict = Depends(get_current_user),
+    store: SessionStore = Depends(get_session_store),
+):
+    """Configure import column mapping and load metadata."""
+    try:
+        import pandas as pd
+        import json
+
+        data = await request.json()
+        import_session_id = data.get("import_session_id")
+        column_mapping = data.get("column_mapping", {})
+
+        session_data = store.get_import_session(import_session_id)
+        if not session_data:
+            return JSONResponse({"error": "Invalid session ID"}, status_code=400)
+
+        import_path = session_data["file_info"].get("path", "")
+        file_ext = session_data["file_info"].get("file_type", "")
+
+        if file_ext == ".csv":
+            df = pd.read_csv(import_path, encoding="utf-8")
+        elif file_ext in [".xlsx", ".xls"]:
+            df = pd.read_excel(import_path)
+        elif file_ext == ".json":
+            with open(import_path, "r", encoding="utf-8") as f:
+                json_data = json.load(f)
+            df = pd.DataFrame(json_data if isinstance(json_data, list) else [json_data])
+        else:
+            return JSONResponse({"error": "Unsupported file type"}, status_code=400)
+
+        filename_col = column_mapping.get("filename")
+        title_col = column_mapping.get("title")
+        subject_col = column_mapping.get("subject")
+        keywords_col = column_mapping.get("keywords")
+
+        if not filename_col:
+            return JSONResponse({"error": "Filename column is required"}, status_code=400)
+
+        metadata_map = {}
+        for _, row in df.iterrows():
+            fname = row.get(filename_col)
+            if pd.notna(fname) and str(fname).strip():
+                stem = Path(str(fname).strip()).stem.lower()
+                metadata_map[stem] = {
+                    "title": str(row.get(title_col, "")).strip() if title_col and pd.notna(row.get(title_col)) else "",
+                    "subject": str(row.get(subject_col, "")).strip() if subject_col and pd.notna(row.get(subject_col)) else "",
+                    "keywords": str(row.get(keywords_col, "")).strip() if keywords_col and pd.notna(row.get(keywords_col)) else "",
+                    "original_filename": str(fname).strip(),
+                }
+
+        store.update_import_session(import_session_id, metadata_map=metadata_map)
+
+        stats = {
+            "total_records": len(metadata_map),
+            "with_title": sum(1 for v in metadata_map.values() if v.get("title")),
+            "with_subject": sum(1 for v in metadata_map.values() if v.get("subject")),
+            "with_keywords": sum(1 for v in metadata_map.values() if v.get("keywords")),
+        }
+
+        return {
+            "success": True,
+            "import_session_id": import_session_id,
+            "stats": stats,
+            "message": f"Configured mapping for {stats['total_records']} records",
+        }
+
+    except Exception as e:
+        logger.error(f"Import configuration failed: {e}")
+        return JSONResponse({"error": f"Import configuration failed: {e}"}, status_code=500)
+
+
+@router.post("/preview-import")
+async def preview_import(
+    request: Request,
+    import_file: UploadFile = File(...),
+    user: Dict = Depends(get_current_user),
+):
+    """Preview file structure and suggest field mappings."""
+    try:
+        file_svc = _get_file_service()
+        filepath = await file_svc.save_upload(import_file, user["id"])
+
+        from src.metadata_importer import MetadataImporter
+        importer = MetadataImporter()
+        columns, sample_rows, suggestions = importer.preview_file_structure(filepath)
+
+        # Clean up temp file
+        file_svc.delete_file(filepath)
+
+        formatted_suggestions = {}
+        for source_field, suggestion_data in suggestions.items():
+            formatted_suggestions[source_field] = {
+                "best_match": suggestion_data["best_match"],
+                "confidence": round(suggestion_data["confidence"], 2),
+                "alternatives": [
+                    {"field": alt["field"], "confidence": round(alt["confidence"], 2)}
+                    for alt in suggestion_data.get("alternatives", [])
+                ],
+            }
+
+        return {
+            "success": True,
+            "columns": columns,
+            "sample_rows": sample_rows[:5],
+            "suggestions": formatted_suggestions,
+            "filename": Path(filepath).name,
+        }
+
+    except Exception as e:
+        logger.error(f"Preview failed: {e}")
+        return JSONResponse({"error": f"Preview failed: {e}"}, status_code=500)
--- a/app/routers/metadata.py
+++ b/app/routers/metadata.py
@ -0,0 +1,224 @@
+"""Metadata router: update, manual update, stats."""
+
+import os
+import shutil
+import logging
+from typing import Dict
+
+from fastapi import APIRouter, Request, Depends
+from fastapi.responses import JSONResponse
+
+from ..dependencies import get_current_user, get_session_store
+from ..services import metadata_service
+from ..services.file_service import FileService
+from ..session.store import SessionStore
+from ..config import get_settings
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=["metadata"])
+
+
+@router.post("/update")
+async def update_metadata(
+    request: Request,
+    user: Dict = Depends(get_current_user),
+    store: SessionStore = Depends(get_session_store),
+):
+    """Update file metadata using suggested metadata from session."""
+    data = await request.json()
+    session_id = data.get("session_id")
+    file_index = data.get("file_index")
+
+    if not session_id:
+        return JSONResponse({"error": "Invalid or expired session"}, status_code=400)
+
+    session_data = store.get_file_session(session_id)
+    if not session_data:
+        return JSONResponse({"error": "Invalid or expired session"}, status_code=400)
+
+    files = session_data.get("files", [])
+    if file_index is None or file_index < 0 or file_index >= len(files):
+        return JSONResponse({"error": "Invalid file index"}, status_code=400)
+
+    try:
+        file_info = files[file_index]
+        filepath = file_info.get("filepath")
+
+        if not filepath or not os.path.exists(filepath):
+            return JSONResponse({"error": "File not found"}, status_code=404)
+
+        new_metadata = file_info.get("suggested_metadata", {})
+        if not new_metadata or not new_metadata.get("title"):
+            return JSONResponse({"error": "No metadata available for this file"}, status_code=400)
+
+        from src.file_detector import FileDetector, FileType
+
+        file_type = FileDetector.detect_file_type(filepath)
+        if file_type == FileType.UNSUPPORTED:
+            return JSONResponse({"error": "Unsupported file type"}, status_code=400)
+
+        settings = get_settings()
+
+        # Update metadata in-place
+        success = metadata_service.update_file_metadata(
+            filepath, file_type, new_metadata, backup=False
+        )
+        if not success:
+            return JSONResponse({"error": "Failed to update metadata"}, status_code=500)
+
+        verified = metadata_service.verify_file_metadata(filepath, file_type, new_metadata)
+
+        return {
+            "success": True,
+            "message": "Metadata updated successfully",
+            "verified": verified,
+            "metadata": new_metadata,
+        }
+
+    except Exception as e:
+        logger.error(f"Update error: {e}")
+        return JSONResponse({"error": str(e)}, status_code=500)
+
+
+@router.post("/update-manual")
+async def update_manual_metadata(
+    request: Request,
+    user: Dict = Depends(get_current_user),
+    store: SessionStore = Depends(get_session_store),
+):
+    """Update file with manually entered metadata."""
+    data = await request.json()
+    session_id = data.get("session_id")
+    file_index = data.get("file_index")
+
+    # Get file info for fallback title
+    file_title = str(data.get("title", "")).strip()[:200]
+    if not file_title:
+        # Fallback: use filename from session if title is empty
+        sid = data.get("session_id")
+        fidx = data.get("file_index")
+        if sid and fidx is not None:
+            sess = store.get_file_session(sid)
+            if sess and 0 <= fidx < len(sess.get("files", [])):
+                from pathlib import Path
+                fname = sess["files"][fidx].get("filename", "")
+                file_title = Path(fname).stem if fname else "Untitled"
+
+    custom_metadata = {
+        "title": file_title or "Untitled",
+        "subject": str(data.get("subject", "")).strip()[:300],
+        "keywords": str(data.get("keywords", "")).strip()[:500],
+        "author": str(data.get("author", "")).strip()[:100],
+        "copyright": str(data.get("copyright", "")).strip()[:150],
+        "comments": str(data.get("comments", "")).strip()[:500],
+    }
+
+    # Handle custom fields
+    custom_fields = data.get("custom_fields", {})
+    if custom_fields and isinstance(custom_fields, dict):
+        for field_name, field_value in custom_fields.items():
+            safe_name = str(field_name).strip()[:50]
+            safe_value = str(field_value).strip()[:200]
+            if safe_name and safe_value:
+                custom_metadata[safe_name] = safe_value
+
+    if not session_id:
+        return JSONResponse({"error": "Invalid or expired session"}, status_code=400)
+
+    session_data = store.get_file_session(session_id)
+    if not session_data:
+        return JSONResponse({"error": "Invalid or expired session"}, status_code=400)
+
+    files = session_data.get("files", [])
+    if file_index is None or file_index < 0 or file_index >= len(files):
+        return JSONResponse({"error": "Invalid file index"}, status_code=400)
+
+    try:
+        file_info = files[file_index]
+        filepath = file_info.get("filepath")
+
+        if not filepath or not os.path.exists(filepath):
+            return JSONResponse({"error": "File not found"}, status_code=404)
+
+        from src.file_detector import FileDetector, FileType
+
+        file_type = FileDetector.detect_file_type(filepath)
+        if file_type == FileType.UNSUPPORTED:
+            return JSONResponse({"error": "Unsupported file type"}, status_code=400)
+
+        success = metadata_service.update_file_metadata(
+            filepath, file_type, custom_metadata, backup=True
+        )
+        if not success:
+            return JSONResponse({"error": "Failed to update metadata"}, status_code=500)
+
+        # Update session with new metadata
+        store.update_file_in_session(
+            session_id, file_index, {"suggested_metadata": custom_metadata}
+        )
+
+        verified = metadata_service.verify_file_metadata(filepath, file_type, custom_metadata)
+
+        return {
+            "status": "success",
+            "message": "Metadata updated successfully",
+            "verified": verified,
+            "metadata": custom_metadata,
+        }
+
+    except Exception as e:
+        logger.error(f"Manual update error: {e}")
+        return JSONResponse({"error": f"Error updating metadata: {e}"}, status_code=500)
+
+
+@router.get("/session/{session_id}/files")
+async def get_session_files(
+    session_id: str,
+    user: Dict = Depends(get_current_user),
+    store: SessionStore = Depends(get_session_store),
+):
+    """Get current state of files in a session (for polling AI progress)."""
+    session_data = store.get_file_session(session_id)
+    if not session_data:
+        return JSONResponse({"error": "Invalid or expired session"}, status_code=400)
+
+    files = session_data.get("files", [])
+    # Strip server paths
+    safe_files = [{k: v for k, v in f.items() if k != "filepath"} for f in files]
+
+    # Check if all AI files are done
+    ai_pending = sum(1 for f in files if f.get("ai_status") == "pending")
+    ai_complete = sum(1 for f in files if f.get("ai_status") == "complete")
+    ai_error = sum(1 for f in files if f.get("ai_status") == "error")
+
+    return {
+        "success": True,
+        "files": safe_files,
+        "ai_status": {
+            "pending": ai_pending,
+            "complete": ai_complete,
+            "error": ai_error,
+            "done": ai_pending == 0,
+        },
+    }
+
+
+@router.get("/stats")
+async def get_stats(
+    user: Dict = Depends(get_current_user),
+):
+    """Get metadata statistics."""
+    try:
+        from src.excel_metadata_lookup import ExcelMetadataLookup
+        from pathlib import Path
+
+        excel_path = Path(__file__).parent.parent.parent / "Celum ID to Adobe Asset Path Mapping Spreadsheet (1).xlsx"
+        if excel_path.exists():
+            lookup = ExcelMetadataLookup(str(excel_path))
+            stats = lookup.get_stats()
+            return {"success": True, "stats": stats}
+        else:
+            return {"success": True, "stats": {"message": "No default Excel file configured"}}
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
--- a/app/routers/sse.py
+++ b/app/routers/sse.py
@ -0,0 +1,67 @@
+"""SSE router: Server-Sent Events for realtime AI progress."""
+
+import asyncio
+import logging
+from typing import Dict
+
+from fastapi import APIRouter, Request, Depends
+from fastapi.responses import StreamingResponse
+
+from ..dependencies import get_current_user
+from ..services.ai_service import get_progress_queue, remove_progress_queue
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=["sse"])
+
+
+@router.get("/events/ai-progress/{session_id}")
+async def ai_progress_stream(
+    session_id: str,
+    request: Request,
+    user: Dict = Depends(get_current_user),
+):
+    """Stream AI processing progress events via SSE.
+
+    Events:
+        - processing: {file_index, filename, current, total}
+        - file_complete: {file_index, filename, metadata}
+        - error: {file_index, filename, error}
+        - done: {total_processed, total_errors}
+    """
+
+    async def event_generator():
+        queue = get_progress_queue(session_id)
+        try:
+            while True:
+                # Check if client disconnected
+                if await request.is_disconnected():
+                    break
+
+                try:
+                    event = await asyncio.wait_for(queue.get(), timeout=30.0)
+                except asyncio.TimeoutError:
+                    # Send keepalive
+                    yield ": keepalive\n\n"
+                    continue
+
+                event_type = event.get("type", "message")
+                import json
+                data = json.dumps(event)
+                yield f"event: {event_type}\ndata: {data}\n\n"
+
+                # Stop after 'done' event
+                if event_type == "done":
+                    break
+        finally:
+            remove_progress_queue(session_id)
+
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no",
+        },
+    )
--- a/app/routers/templates.py
+++ b/app/routers/templates.py
@ -0,0 +1,182 @@
+"""Template management router: list, save, load, delete, apply, preview."""
+
+import logging
+from typing import Dict
+
+from fastapi import APIRouter, Request, Depends
+from fastapi.responses import JSONResponse
+
+from ..dependencies import get_current_user, get_session_store
+from ..session.store import SessionStore
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/templates", tags=["templates"])
+
+# Lazy-initialized template manager
+_template_manager = None
+
+
+def _get_template_manager():
+    global _template_manager
+    if _template_manager is None:
+        from src.template_manager import TemplateManager
+        _template_manager = TemplateManager()
+    return _template_manager
+
+
+@router.get("/list")
+async def list_templates(user: Dict = Depends(get_current_user)):
+    """List all available templates."""
+    try:
+        tm = _get_template_manager()
+        templates = tm.list_templates()
+        return {"success": True, "templates": templates}
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
+
+
+@router.post("/save")
+async def save_template(
+    request: Request,
+    user: Dict = Depends(get_current_user),
+):
+    """Save a new template."""
+    try:
+        data = await request.json()
+        name = data.get("name", "").strip()
+        if not name:
+            return JSONResponse({"error": "Template name is required"}, status_code=400)
+
+        tm = _get_template_manager()
+        template = tm.create_template(
+            name=name,
+            title_template=data.get("title", ""),
+            subject_template=data.get("subject", ""),
+            keywords_template=data.get("keywords", ""),
+            description=data.get("description", ""),
+        )
+        success = tm.save_template(template)
+
+        if success:
+            return {"success": True, "message": f'Template "{name}" saved successfully', "template": template}
+        return JSONResponse({"error": "Failed to save template"}, status_code=500)
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
+
+
+@router.get("/load/{name}")
+async def load_template(name: str, user: Dict = Depends(get_current_user)):
+    """Load a template by name."""
+    try:
+        tm = _get_template_manager()
+        template = tm.load_template(name)
+        if template:
+            return {"success": True, "template": template}
+        return JSONResponse({"error": f'Template "{name}" not found'}, status_code=404)
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
+
+
+@router.delete("/delete/{name}")
+async def delete_template(name: str, user: Dict = Depends(get_current_user)):
+    """Delete a template."""
+    try:
+        tm = _get_template_manager()
+        success = tm.delete_template(name)
+        if success:
+            return {"success": True, "message": f'Template "{name}" deleted successfully'}
+        return JSONResponse({"error": f'Template "{name}" not found'}, status_code=404)
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
+
+
+@router.post("/apply")
+async def apply_template(
+    request: Request,
+    user: Dict = Depends(get_current_user),
+    store: SessionStore = Depends(get_session_store),
+):
+    """Apply a template to generate metadata for files."""
+    try:
+        data = await request.json()
+        template_name = data.get("template_name", "").strip()
+        file_indices = data.get("file_indices", [])
+        session_id = data.get("session_id")
+        custom_vars = data.get("custom_vars", {})
+
+        if not template_name:
+            return JSONResponse({"error": "Template name is required"}, status_code=400)
+
+        session_data = store.get_file_session(session_id)
+        if not session_data:
+            return JSONResponse({"error": "Invalid or expired session"}, status_code=400)
+
+        tm = _get_template_manager()
+        template = tm.load_template(template_name)
+        if not template:
+            return JSONResponse({"error": f'Template "{template_name}" not found'}, status_code=404)
+
+        files = session_data.get("files", [])
+        results = []
+
+        for file_index in file_indices:
+            if file_index >= len(files):
+                continue
+            file_info = files[file_index]
+            filename = file_info.get("filename", "unknown")
+
+            metadata = tm.apply_template(
+                template=template,
+                filename=filename,
+                user="web_user",
+                custom_vars=custom_vars,
+            )
+
+            # Update session
+            store.update_file_in_session(session_id, file_index, {"suggested_metadata": metadata})
+
+            results.append({
+                "file_index": file_index,
+                "filename": filename,
+                "metadata": metadata,
+            })
+
+        return {
+            "success": True,
+            "message": f"Template applied to {len(results)} file(s)",
+            "results": results,
+        }
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
+
+
+@router.post("/preview")
+async def preview_template(
+    request: Request,
+    user: Dict = Depends(get_current_user),
+):
+    """Preview template output with sample data."""
+    try:
+        data = await request.json()
+        template = {
+            "name": "preview",
+            "title": data.get("title", ""),
+            "subject": data.get("subject", ""),
+            "keywords": data.get("keywords", ""),
+        }
+        sample_filename = data.get("sample_filename", "example.pdf")
+        custom_vars = data.get("custom_vars", {})
+
+        tm = _get_template_manager()
+        preview = tm.preview_template(
+            template=template,
+            sample_filename=sample_filename,
+            user="web_user",
+            custom_vars=custom_vars,
+        )
+        available_vars = tm.get_available_variables()
+
+        return {"success": True, "preview": preview, "available_variables": available_vars}
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
--- a/app/routers/upload.py
+++ b/app/routers/upload.py
@ -0,0 +1,318 @@
+"""Upload router: file upload, Excel upload, mapping configuration."""
+
+import secrets
+import logging
+from pathlib import Path
+from typing import Dict, List
+
+from fastapi import APIRouter, Request, Depends, UploadFile, File, Form
+from fastapi.responses import JSONResponse
+
+from ..dependencies import get_current_user, get_session_store
+from ..security import limiter
+from ..services.file_service import FileService, safe_filename
+from ..services import metadata_service
+from ..session.store import SessionStore
+from ..config import get_settings, Settings
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=["upload"])
+
+# Lazy-initialized file service
+_file_service = None
+
+
+def _get_file_service() -> FileService:
+    global _file_service
+    if _file_service is None:
+        settings = get_settings()
+        _file_service = FileService(
+            upload_folder=settings.UPLOAD_FOLDER,
+            max_size_mb=settings.MAX_UPLOAD_SIZE_MB,
+        )
+    return _file_service
+
+
+@router.post("/upload")
+@limiter.limit("10/minute")
+async def upload_files(
+    request: Request,
+    files: List[UploadFile] = File(...),
+    metadata_source: str = Form("manual"),
+    import_session_id: str = Form(""),
+    excel_session_id: str = Form(""),
+    user: Dict = Depends(get_current_user),
+    store: SessionStore = Depends(get_session_store),
+):
+    """Handle multiple file uploads with metadata source selection."""
+    if not files or (len(files) == 1 and not files[0].filename):
+        return JSONResponse({"error": "No files provided"}, status_code=400)
+
+    file_svc = _get_file_service()
+    user_id = user["id"]
+
+    # Resolve lookup / import_map based on source
+    lookup = None
+    import_map = None
+
+    if metadata_source == "excel":
+        if excel_session_id:
+            session_data = store.get_import_session(excel_session_id)
+            if session_data and "metadata_map" in session_data:
+                # Wrap metadata_map as a lookup-like object
+                lookup = _ExcelLookupAdapter(session_data["metadata_map"])
+        if not lookup:
+            return JSONResponse(
+                {"error": "Please upload an Excel file first using the Upload Excel File button"},
+                status_code=400,
+            )
+
+    elif metadata_source == "import":
+        if import_session_id:
+            session_data = store.get_import_session(import_session_id)
+            if session_data and "metadata_map" in session_data:
+                import_map = session_data["metadata_map"]
+        if not import_map:
+            return JSONResponse(
+                {"error": "Please import a metadata file first using the Import button"},
+                status_code=400,
+            )
+
+    # Create file session
+    session_id = store.create_file_session(
+        user_id=user_id,
+        metadata_source=metadata_source,
+        import_session_id=import_session_id,
+    )
+
+    results = []
+    ai_pending = []
+
+    for upload_file in files:
+        try:
+            filepath = await file_svc.save_upload(upload_file, user_id)
+            filename = Path(filepath).name
+
+            if metadata_source == "ai":
+                # AI source: save file, extract metadata, queue AI for background
+                file_type = metadata_service.detect_file(filepath)
+                old_metadata = metadata_service.extract_metadata(filepath, file_type)
+                file_result = {
+                    "success": True,
+                    "filename": filename,
+                    "file_type": file_type.value,
+                    "current_metadata": old_metadata,
+                    "suggested_metadata": {"title": "", "subject": "", "keywords": ""},
+                    "filepath": filepath,
+                    "metadata_source": "ai",
+                    "ai_status": "pending",
+                }
+                store.add_file_to_session(session_id, file_result)
+                ai_pending.append({
+                    "file_index": len(results),
+                    "filepath": filepath,
+                    "filename": filename,
+                    "file_type": file_type,
+                })
+                # Deduplicate results: replace existing entry with same filename
+                existing_idx = next(
+                    (i for i, r in enumerate(results) if r.get("filename") == filename),
+                    None,
+                )
+                if existing_idx is not None:
+                    results[existing_idx] = file_result
+                else:
+                    results.append(file_result)
+            else:
+                file_result = await metadata_service.process_uploaded_file(
+                    filepath=filepath,
+                    filename=filename,
+                    metadata_source=metadata_source,
+                    lookup=lookup,
+                    import_map=import_map,
+                )
+                store.add_file_to_session(session_id, file_result)
+                # Deduplicate results: replace existing entry with same filename
+                existing_idx = next(
+                    (i for i, r in enumerate(results) if r.get("filename") == filename),
+                    None,
+                )
+                if existing_idx is not None:
+                    results[existing_idx] = file_result
+                else:
+                    results.append(file_result)
+
+        except ValueError as e:
+            results.append({"filename": upload_file.filename, "error": str(e)})
+        except Exception as e:
+            logger.error(f"Upload error for {upload_file.filename}: {e}")
+            results.append({"filename": upload_file.filename, "error": str(e)})
+
+    # Start background AI processing
+    if ai_pending:
+        import asyncio
+        from ..services.ai_service import process_bulk_ai
+        asyncio.create_task(process_bulk_ai(session_id, ai_pending, store, user_id))
+
+    # Strip server paths from client response
+    safe_results = [{k: v for k, v in r.items() if k != "filepath"} for r in results]
+
+    return {"success": True, "session_id": session_id, "files": safe_results, "ai_processing": bool(ai_pending)}
+
+
+@router.post("/upload-excel")
+async def upload_excel(
+    request: Request,
+    excel_file: UploadFile = File(...),
+    user: Dict = Depends(get_current_user),
+    store: SessionStore = Depends(get_session_store),
+):
+    """Upload Excel file for metadata lookup — returns sheet structure for mapping."""
+    try:
+        import pandas as pd
+
+        file_svc = _get_file_service()
+        filepath = await file_svc.save_upload(excel_file, user["id"])
+
+        excel = pd.ExcelFile(filepath)
+        sheet_names = excel.sheet_names
+
+        preview_data = {}
+        for sheet_name in sheet_names[:5]:
+            df = pd.read_excel(excel, sheet_name=sheet_name, nrows=5)
+            preview_data[sheet_name] = {
+                "columns": df.columns.tolist(),
+                "sample_data": df.head(3).fillna("").to_dict("records"),
+            }
+
+        # Store as import session with file info
+        excel_session_id = store.create_import_session(
+            user_id=user["id"],
+            session_type="excel",
+            file_info={
+                "path": filepath,
+                "filename": Path(filepath).name,
+                "sheet_names": sheet_names,
+            },
+        )
+
+        return {
+            "success": True,
+            "excel_session_id": excel_session_id,
+            "filename": Path(filepath).name,
+            "sheets": sheet_names,
+            "preview": preview_data,
+            "message": "Excel file uploaded. Please configure column mapping.",
+        }
+
+    except Exception as e:
+        logger.error(f"Excel upload failed: {e}")
+        return JSONResponse({"error": f"Excel upload failed: {e}"}, status_code=500)
+
+
+@router.post("/preview-excel-sheet")
+async def preview_excel_sheet(
+    request: Request,
+    user: Dict = Depends(get_current_user),
+    store: SessionStore = Depends(get_session_store),
+):
+    """Preview a specific sheet from uploaded Excel file."""
+    try:
+        import pandas as pd
+
+        data = await request.json()
+        excel_session_id = data.get("excel_session_id")
+        sheet_name = data.get("sheet_name")
+
+        session_data = store.get_import_session(excel_session_id)
+        if not session_data:
+            return JSONResponse({"error": "Invalid session ID"}, status_code=400)
+
+        excel_path = session_data["file_info"].get("path", "")
+        df = pd.read_excel(excel_path, sheet_name=sheet_name, nrows=10)
+
+        return {
+            "success": True,
+            "columns": df.columns.tolist(),
+            "sample_data": df.head(5).fillna("").to_dict("records"),
+        }
+
+    except Exception as e:
+        logger.error(f"Sheet preview failed: {e}")
+        return JSONResponse({"error": f"Sheet preview failed: {e}"}, status_code=500)
+
+
+@router.post("/configure-excel-mapping")
+async def configure_excel_mapping(
+    request: Request,
+    user: Dict = Depends(get_current_user),
+    store: SessionStore = Depends(get_session_store),
+):
+    """Configure Excel column mapping and load metadata into session."""
+    try:
+        import pandas as pd
+
+        data = await request.json()
+        excel_session_id = data.get("excel_session_id")
+        sheet_name = data.get("sheet_name")
+        column_mapping = data.get("column_mapping", {})
+
+        session_data = store.get_import_session(excel_session_id)
+        if not session_data:
+            return JSONResponse({"error": "Invalid session ID"}, status_code=400)
+
+        excel_path = session_data["file_info"].get("path", "")
+        df = pd.read_excel(excel_path, sheet_name=sheet_name)
+
+        filename_col = column_mapping.get("filename")
+        title_col = column_mapping.get("title")
+        description_col = column_mapping.get("description")
+        keywords_col = column_mapping.get("keywords")
+
+        if not filename_col:
+            return JSONResponse({"error": "Filename column is required"}, status_code=400)
+
+        metadata_map = {}
+        for _, row in df.iterrows():
+            fname = row.get(filename_col)
+            if pd.notna(fname) and str(fname).strip():
+                stem = Path(str(fname).strip()).stem.lower()
+                metadata_map[stem] = {
+                    "title": str(row.get(title_col, "")).strip() if title_col and pd.notna(row.get(title_col)) else "",
+                    "description": str(row.get(description_col, "")).strip() if description_col and pd.notna(row.get(description_col)) else "",
+                    "keywords": str(row.get(keywords_col, "")).strip() if keywords_col and pd.notna(row.get(keywords_col)) else "",
+                    "original_filename": str(fname).strip(),
+                }
+
+        # Store the built metadata_map in the session
+        store.update_import_session(excel_session_id, metadata_map=metadata_map)
+
+        stats = {
+            "total_records": len(metadata_map),
+            "with_title": sum(1 for v in metadata_map.values() if v.get("title")),
+            "with_description": sum(1 for v in metadata_map.values() if v.get("description")),
+            "with_keywords": sum(1 for v in metadata_map.values() if v.get("keywords")),
+        }
+
+        return {
+            "success": True,
+            "excel_session_id": excel_session_id,
+            "stats": stats,
+            "message": f"Configured mapping for {stats['total_records']} records from sheet \"{sheet_name}\"",
+        }
+
+    except Exception as e:
+        logger.error(f"Excel configuration failed: {e}")
+        return JSONResponse({"error": f"Excel configuration failed: {e}"}, status_code=500)
+
+
+class _ExcelLookupAdapter:
+    """Wraps a metadata_map dict to behave like ExcelMetadataLookup."""
+
+    def __init__(self, metadata_map: dict):
+        self.metadata_map = metadata_map
+
+    def lookup_by_filename(self, filename: str):
+        stem = Path(filename).stem.lower()
+        return self.metadata_map.get(stem)
--- a/app/security.py
+++ b/app/security.py
@ -0,0 +1,7 @@
+"""Security utilities: rate limiter, audit helper."""
+
+from slowapi import Limiter
+from slowapi.util import get_remote_address
+
+# Shared rate limiter instance
+limiter = Limiter(key_func=get_remote_address)
--- a/backend/app/models/init.py
+++ b/backend/app/models/init.py
--- a/app/services/admin_service.py
+++ b/app/services/admin_service.py
@ -0,0 +1,108 @@
+"""Admin service: user management, audit log, AI usage stats."""
+
+import logging
+from typing import Dict, List, Optional
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+
+class AdminService:
+    """Business logic for admin operations."""
+
+    def __init__(self, database):
+        self.db = database
+
+    # --- User Management ---
+
+    def list_users(self, include_inactive: bool = False) -> List[Dict]:
+        """Get all users with sanitized output (no password hashes)."""
+        users = self.db.get_all_users(include_inactive=include_inactive)
+        for user in users:
+            user.pop("password_hash", None)
+        return users
+
+    def get_user(self, user_id: int) -> Optional[Dict]:
+        """Get single user by ID."""
+        user = self.db.get_user_by_id(user_id)
+        if user:
+            user.pop("password_hash", None)
+        return user
+
+    def create_user(
+        self,
+        username: str,
+        email: str = "",
+        full_name: str = "",
+        role: str = "user",
+        password: str = None,
+        auth_method: str = "local",
+    ) -> Optional[int]:
+        """Create a new user."""
+        password_hash = None
+        if password:
+            from werkzeug.security import generate_password_hash
+            password_hash = generate_password_hash(password)
+
+        return self.db.create_user(
+            username=username,
+            password_hash=password_hash,
+            email=email,
+            full_name=full_name,
+            auth_method=auth_method,
+            role=role,
+        )
+
+    def update_user(self, user_id: int, updates: Dict) -> bool:
+        """Update user fields (role, is_active, full_name, email)."""
+        allowed_fields = {"role", "is_active", "full_name", "email"}
+        filtered = {k: v for k, v in updates.items() if k in allowed_fields}
+        if not filtered:
+            return False
+        return self.db.update_user(user_id, filtered)
+
+    def deactivate_user(self, user_id: int) -> bool:
+        """Deactivate a user account."""
+        return self.db.update_user(user_id, {"is_active": 0})
+
+    def activate_user(self, user_id: int) -> bool:
+        """Reactivate a user account."""
+        return self.db.update_user(user_id, {"is_active": 1})
+
+    # --- Audit Log ---
+
+    def get_audit_log(
+        self,
+        user_id: Optional[int] = None,
+        action: Optional[str] = None,
+        limit: int = 100,
+        offset: int = 0,
+    ) -> List[Dict]:
+        """Get audit log with optional filters."""
+        return self.db.get_audit_log(
+            user_id=user_id,
+            action=action,
+            limit=limit,
+            offset=offset,
+        )
+
+    # --- AI Usage Stats ---
+
+    def get_ai_usage_stats(self) -> Dict:
+        """Get aggregate AI usage statistics."""
+        return self.db.get_ai_usage_stats()
+
+    def get_ai_usage_by_user(self, limit: int = 50) -> List[Dict]:
+        """Get AI usage broken down by user."""
+        return self.db.get_ai_usage_by_user(limit=limit)
+
+    # --- Dashboard Stats ---
+
+    def get_dashboard_stats(self) -> Dict:
+        """Get combined statistics for admin dashboard."""
+        db_stats = self.db.get_stats()
+        ai_stats = self.db.get_ai_usage_stats()
+        return {
+            **db_stats,
+            "ai_usage": ai_stats,
+        }
--- a/app/services/ai_service.py
+++ b/app/services/ai_service.py
@ -0,0 +1,189 @@
+"""Async wrapper around MetadataAnalyzer for non-blocking AI generation."""
+
+import asyncio
+import logging
+from typing import Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+# Lazy-initialized singleton
+_analyzer = None
+
+# Progress queues per session (for SSE streaming)
+_progress_queues: Dict[str, asyncio.Queue] = {}
+
+
+def _get_analyzer():
+    """Lazy-initialize MetadataAnalyzer."""
+    global _analyzer
+    if _analyzer is None:
+        from app.config import get_settings
+        settings = get_settings()
+        if settings.OPENAI_API_KEY:
+            try:
+                from src.metadata_analyzer import MetadataAnalyzer
+                _analyzer = MetadataAnalyzer()
+                logger.info("MetadataAnalyzer initialized")
+            except Exception as e:
+                logger.error(f"Failed to initialize MetadataAnalyzer: {e}")
+    return _analyzer
+
+
+def get_progress_queue(session_id: str) -> asyncio.Queue:
+    """Get or create a progress queue for a session."""
+    if session_id not in _progress_queues:
+        _progress_queues[session_id] = asyncio.Queue()
+    return _progress_queues[session_id]
+
+
+def remove_progress_queue(session_id: str):
+    """Remove a progress queue when SSE connection closes."""
+    _progress_queues.pop(session_id, None)
+
+
+async def generate_metadata_async(
+    content: str,
+    filename: str,
+    file_type,
+) -> Dict[str, str]:
+    """Run AI metadata generation in a thread pool (non-blocking).
+
+    Args:
+        content: Extracted text content from the file.
+        filename: Original filename.
+        file_type: FileType enum value.
+
+    Returns:
+        Dict with 'title', 'subject', 'keywords' and internal fields.
+    """
+    analyzer = _get_analyzer()
+    if not analyzer:
+        return {
+            "title": "",
+            "subject": "AI generation not available (OpenAI API key not configured)",
+            "keywords": "",
+            "_ai_error": "OpenAI API key not configured",
+        }
+
+    if not content or len(content.strip()) < 10:
+        from pathlib import Path
+        return {
+            "title": Path(filename).stem,
+            "subject": "Insufficient content for AI analysis",
+            "keywords": "",
+            "_ai_error": "Not enough text content extracted",
+        }
+
+    loop = asyncio.get_event_loop()
+    try:
+        result = await loop.run_in_executor(
+            None, analyzer.analyze_content, content, filename, file_type
+        )
+        if "_tokens_used" in result:
+            logger.info(f"AI tokens used for {filename}: {result['_tokens_used']}")
+        return result
+    except Exception as e:
+        logger.error(f"AI generation failed for {filename}: {e}")
+        from pathlib import Path
+        return {
+            "title": Path(filename).stem,
+            "subject": f"AI generation error: {e}",
+            "keywords": "",
+            "_ai_error": str(e),
+        }
+
+
+async def process_bulk_ai(
+    session_id: str,
+    files_data: list,
+    store,
+    user_id: int,
+):
+    """Process multiple files with AI in background, sending progress via SSE.
+
+    Args:
+        session_id: File session ID.
+        files_data: List of dicts with {file_index, filepath, filename, file_type}.
+        store: SessionStore instance.
+        user_id: User ID for AI usage logging.
+    """
+    from .metadata_service import extract_content
+
+    queue = get_progress_queue(session_id)
+    total = len(files_data)
+    processed = 0
+    errors = 0
+
+    for i, file_info in enumerate(files_data):
+        file_index = file_info["file_index"]
+        filename = file_info["filename"]
+        filepath = file_info["filepath"]
+        file_type = file_info["file_type"]
+
+        # Send 'processing' event
+        await queue.put({
+            "type": "processing",
+            "file_index": file_index,
+            "filename": filename,
+            "current": i + 1,
+            "total": total,
+        })
+
+        try:
+            content = extract_content(filepath, file_type)
+            metadata = await generate_metadata_async(content, filename, file_type)
+
+            # Update session with result
+            store.update_file_in_session(session_id, file_index, {
+                "suggested_metadata": metadata,
+                "ai_status": "complete",
+            })
+
+            # Log AI usage
+            tokens_used = metadata.get("_tokens_used", 0)
+            if tokens_used and user_id:
+                try:
+                    from app.dependencies import get_database
+                    db = get_database()
+                    db.log_ai_usage(
+                        user_id=user_id,
+                        filename=filename,
+                        tokens_total=tokens_used,
+                        model=metadata.get("_model", ""),
+                    )
+                except Exception:
+                    pass
+
+            # Send 'file_complete' event
+            await queue.put({
+                "type": "file_complete",
+                "file_index": file_index,
+                "filename": filename,
+                "metadata": {
+                    "title": metadata.get("title", ""),
+                    "subject": metadata.get("subject", ""),
+                    "keywords": metadata.get("keywords", ""),
+                },
+            })
+            processed += 1
+
+        except Exception as e:
+            logger.error(f"Bulk AI error for {filename}: {e}")
+            errors += 1
+            store.update_file_in_session(session_id, file_index, {
+                "ai_status": "error",
+                "ai_error": str(e),
+            })
+            await queue.put({
+                "type": "error",
+                "file_index": file_index,
+                "filename": filename,
+                "error": str(e),
+            })
+
+    # Send 'done' event
+    await queue.put({
+        "type": "done",
+        "total_processed": processed,
+        "total_errors": errors,
+    })
--- a/app/services/auth_service.py
+++ b/app/services/auth_service.py
@ -0,0 +1,164 @@
+"""Framework-agnostic authentication service."""
+
+import os
+import secrets
+import logging
+from typing import Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+
+class AuthService:
+    """Authentication logic extracted from src/auth.py, without Flask dependencies."""
+
+    def __init__(self, database):
+        self.db = database
+        self._sso = None
+
+    def authenticate_user(self, username: str, password: str) -> Dict:
+        """Authenticate user with username and password.
+
+        Returns dict with 'success' bool and either 'user' dict or 'error' message.
+        """
+        try:
+            from werkzeug.security import check_password_hash
+
+            user = self.db.get_user_by_username(username)
+            if user and user.get("password_hash"):
+                if check_password_hash(user["password_hash"], password):
+                    logger.info(f"User '{username}' authenticated successfully")
+                    return {"success": True, "user": user}
+
+            logger.warning(f"Authentication failed for user '{username}'")
+            return {"success": False, "error": "Invalid username or password"}
+
+        except ImportError:
+            logger.error("werkzeug not available - cannot verify passwords")
+            return {"success": False, "error": "Authentication system not available"}
+        except Exception as e:
+            logger.error(f"Authentication error: {e}")
+            return {"success": False, "error": "Authentication error occurred"}
+
+    def create_session(
+        self,
+        user: Dict,
+        ip_address: Optional[str] = None,
+        user_agent: Optional[str] = None,
+    ) -> Optional[str]:
+        """Create a new auth session for an authenticated user."""
+        session_id = secrets.token_urlsafe(32)
+        user_id = user["id"]
+
+        success = self.db.create_session(
+            user_id=user_id,
+            session_id=session_id,
+            expires_in_hours=24,
+            ip_address=ip_address,
+            user_agent=user_agent,
+        )
+
+        if success:
+            self.db.update_last_login(user_id)
+            self.db.log_action(user_id, "login", f"IP: {ip_address}")
+            logger.info(f"Created session for user {user['username']} (ID: {user_id})")
+            return session_id
+
+        logger.error(f"Failed to create session for user {user_id}")
+        return None
+
+    def destroy_session(self, session_id: str, user_id: Optional[int] = None):
+        """Destroy an auth session (logout)."""
+        self.db.delete_session(session_id)
+        if user_id:
+            self.db.log_action(user_id, "logout", f"Session: {session_id}")
+            logger.info(f"User {user_id} logged out")
+
+    def validate_session(self, session_id: str) -> Optional[Dict]:
+        """Validate a session and return session data if valid."""
+        return self.db.get_session(session_id)
+
+    def get_user_by_id(self, user_id: int) -> Optional[Dict]:
+        """Get user by ID."""
+        return self.db.get_user_by_id(user_id)
+
+    def cleanup_expired_sessions(self):
+        """Clean up expired auth sessions."""
+        self.db.cleanup_expired_sessions()
+
+    # --- Microsoft SSO ---
+
+    @property
+    def sso(self):
+        """Lazy-initialize Microsoft SSO."""
+        if self._sso is None:
+            self._sso = MicrosoftSSO()
+        return self._sso
+
+    @property
+    def sso_enabled(self) -> bool:
+        return self.sso.enabled
+
+
+class MicrosoftSSO:
+    """Microsoft SSO handler. Frontend uses MSAL.js for auth, backend validates via Graph API."""
+
+    def __init__(self):
+        self.client_id = os.getenv("AZURE_CLIENT_ID", "").strip()
+        self.tenant_id = os.getenv("AZURE_TENANT_ID", "").strip()
+
+        if not self.client_id or not self.tenant_id:
+            self.enabled = False
+            logger.warning("Microsoft SSO not configured (missing AZURE_CLIENT_ID or AZURE_TENANT_ID)")
+            return
+
+        self.enabled = True
+        logger.info(f"Microsoft SSO enabled (client_id: {self.client_id[:8]}...)")
+
+    def get_user_info(self, access_token: str) -> Optional[Dict]:
+        if not self.enabled:
+            return None
+        try:
+            import requests
+
+            headers = {"Authorization": f"Bearer {access_token}"}
+            response = requests.get(
+                "https://graph.microsoft.com/v1.0/me",
+                headers=headers,
+                timeout=10,
+            )
+            if response.status_code == 200:
+                return response.json()
+            logger.error(f"Graph API error: {response.status_code}")
+            return None
+        except Exception as e:
+            logger.error(f"Error fetching user info: {e}")
+            return None
+
+    def create_or_update_user(self, user_info: Dict, database) -> Optional[Dict]:
+        """Create or update user from SSO login."""
+        try:
+            email = user_info.get("mail") or user_info.get("userPrincipalName")
+            username = email.split("@")[0] if email else user_info.get("displayName", "unknown")
+            full_name = user_info.get("displayName")
+
+            user = database.get_user_by_username(username)
+            if not user:
+                user_id = database.create_user(
+                    username=username,
+                    email=email,
+                    full_name=full_name,
+                    auth_method="sso",
+                )
+                if user_id:
+                    user = database.get_user_by_id(user_id)
+                    logger.info(f"Created new SSO user: {username}")
+                else:
+                    logger.error(f"Failed to create SSO user: {username}")
+                    return None
+            else:
+                logger.info(f"Existing SSO user logged in: {username}")
+
+            return user
+        except Exception as e:
+            logger.error(f"Error creating/updating SSO user: {e}")
+            return None
--- a/app/services/file_service.py
+++ b/app/services/file_service.py
@ -0,0 +1,94 @@
+"""File handling: upload, naming, cleanup."""
+
+import os
+import shutil
+import unicodedata
+import logging
+from pathlib import Path
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+def safe_filename(filename: str) -> str:
+    """Sanitize filename while preserving Unicode characters (CJK, etc.)."""
+    filename = unicodedata.normalize("NFC", filename)
+    filename = filename.replace("/", "_").replace("\\", "_").replace("\x00", "")
+    filename = filename.strip(". ")
+    if not filename:
+        filename = "unnamed_file"
+    return filename
+
+
+class FileService:
+    """Handles file uploads, per-user storage, and cleanup."""
+
+    def __init__(self, upload_folder: str, max_size_mb: int = 500):
+        self.upload_folder = Path(upload_folder)
+        self.upload_folder.mkdir(parents=True, exist_ok=True)
+        self.max_size_bytes = max_size_mb * 1024 * 1024
+
+    async def save_upload(self, upload_file, user_id: int) -> str:
+        """Save an uploaded file to disk using streaming.
+
+        Returns the path to the saved file.
+        """
+        filename = safe_filename(upload_file.filename or "unnamed")
+        user_dir = self.upload_folder / str(user_id)
+        user_dir.mkdir(parents=True, exist_ok=True)
+
+        filepath = user_dir / filename
+
+        # Overwrite if file already exists (user re-uploads same file).
+        # Preserving original filename is critical for Excel metadata lookup.
+
+        # Stream to disk (handles large files without loading into memory)
+        with open(filepath, "wb") as f:
+            shutil.copyfileobj(upload_file.file, f)
+
+        size = filepath.stat().st_size
+        if size > self.max_size_bytes:
+            filepath.unlink()
+            raise ValueError(f"File exceeds {self.max_size_bytes // (1024*1024)}MB limit")
+
+        logger.info(f"Saved upload: {filepath.name} ({size} bytes) for user {user_id}")
+        return str(filepath)
+
+    def delete_file(self, filepath: str):
+        """Delete a file from disk."""
+        try:
+            path = Path(filepath)
+            if path.exists() and path.is_file():
+                path.unlink()
+                logger.info(f"Deleted file: {filepath}")
+        except Exception as e:
+            logger.warning(f"Failed to delete {filepath}: {e}")
+
+    def cleanup_user_files(self, user_id: int):
+        """Delete all files for a user."""
+        user_dir = self.upload_folder / str(user_id)
+        if user_dir.exists():
+            shutil.rmtree(user_dir, ignore_errors=True)
+            logger.info(f"Cleaned up files for user {user_id}")
+
+    def get_filepath(self, filename: str, user_id: Optional[int] = None) -> Optional[str]:
+        """Resolve filepath from filename. Checks user dir first, then root."""
+        if user_id:
+            user_path = self.upload_folder / str(user_id) / safe_filename(filename)
+            if user_path.exists():
+                return str(user_path)
+
+        root_path = self.upload_folder / safe_filename(filename)
+        if root_path.exists():
+            return str(root_path)
+
+        return None
+
+    def validate_filepath(self, filepath: str) -> bool:
+        """Validate that filepath is within upload folder (prevent traversal)."""
+        try:
+            resolved = Path(filepath).resolve()
+            upload_resolved = self.upload_folder.resolve()
+            return str(resolved).startswith(str(upload_resolved))
+        except Exception:
+            return False
--- a/app/services/metadata_service.py
+++ b/app/services/metadata_service.py
@ -0,0 +1,186 @@
+"""Metadata processing orchestration: upload → detect → extract → generate."""
+
+import logging
+from pathlib import Path
+from typing import Dict, Optional
+
+from src.file_detector import FileDetector, FileType
+from src.extractors.pdf_extractor import PDFExtractor
+from src.extractors.image_extractor import ImageExtractor
+from src.extractors.office_extractor import OfficeExtractor
+from src.extractors.video_extractor import VideoExtractor
+from src.updaters.pdf_updater import PDFUpdater
+from src.updaters.image_updater import ImageUpdater
+from src.updaters.office_updater import OfficeUpdater
+from src.updaters.video_updater import VideoUpdater
+
+logger = logging.getLogger(__name__)
+
+# Extractor/updater instances (stateless, safe to share)
+EXTRACTORS = {
+    FileType.PDF: PDFExtractor(),
+    FileType.IMAGE: ImageExtractor(),
+    FileType.OFFICE_DOC: OfficeExtractor(),
+    FileType.OFFICE_SHEET: OfficeExtractor(),
+    FileType.OFFICE_PRESENTATION: OfficeExtractor(),
+    FileType.VIDEO: VideoExtractor(),
+}
+
+UPDATERS = {
+    FileType.PDF: PDFUpdater(),
+    FileType.IMAGE: ImageUpdater(),
+    FileType.OFFICE_DOC: OfficeUpdater(),
+    FileType.OFFICE_SHEET: OfficeUpdater(),
+    FileType.OFFICE_PRESENTATION: OfficeUpdater(),
+    FileType.VIDEO: VideoUpdater(),
+}
+
+
+def detect_file(filepath: str) -> FileType:
+    """Detect the type of a file."""
+    return FileDetector.detect_file_type(filepath)
+
+
+def extract_metadata(filepath: str, file_type: FileType) -> Dict[str, str]:
+    """Read current metadata from file."""
+    extractor = EXTRACTORS.get(file_type)
+    if not extractor:
+        return {}
+    try:
+        return extractor.read_metadata(filepath)
+    except Exception as e:
+        logger.error(f"Failed to extract metadata from {filepath}: {e}")
+        return {}
+
+
+def extract_content(filepath: str, file_type: FileType) -> str:
+    """Extract text content for AI analysis."""
+    extractor = EXTRACTORS.get(file_type)
+    if not extractor:
+        return ""
+    try:
+        return extractor.extract_content(filepath)
+    except Exception as e:
+        logger.error(f"Failed to extract content from {filepath}: {e}")
+        return ""
+
+
+def update_file_metadata(
+    filepath: str,
+    file_type: FileType,
+    metadata: Dict[str, str],
+    backup: bool = False,
+) -> bool:
+    """Write metadata to file. Returns True on success."""
+    updater = UPDATERS.get(file_type)
+    if not updater:
+        logger.error(f"No updater for file type: {file_type}")
+        return False
+    try:
+        return updater.update_metadata(filepath, metadata, backup=backup)
+    except Exception as e:
+        logger.error(f"Failed to update metadata for {filepath}: {e}")
+        return False
+
+
+def verify_file_metadata(
+    filepath: str,
+    file_type: FileType,
+    metadata: Dict[str, str],
+) -> bool:
+    """Verify metadata was written correctly."""
+    updater = UPDATERS.get(file_type)
+    if not updater:
+        return False
+    try:
+        return updater.verify_metadata(filepath, metadata)
+    except Exception as e:
+        logger.error(f"Failed to verify metadata for {filepath}: {e}")
+        return False
+
+
+async def process_uploaded_file(
+    filepath: str,
+    filename: str,
+    metadata_source: str,
+    lookup=None,
+    import_map=None,
+) -> Dict:
+    """Process a single uploaded file through the full pipeline.
+
+    Args:
+        filepath: Path to uploaded file on disk.
+        filename: Original filename.
+        metadata_source: One of 'excel', 'ai', 'manual', 'import'.
+        lookup: Excel lookup instance (for excel source).
+        import_map: Metadata map dict (for import source).
+
+    Returns:
+        Dict with file processing results.
+    """
+    file_type = detect_file(filepath)
+
+    if file_type == FileType.UNSUPPORTED:
+        return {"success": False, "filename": filename, "error": "Unsupported file type"}
+
+    # Read current metadata
+    old_metadata = extract_metadata(filepath, file_type)
+
+    # Generate new metadata based on source
+    excel_found = False
+    new_metadata = {"title": "", "subject": "", "keywords": ""}
+
+    if metadata_source == "excel" and lookup:
+        excel_data = lookup.lookup_by_filename(filename)
+        if excel_data:
+            new_metadata = {
+                "title": excel_data.get("title", ""),
+                "subject": excel_data.get("description", ""),
+                "keywords": "",
+            }
+            excel_found = True
+        else:
+            new_metadata = {
+                "title": Path(filename).stem,
+                "subject": f"No metadata found in Excel for {filename}",
+                "keywords": "",
+            }
+
+    elif metadata_source == "manual":
+        new_metadata = {
+            "title": Path(filename).stem,
+            "subject": "",
+            "keywords": "",
+        }
+
+    elif metadata_source == "ai":
+        from .ai_service import generate_metadata_async
+
+        content = extract_content(filepath, file_type)
+        new_metadata = await generate_metadata_async(content, filename, file_type)
+
+    elif metadata_source == "import" and import_map:
+        from src.metadata_importer import MetadataImporter
+
+        importer = MetadataImporter()
+        imported = importer.get_metadata_for_file(import_map, filename)
+        if imported:
+            new_metadata = imported
+            excel_found = True
+        else:
+            new_metadata = {
+                "title": Path(filename).stem,
+                "subject": f"No metadata found in imported file for {filename}",
+                "keywords": "",
+            }
+
+    return {
+        "success": True,
+        "filename": filename,
+        "file_type": file_type.value,
+        "current_metadata": old_metadata,
+        "suggested_metadata": new_metadata,
+        "filepath": filepath,
+        "metadata_source": metadata_source,
+        "excel_found": excel_found,
+    }
--- a/backend/app/processors/init.py
+++ b/backend/app/processors/init.py
--- a/app/session/store.py
+++ b/app/session/store.py
@ -0,0 +1,311 @@
+"""SQLite-backed session store for file processing and import sessions."""
+
+import json
+import sqlite3
+import secrets
+import logging
+from datetime import datetime, timedelta
+from typing import Optional, Dict, List, Any
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+class SessionStore:
+    """Persistent session store replacing in-memory dicts.
+
+    Stores file processing sessions and imported metadata maps in SQLite,
+    surviving server restarts and supporting multi-worker deployments.
+    """
+
+    def __init__(self, db_path: str):
+        self.db_path = db_path
+        Path(db_path).parent.mkdir(parents=True, exist_ok=True)
+        self._init_tables()
+
+    def _get_conn(self) -> sqlite3.Connection:
+        """Create a new connection per call (thread-safe)."""
+        conn = sqlite3.connect(self.db_path, timeout=10)
+        conn.row_factory = sqlite3.Row
+        conn.execute("PRAGMA journal_mode=WAL")
+        return conn
+
+    def _init_tables(self):
+        conn = self._get_conn()
+        try:
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS file_sessions (
+                    session_id TEXT PRIMARY KEY,
+                    user_id INTEGER NOT NULL,
+                    metadata_source TEXT DEFAULT 'manual',
+                    import_session_id TEXT DEFAULT '',
+                    files_json TEXT DEFAULT '[]',
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    expires_at TIMESTAMP NOT NULL
+                )
+            """)
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS import_sessions (
+                    session_id TEXT PRIMARY KEY,
+                    user_id INTEGER NOT NULL,
+                    session_type TEXT DEFAULT 'import',
+                    metadata_json TEXT DEFAULT '{}',
+                    file_info_json TEXT DEFAULT '{}',
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    expires_at TIMESTAMP NOT NULL
+                )
+            """)
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_fs_user ON file_sessions(user_id)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_fs_expires ON file_sessions(expires_at)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_is_user ON import_sessions(user_id)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_is_expires ON import_sessions(expires_at)")
+            conn.commit()
+            logger.info(f"Session store initialized at {self.db_path}")
+        finally:
+            conn.close()
+
+    # --- File Sessions ---
+
+    def create_file_session(
+        self,
+        user_id: int,
+        metadata_source: str = "manual",
+        import_session_id: str = "",
+        expires_hours: int = 24,
+    ) -> str:
+        """Create a new file processing session with a secure random ID."""
+        session_id = secrets.token_urlsafe(32)
+        expires_at = datetime.now() + timedelta(hours=expires_hours)
+        conn = self._get_conn()
+        try:
+            conn.execute(
+                "INSERT INTO file_sessions (session_id, user_id, metadata_source, import_session_id, expires_at) VALUES (?,?,?,?,?)",
+                (session_id, user_id, metadata_source, import_session_id, expires_at),
+            )
+            conn.commit()
+            logger.info(f"Created file session {session_id[:8]}... for user {user_id}")
+            return session_id
+        finally:
+            conn.close()
+
+    def get_file_session(self, session_id: str) -> Optional[Dict[str, Any]]:
+        """Get file session by ID. Returns None if expired or not found."""
+        conn = self._get_conn()
+        try:
+            row = conn.execute(
+                "SELECT * FROM file_sessions WHERE session_id = ? AND expires_at > datetime('now')",
+                (session_id,),
+            ).fetchone()
+            if row:
+                result = dict(row)
+                result["files"] = json.loads(result.pop("files_json"))
+                return result
+            return None
+        finally:
+            conn.close()
+
+    def add_file_to_session(self, session_id: str, file_entry: Dict[str, Any]):
+        """Add a processed file entry to a session.
+
+        If a file with the same filename already exists in the session,
+        it is replaced (deduplication for re-uploaded files).
+        """
+        conn = self._get_conn()
+        try:
+            row = conn.execute(
+                "SELECT files_json FROM file_sessions WHERE session_id = ?",
+                (session_id,),
+            ).fetchone()
+            if row:
+                files = json.loads(row["files_json"])
+                # Deduplicate: replace existing entry with same filename
+                filename = file_entry.get("filename", "")
+                existing_idx = next(
+                    (i for i, f in enumerate(files) if f.get("filename") == filename),
+                    None,
+                )
+                if existing_idx is not None:
+                    files[existing_idx] = file_entry
+                else:
+                    files.append(file_entry)
+                conn.execute(
+                    "UPDATE file_sessions SET files_json = ? WHERE session_id = ?",
+                    (json.dumps(files, ensure_ascii=False), session_id),
+                )
+                conn.commit()
+        finally:
+            conn.close()
+
+    def update_file_in_session(
+        self, session_id: str, file_index: int, updates: Dict[str, Any]
+    ):
+        """Update specific fields of a file entry within a session."""
+        conn = self._get_conn()
+        try:
+            row = conn.execute(
+                "SELECT files_json FROM file_sessions WHERE session_id = ?",
+                (session_id,),
+            ).fetchone()
+            if row:
+                files = json.loads(row["files_json"])
+                if 0 <= file_index < len(files):
+                    files[file_index].update(updates)
+                    conn.execute(
+                        "UPDATE file_sessions SET files_json = ? WHERE session_id = ?",
+                        (json.dumps(files, ensure_ascii=False), session_id),
+                    )
+                    conn.commit()
+        finally:
+            conn.close()
+
+    def get_file_session_files(self, session_id: str) -> List[Dict[str, Any]]:
+        """Get just the files list from a session."""
+        session = self.get_file_session(session_id)
+        if session:
+            return session["files"]
+        return []
+
+    def delete_file_session(self, session_id: str):
+        """Delete a file session."""
+        conn = self._get_conn()
+        try:
+            conn.execute("DELETE FROM file_sessions WHERE session_id = ?", (session_id,))
+            conn.commit()
+        finally:
+            conn.close()
+
+    def get_user_file_sessions(self, user_id: int) -> List[str]:
+        """Get all active session IDs for a user."""
+        conn = self._get_conn()
+        try:
+            rows = conn.execute(
+                "SELECT session_id FROM file_sessions WHERE user_id = ? AND expires_at > datetime('now')",
+                (user_id,),
+            ).fetchall()
+            return [row["session_id"] for row in rows]
+        finally:
+            conn.close()
+
+    # --- Import Sessions ---
+
+    def create_import_session(
+        self,
+        user_id: int,
+        session_type: str = "import",
+        metadata_map: Optional[Dict] = None,
+        file_info: Optional[Dict] = None,
+        expires_hours: int = 24,
+    ) -> str:
+        """Create an import/excel session."""
+        session_id = f"{session_type}_{secrets.token_urlsafe(8)}"
+        expires_at = datetime.now() + timedelta(hours=expires_hours)
+        conn = self._get_conn()
+        try:
+            conn.execute(
+                "INSERT INTO import_sessions (session_id, user_id, session_type, metadata_json, file_info_json, expires_at) VALUES (?,?,?,?,?,?)",
+                (
+                    session_id,
+                    user_id,
+                    session_type,
+                    json.dumps(metadata_map or {}, ensure_ascii=False),
+                    json.dumps(file_info or {}, ensure_ascii=False),
+                    expires_at,
+                ),
+            )
+            conn.commit()
+            logger.info(f"Created {session_type} session {session_id} for user {user_id}")
+            return session_id
+        finally:
+            conn.close()
+
+    def get_import_session(self, session_id: str) -> Optional[Dict[str, Any]]:
+        """Get import session by ID."""
+        conn = self._get_conn()
+        try:
+            row = conn.execute(
+                "SELECT * FROM import_sessions WHERE session_id = ? AND expires_at > datetime('now')",
+                (session_id,),
+            ).fetchone()
+            if row:
+                result = dict(row)
+                result["metadata_map"] = json.loads(result.pop("metadata_json"))
+                result["file_info"] = json.loads(result.pop("file_info_json"))
+                return result
+            return None
+        finally:
+            conn.close()
+
+    def update_import_session(
+        self,
+        session_id: str,
+        metadata_map: Optional[Dict] = None,
+        file_info: Optional[Dict] = None,
+    ):
+        """Update an import session's metadata map or file info."""
+        conn = self._get_conn()
+        try:
+            updates = []
+            params = []
+            if metadata_map is not None:
+                updates.append("metadata_json = ?")
+                params.append(json.dumps(metadata_map, ensure_ascii=False))
+            if file_info is not None:
+                updates.append("file_info_json = ?")
+                params.append(json.dumps(file_info, ensure_ascii=False))
+            if updates:
+                params.append(session_id)
+                conn.execute(
+                    f"UPDATE import_sessions SET {', '.join(updates)} WHERE session_id = ?",
+                    params,
+                )
+                conn.commit()
+        finally:
+            conn.close()
+
+    def delete_import_session(self, session_id: str):
+        """Delete an import session."""
+        conn = self._get_conn()
+        try:
+            conn.execute("DELETE FROM import_sessions WHERE session_id = ?", (session_id,))
+            conn.commit()
+        finally:
+            conn.close()
+
+    # --- Cleanup ---
+
+    def cleanup_expired(self) -> int:
+        """Remove all expired sessions. Returns count of deleted rows."""
+        conn = self._get_conn()
+        try:
+            c1 = conn.execute("DELETE FROM file_sessions WHERE expires_at < datetime('now')")
+            c2 = conn.execute("DELETE FROM import_sessions WHERE expires_at < datetime('now')")
+            conn.commit()
+            total = c1.rowcount + c2.rowcount
+            if total > 0:
+                logger.info(f"Cleaned up {total} expired sessions")
+            return total
+        finally:
+            conn.close()
+
+    def cleanup_user_sessions(self, user_id: int) -> List[str]:
+        """Delete all sessions for a user. Returns file paths for cleanup."""
+        conn = self._get_conn()
+        try:
+            # Collect file paths before deleting
+            rows = conn.execute(
+                "SELECT files_json FROM file_sessions WHERE user_id = ?",
+                (user_id,),
+            ).fetchall()
+            file_paths = []
+            for row in rows:
+                files = json.loads(row["files_json"])
+                for f in files:
+                    if f.get("filepath"):
+                        file_paths.append(f["filepath"])
+
+            conn.execute("DELETE FROM file_sessions WHERE user_id = ?", (user_id,))
+            conn.execute("DELETE FROM import_sessions WHERE user_id = ?", (user_id,))
+            conn.commit()
+            return file_paths
+        finally:
+            conn.close()
--- a/backend/.env
+++ b/backend/.env
@ -1,37 +0,0 @@
-# Backend Environment Configuration
-# Oliver Metadata Tool v4.0 - FastAPI
-
-# App
-APP_NAME=Oliver Metadata Tool
-APP_ENV=production
-DEBUG=False
-SECRET_KEY=your-secret-key-here-change-in-production
-CORS_ORIGINS=https://ai-sandbox.oliver.solutions
-
-# Database
-DATABASE_URL=sqlite+aiosqlite:///./data/oliver_metadata.db
-
-# Azure AD / MSAL
-AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
-AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
-AZURE_CLIENT_SECRET=your-client-secret
-REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-
-# OpenAI API
-OPENAI_API_KEY=your-openai-api-key-here
-OPENAI_MODEL=gpt-5.2
-OPENAI_API_BASE=https://api.openai.com/v1
-MAX_TOKENS=500
-TEMPERATURE=0.5
-
-# Redis
-REDIS_URL=redis://redis:6379/0
-
-# Application Settings
-BACKEND_PORT=5001
-UPLOAD_DIR=/app/uploads
-FRONTEND_URL=https://ai-sandbox.oliver.solutions/solventum-image-metadata
-
-# Rate Limiting (optional)
-RATE_LIMIT_PER_MINUTE=30
-RATE_LIMIT_PER_DAY=1000
--- a/backend/AI_FLOW_DIAGRAM.md
+++ b/backend/AI_FLOW_DIAGRAM.md
@ -1,322 +0,0 @@
-# AI Metadata Generation Flow Diagram
-
-## Complete Integration Flow
-
-```
-┌─────────────────────────────────────────────────────────────────────┐
-│                          CLIENT REQUEST                              │
-│                                                                       │
-│  POST /api/files/upload                                              │
-│  - files: [file1.pdf, file2.docx, ...]                              │
-│  - metadata_source: "ai"                                             │
-└─────────────────────────┬───────────────────────────────────────────┘
-                          │
-                          ▼
-┌─────────────────────────────────────────────────────────────────────┐
-│                    FILES ROUTER (files.py)                           │
-│                                                                       │
-│  @router.post("/upload")                                             │
-│  async def upload_files(                                             │
-│      files: List[UploadFile],                                        │
-│      metadata_source: str,                                           │
-│      metadata_service: MetadataService = Depends(...)                │
-│  )                                                                   │
-└─────────────────────────┬───────────────────────────────────────────┘
-                          │
-                          │ For each uploaded file:
-                          │
-                          ▼
-┌─────────────────────────────────────────────────────────────────────┐
-│                   FILE SERVICE (file_service.py)                     │
-│                                                                       │
-│  file_info = await file_service.save_upload(uploaded_file, user_id) │
-│  Returns: {file_id, filename, filepath, size, uploaded_at}           │
-└─────────────────────────┬───────────────────────────────────────────┘
-                          │
-                          ▼
-┌─────────────────────────────────────────────────────────────────────┐
-│                FILE DETECTOR (file_detector.py)                      │
-│                                                                       │
-│  file_type = FileDetector.detect_file_type(filepath)                 │
-│  Returns: FileType.PDF | FileType.IMAGE | FileType.OFFICE_DOC | ... │
-└─────────────────────────┬───────────────────────────────────────────┘
-                          │
-                          ▼
-┌─────────────────────────────────────────────────────────────────────┐
-│              METADATA SERVICE (metadata_service.py)                  │
-│                                                                       │
-│  1. Extract current metadata:                                        │
-│     current_metadata = await extract_current_metadata(filepath)      │
-│                                                                       │
-│  2. Generate suggested metadata:                                     │
-│     suggested_metadata = await generate_metadata(                    │
-│         filepath=filepath,                                           │
-│         filename=filename,                                           │
-│         source="ai"  ◄─── Routes to _generate_ai_metadata()         │
-│     )                                                                │
-└─────────────────────────┬───────────────────────────────────────────┘
-                          │
-                          │ source == "ai"
-                          │
-                          ▼
-┌─────────────────────────────────────────────────────────────────────┐
-│          _generate_ai_metadata() [NEW/FIXED]                        │
-│                                                                       │
-│  1. Check AI analyzer availability:                                  │
-│     analyzer = self.ai_analyzer                                      │
-│     if not analyzer:                                                 │
-│         return error_metadata  # No OPENAI_API_KEY                   │
-│                                                                       │
-│  2. Get appropriate extractor:                                       │
-│     extractor = self.get_extractor(file_type)                        │
-│                                                                       │
-│  3. Extract content from file:                                       │
-│     content = extractor.extract_content(filepath)                    │
-│     # PDF: PyPDF/pdfplumber                                          │
-│     # Image: pytesseract OCR                                         │
-│     # Office: python-docx/python-pptx                                │
-│     # Video: metadata-based                                          │
-│                                                                       │
-│  4. Call AI analyzer:                                                │
-│     metadata = analyzer.analyze_content(                             │
-│         content=content,           # Extracted text                  │
-│         filename=filename,         # Original name                   │
-│         file_type=file_type        # FileType enum [FIXED]           │
-│     )                                                                │
-└─────────────────────────┬───────────────────────────────────────────┘
-                          │
-                          ▼
-┌─────────────────────────────────────────────────────────────────────┐
-│           METADATA ANALYZER (metadata_analyzer.py)                   │
-│                                                                       │
-│  1. Count tokens in content:                                         │
-│     tokens = self._count_tokens(content)  # Using tiktoken           │
-│                                                                       │
-│  2. Truncate if needed:                                              │
-│     if tokens > MAX_TEXT_LENGTH:                                     │
-│         content = self._truncate_content(content, 4000)              │
-│                                                                       │
-│  3. Create specialized prompt:                                       │
-│     prompt = self._create_prompt(content, filename, file_type)       │
-│     # Different prompts for PDF, Image, Office, Video                │
-│                                                                       │
-│  4. Call OpenAI API with retry:                                      │
-│     response = self._call_openai_api([                               │
-│         {"role": "system", "content": "You are a metadata expert"},  │
-│         {"role": "user", "content": prompt}                          │
-│     ])                                                               │
-│     # Retry logic: 3 attempts, exponential backoff                   │
-│                                                                       │
-│  5. Parse JSON response:                                             │
-│     metadata = self._parse_metadata_response(response.content)       │
-│     # Returns: {title, subject, keywords}                            │
-│                                                                       │
-│  6. Add tracking info:                                               │
-│     metadata['_tokens_used'] = response.usage.total_tokens           │
-│     metadata['_confidence'] = 0.9                                    │
-└─────────────────────────┬───────────────────────────────────────────┘
-                          │
-                          │ Returns metadata dict
-                          │
-                          ▼
-┌─────────────────────────────────────────────────────────────────────┐
-│                    BACK TO FILES ROUTER                              │
-│                                                                       │
-│  Build FileUploadResponse:                                           │
-│  {                                                                   │
-│    file_id: "abc123",                                                │
-│    filename: "document.pdf",                                         │
-│    current_metadata: {...},  # Extracted from file                   │
-│    suggested_metadata: {     # Generated by AI                       │
-│      title: "3M Filtek Shade Selection Guide",                       │
-│      subject: "Comprehensive shade selection...",                    │
-│      keywords: "Filtek, dental, restorative, 3M, shade",             │
-│      _tokens_used: 1234                                              │
-│    },                                                                │
-│    metadata_source: "ai"                                             │
-│  }                                                                   │
-└─────────────────────────┬───────────────────────────────────────────┘
-                          │
-                          ▼
-┌─────────────────────────────────────────────────────────────────────┐
-│                    REDIS SESSION STORE                               │
-│                                                                       │
-│  session_id = await redis.create_file_session(                       │
-│      user_id=user_id,                                                │
-│      files_data=[file_results],                                      │
-│      metadata_source="ai",                                           │
-│      ttl=3600  # 1 hour                                              │
-│  )                                                                   │
-└─────────────────────────┬───────────────────────────────────────────┘
-                          │
-                          ▼
-┌─────────────────────────────────────────────────────────────────────┐
-│                    AUDIT LOG (database)                              │
-│                                                                       │
-│  await AuditLogRepository.log_action(                                │
-│      db,                                                             │
-│      user_id=user_id,                                                │
-│      action="file_upload",                                           │
-│      details="Uploaded 2 files with ai metadata"                     │
-│  )                                                                   │
-└─────────────────────────┬───────────────────────────────────────────┘
-                          │
-                          ▼
-┌─────────────────────────────────────────────────────────────────────┐
-│                      JSON RESPONSE                                   │
-│                                                                       │
-│  {                                                                   │
-│    success: true,                                                    │
-│    session_id: "file_session:xyz789",                                │
-│    files: [                                                          │
-│      {                                                               │
-│        file_id: "abc123",                                            │
-│        filename: "document.pdf",                                     │
-│        current_metadata: {...},                                      │
-│        suggested_metadata: {                                         │
-│          title: "...",                                               │
-│          subject: "...",                                             │
-│          keywords: "...",                                            │
-│          _tokens_used: 1234                                          │
-│        },                                                            │
-│        metadata_source: "ai"                                         │
-│      }                                                               │
-│    ],                                                                │
-│    message: "Uploaded 1 files successfully"                          │
-│  }                                                                   │
-└─────────────────────────────────────────────────────────────────────┘
-```
-
-## Key Components
-
-### 1. MetadataService (metadata_service.py)
- **Property**: `ai_analyzer` - Lazy-initialized MetadataAnalyzer
- **Method**: `generate_metadata()` - Routes to AI when source="ai"
- **Method**: `_generate_ai_metadata()` - Extracts content and calls AI
-
-### 2. MetadataAnalyzer (metadata_analyzer.py)
- **Method**: `analyze_content()` - Main AI generation method
- **Method**: `_count_tokens()` - Token counting with tiktoken
- **Method**: `_truncate_content()` - Smart content truncation
- **Method**: `_create_prompt()` - File-type-specific prompts
- **Method**: `_call_openai_api()` - API call with retry logic
- **Method**: `_parse_metadata_response()` - JSON parsing
-
-### 3. FileDetector (file_detector.py)
- **Method**: `detect_file_type()` - Returns FileType enum
- **Types**: PDF, IMAGE, OFFICE_DOC, OFFICE_SHEET, OFFICE_PRESENTATION, VIDEO
-
-### 4. Extractors (extractors/*.py)
- **PDFExtractor**: PyPDF + pdfplumber
- **ImageExtractor**: Pillow + pytesseract OCR
- **OfficeExtractor**: python-docx, python-pptx, openpyxl
- **VideoExtractor**: mutagen + pymediainfo
-
-## Error Handling Flow
-
-```
-┌─────────────────────────────────────────┐
-│  AI Generation Request                  │
-└────────────┬────────────────────────────┘
-             │
-             ▼
-┌────────────────────────────────────────────────────────┐
-│  Check: ai_analyzer available?                         │
-├────────────────────────────────────────────────────────┤
-│  NO  → Return: {                                       │
-│          title: filename,                              │
-│          subject: "AI requires OPENAI_API_KEY",        │
-│          keywords: ""                                  │
-│        }                                               │
-│                                                        │
-│  YES → Continue                                        │
-└────────────┬───────────────────────────────────────────┘
-             │
-             ▼
-┌────────────────────────────────────────────────────────┐
-│  Extract content from file                             │
-├────────────────────────────────────────────────────────┤
-│  Check: content sufficient? (>10 chars)                │
-│                                                        │
-│  NO  → Return: {                                       │
-│          title: filename,                              │
-│          subject: "No content for AI analysis",        │
-│          keywords: ""                                  │
-│        }                                               │
-│                                                        │
-│  YES → Continue                                        │
-└────────────┬───────────────────────────────────────────┘
-             │
-             ▼
-┌────────────────────────────────────────────────────────┐
-│  Call OpenAI API                                       │
-├────────────────────────────────────────────────────────┤
-│  Retry logic: 3 attempts with exponential backoff      │
-│                                                        │
-│  FAIL → Return: {                                      │
-│           title: filename,                             │
-│           subject: "AI generation failed: {error}",    │
-│           keywords: "",                                │
-│           _ai_error: error_message                     │
-│         }                                              │
-│                                                        │
-│  SUCCESS → Parse response and return metadata          │
-└────────────────────────────────────────────────────────┘
-```
-
-## Configuration Chain
-
-```
-.env file
-   │
-   ├─ OPENAI_API_KEY → Config.OPENAI_API_KEY
-   │                    ↓
-   │                    MetadataAnalyzer.__init__()
-   │                    (raises ValueError if not set)
-   │
-   ├─ OPENAI_MODEL   → Config.AI_MODEL  [NEW - supports both vars]
-   │  or AI_MODEL      ↓
-   │                    MetadataAnalyzer.model
-   │                    (falls back to gpt-4o-mini)
-   │
-   ├─ MAX_TOKENS     → Config.MAX_TOKENS
-   │                    ↓
-   │                    MetadataAnalyzer.max_tokens
-   │
-   └─ TEMPERATURE    → Config.TEMPERATURE
-                        ↓
-                        MetadataAnalyzer.temperature
-```
-
-## Files Modified
-
-1. ✅ `backend/app/services/metadata_service.py`
-   - ai_analyzer property (returns Optional)
-   - _generate_ai_metadata (fixed FileType parameter)
-
-2. ✅ `backend/app/processors/config.py`
-   - AI_MODEL (supports OPENAI_MODEL and AI_MODEL)
-
-3. ✅ `backend/test_ai_integration.py` (NEW)
-   - Integration test suite
-
-## Testing Commands
-
-```bash
-# 1. Syntax check
-cd backend
-python3 -m py_compile app/services/metadata_service.py
-
-# 2. Integration test
-python3 test_ai_integration.py
-
-# 3. Full backend test
-pip install -r requirements.txt
-uvicorn app.main:app --reload --port 8000
-
-# 4. API test
-curl -X POST http://localhost:8000/api/files/upload \
-  -H "Authorization: Bearer <token>" \
-  -F "files=@test.pdf" \
-  -F "metadata_source=ai"
-```
--- a/backend/AI_INTEGRATION_SUMMARY.md
+++ b/backend/AI_INTEGRATION_SUMMARY.md
@ -1,187 +0,0 @@
-# AI Metadata Generation Integration - Summary
-
-## Overview
-Successfully integrated AI metadata generation into the FastAPI backend. The MetadataAnalyzer is now fully integrated with the file upload endpoint, allowing users to generate metadata using OpenAI's GPT models.
-
-## Changes Made
-
-### 1. Fixed MetadataService AI Integration
-**File:** `backend/app/services/metadata_service.py`
-
-#### Changes:
- **Fixed `ai_analyzer` property** (lines 63-71):
-  - Changed return type from `MetadataAnalyzer` to `Optional[MetadataAnalyzer]`
-  - Added try-except to gracefully handle missing OPENAI_API_KEY
-  - Returns `None` instead of raising ValueError when API key not configured
-
- **Updated `_generate_ai_metadata` method** (lines 172-220):
-  - Added check for AI analyzer availability at the start
-  - Returns helpful error message if OPENAI_API_KEY not configured
-  - Fixed `analyze_content` call to pass `FileType` enum instead of string
-  - Improved error handling and fallback metadata
-
-### 2. Fixed Environment Variable Configuration
-**File:** `backend/app/processors/config.py`
-
-#### Changes:
- **Updated `AI_MODEL` configuration** (line 42):
-  - Changed from: `AI_MODEL = os.getenv('AI_MODEL', 'gpt-4o-mini')`
-  - Changed to: `AI_MODEL = os.getenv('OPENAI_MODEL') or os.getenv('AI_MODEL', 'gpt-4o-mini')`
-  - Now supports both `OPENAI_MODEL` and `AI_MODEL` environment variables
-  - Maintains backward compatibility with existing configs
-
-### 3. Created Integration Test
-**File:** `backend/test_ai_integration.py` (new)
-
-Created comprehensive test script that verifies:
- All imports work correctly
- MetadataService initializes properly
- AI analyzer is available (if OPENAI_API_KEY configured)
- AI metadata generation works end-to-end
-
-Run with: `python3 backend/test_ai_integration.py`
-
-## How AI Integration Works
-
-### Flow:
-1. **User uploads file** → POST `/api/files/upload` with `metadata_source="ai"`
-2. **FileService** saves the uploaded file
-3. **MetadataService.generate_metadata()** is called with `source="ai"`
-4. **Routes to `_generate_ai_metadata()`**:
-   - Detects file type (PDF, Image, Office, Video)
-   - Gets appropriate extractor for the file type
-   - Extracts content from the file
-   - Calls `MetadataAnalyzer.analyze_content()` with:
-     - `content`: Extracted text from file
-     - `filename`: Original filename
-     - `file_type`: FileType enum (PDF, IMAGE, etc.)
-5. **MetadataAnalyzer**:
-   - Truncates content to fit token limits
-   - Creates specialized prompt based on file type
-   - Calls OpenAI API with retry logic
-   - Parses JSON response into metadata dict
-   - Returns: `{title, subject, keywords, _tokens_used, _confidence}`
-6. **Response** sent back to frontend with suggested metadata
-
-### Error Handling:
- **No OPENAI_API_KEY**: Returns error message in metadata
- **Insufficient content**: Returns filename-based fallback metadata
- **API failures**: Automatic retry with exponential backoff (3 attempts)
- **Parsing errors**: Falls back to text-based parsing
-
-## Configuration
-
-### Required Environment Variables:
-```env
-# Required
-OPENAI_API_KEY=sk-...
-
-# Optional (with defaults)
-OPENAI_MODEL=gpt-4o-mini  # or AI_MODEL
-MAX_TOKENS=500
-TEMPERATURE=0.5
-MAX_TEXT_LENGTH=4000
-API_TIMEOUT=30
-API_MAX_RETRIES=3
-API_RETRY_DELAY=1.0
-```
-
-## Testing
-
-### 1. Syntax Check:
-```bash
-cd backend
-python3 -m py_compile app/services/metadata_service.py
-python3 -m py_compile app/api/files.py
-```
-✅ Both files compile without syntax errors
-
-### 2. Integration Test:
-```bash
-cd backend
-pip install -r requirements.txt
-python3 test_ai_integration.py
-```
-
-### 3. Manual API Test:
-```bash
-# Start backend
-cd backend
-uvicorn app.main:app --reload --port 8000
-
-# Upload file with AI generation
-curl -X POST http://localhost:8000/api/files/upload \
-  -H "Authorization: Bearer <token>" \
-  -F "files=@sample.pdf" \
-  -F "metadata_source=ai"
-```
-
-## Files Modified
-
-1. **backend/app/services/metadata_service.py**
-   - Lines 63-71: ai_analyzer property
-   - Lines 172-220: _generate_ai_metadata method
-
-2. **backend/app/processors/config.py**
-   - Line 42: AI_MODEL configuration
-
-3. **backend/test_ai_integration.py** (NEW)
-   - Complete integration test suite
-
-## Dependencies
-
-All required dependencies are already in `backend/requirements.txt`:
- `openai>=1.0.0` - OpenAI API client
- `tiktoken>=0.5.0` - Token counting
- `tenacity>=8.2.0` - Retry logic with exponential backoff
-
-## Notes
-
-### Unicode Support:
- MetadataAnalyzer fully supports Unicode (Chinese, Japanese, Korean)
- Uses custom `safe_filename()` - NEVER use `secure_filename()`
-
-### Token Tracking:
- Token usage logged to audit_log table
- Returned in metadata as `_tokens_used`
- Useful for cost tracking and monitoring
-
-### Model Support:
- Automatically detects model capabilities
- GPT-5/GPT-4o models: use `max_completion_tokens`
- GPT-3.5 models: use `max_tokens` + `temperature`
- Invalid models fall back to `gpt-4o-mini`
-
-### Content Truncation:
- Automatically truncates content to 4000 tokens
- Uses tiktoken for accurate token counting
- Character-based fallback if tiktoken unavailable
-
-## Next Steps
-
-1. Install dependencies: `pip install -r backend/requirements.txt`
-2. Configure OPENAI_API_KEY in backend/.env
-3. Run integration test: `python3 backend/test_ai_integration.py`
-4. Test via API with actual files
-5. Monitor token usage in audit logs
-
-## Verification Checklist
-
- [x] No syntax errors in modified files
- [x] AI analyzer property returns Optional[MetadataAnalyzer]
- [x] Graceful handling of missing OPENAI_API_KEY
- [x] FileType enum passed correctly to analyze_content()
- [x] Environment variable OPENAI_MODEL now supported
- [x] Integration test script created
- [x] All imports verified
- [x] Error handling comprehensive
-
-## Success Criteria Met
-
-✅ AI metadata generation integrated into FastAPI backend
-✅ MetadataAnalyzer properly connected to upload endpoint
-✅ No syntax errors in any modified files
-✅ Graceful error handling for missing API key
-✅ Configuration supports both OPENAI_MODEL and AI_MODEL
-✅ Comprehensive test script provided
-✅ Documentation complete
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@ -1,33 +0,0 @@
-# FastAPI Backend Dockerfile
-FROM python:3.11-slim
-
-WORKDIR /app
-
-# Install system dependencies
-RUN apt-get update && apt-get install -y \
-    libimage-exiftool-perl \
-    tesseract-ocr \
-    tesseract-ocr-chi-sim \
-    tesseract-ocr-chi-tra \
-    tesseract-ocr-jpn \
-    tesseract-ocr-kor \
-    poppler-utils \
-    ffmpeg \
-    && rm -rf /var/lib/apt/lists/*
-
-# Copy requirements and install Python dependencies
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Copy application code
-COPY app/ ./app/
-COPY templates/ ./templates/
-
-# Create directories for data persistence
-RUN mkdir -p /app/uploads /app/data /app/output/templates
-
-# Expose port
-EXPOSE 8000
-
-# Run the application
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
--- a/backend/app/api/auth.py
+++ b/backend/app/api/auth.py
@ -1,347 +0,0 @@
-"""
-Authentication API Endpoints
-Handles login, logout, token refresh, and Microsoft SSO.
-"""
-
-from fastapi import APIRouter, Depends, HTTPException, status, Request
-from fastapi.responses import JSONResponse
-from sqlalchemy.ext.asyncio import AsyncSession
-from pydantic import BaseModel
-from typing import Optional
-import msal
-import os
-
-from app.core.database import get_db, UserRepository, AuditLogRepository
-from app.core.auth import (
-    verify_password,
-    hash_password,
-    create_tokens_response,
-    verify_refresh_token,
-    get_current_user_id,
-    validate_azure_id_token
-)
-from app.core.redis_client import RedisSessionStore
-
-
-router = APIRouter()
-
-
-# ===== Request/Response Models =====
-
-class LoginRequest(BaseModel):
-    username: str
-    password: str
-
-
-class LoginResponse(BaseModel):
-    access_token: str
-    refresh_token: str
-    token_type: str
-    expires_in: int
-    user: dict
-
-
-class TokenRefreshRequest(BaseModel):
-    refresh_token: str
-
-
-class LogoutRequest(BaseModel):
-    session_id: Optional[str] = None
-
-
-class MicrosoftLoginRequest(BaseModel):
-    id_token: str
-
-
-# ===== Local Authentication Endpoints =====
-
-@router.post("/login", response_model=LoginResponse)
-async def login(
-    login_data: LoginRequest,
-    request: Request,
-    db: AsyncSession = Depends(get_db)
-):
-    """
-    Local authentication - username/password login.
-
-    Returns JWT tokens + user info.
-    """
-    # Get user from database
-    user = await UserRepository.get_by_username(db, login_data.username)
-
-    # Validate user exists and password correct
-    if not user or not user.password_hash:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Invalid username or password"
-        )
-
-    if not verify_password(login_data.password, user.password_hash):
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Invalid username or password"
-        )
-
-    # Check if user is active
-    if not user.is_active:
-        raise HTTPException(
-            status_code=status.HTTP_403_FORBIDDEN,
-            detail="User account is disabled"
-        )
-
-    # Create JWT tokens
-    tokens = create_tokens_response(user.id)
-
-    # Create user session in Redis
-    redis: RedisSessionStore = request.app.state.redis
-    session_id = await redis.create_user_session(
-        user_id=user.id,
-        refresh_token=tokens["refresh_token"],
-        ip_address=request.client.host,
-        user_agent=request.headers.get("user-agent", "")
-    )
-
-    # Update last login
-    await UserRepository.update_last_login(db, user.id)
-
-    # Log action
-    await AuditLogRepository.log_action(
-        db,
-        user_id=user.id,
-        action="login",
-        details=f"Login from {request.client.host}"
-    )
-
-    return LoginResponse(
-        **tokens,
-        user=user.to_dict()
-    )
-
-
-@router.post("/token/refresh")
-async def refresh_access_token(
-    refresh_data: TokenRefreshRequest,
-    request: Request,
-    db: AsyncSession = Depends(get_db)
-):
-    """
-    Refresh access token using refresh token.
-    """
-    # Verify refresh token
-    try:
-        user_id = verify_refresh_token(refresh_data.refresh_token)
-    except HTTPException as e:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Invalid refresh token"
-        )
-
-    # Check if user still exists and is active
-    user = await UserRepository.get_by_id(db, user_id)
-    if not user or not user.is_active:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="User not found or inactive"
-        )
-
-    # Create new tokens
-    tokens = create_tokens_response(user.id)
-
-    # Update Redis session with new refresh token
-    redis: RedisSessionStore = request.app.state.redis
-    # Note: We keep the old session_id but update the refresh token
-    # In production, you might want to rotate session_id as well
-
-    return {
-        **tokens,
-        "user": user.to_dict()
-    }
-
-
-@router.post("/logout")
-async def logout(
-    logout_data: LogoutRequest,
-    request: Request,
-    user_id: int = Depends(get_current_user_id),
-    db: AsyncSession = Depends(get_db)
-):
-    """
-    Logout user - invalidate session in Redis.
-    """
-    # Delete user session from Redis
-    redis: RedisSessionStore = request.app.state.redis
-
-    if logout_data.session_id:
-        await redis.delete_user_session(logout_data.session_id)
-
-    # Log action
-    await AuditLogRepository.log_action(
-        db,
-        user_id=user_id,
-        action="logout",
-        details=f"Logout from {request.client.host}"
-    )
-
-    return {"message": "Logged out successfully"}
-
-
-# ===== Microsoft SSO Endpoints (Client-Side Flow) =====
-
-# Microsoft OAuth configuration
-AZURE_CLIENT_ID = os.getenv("AZURE_CLIENT_ID")
-AZURE_TENANT_ID = os.getenv("AZURE_TENANT_ID")
-
-
-@router.post("/microsoft/login", response_model=LoginResponse)
-async def login_with_microsoft(
-    login_data: MicrosoftLoginRequest,
-    request: Request,
-    db: AsyncSession = Depends(get_db)
-):
-    """
-    Authenticate with Microsoft id_token (client-side MSAL flow).
-
-    Frontend uses @azure/msal-browser to get id_token from Microsoft,
-    then sends it here for validation. Backend validates the JWT signature
-    and creates application JWT tokens for session management.
-
-    Args:
-        login_data: Request containing id_token from Microsoft
-        request: HTTP request for client info
-        db: Database session
-
-    Returns:
-        LoginResponse with application JWT tokens and user info
-
-    Raises:
-        HTTPException: If id_token is invalid or SSO not configured
-    """
-    if not AZURE_CLIENT_ID or not AZURE_TENANT_ID:
-        raise HTTPException(
-            status_code=status.HTTP_501_NOT_IMPLEMENTED,
-            detail="Microsoft SSO not configured"
-        )
-
-    # Validate id_token (JWT from Azure AD)
-    user_claims = validate_azure_id_token(
-        login_data.id_token,
-        AZURE_CLIENT_ID,
-        AZURE_TENANT_ID
-    )
-
-    # Extract user details from token claims
-    username = user_claims.get("preferred_username") or user_claims.get("email")
-    email = user_claims.get("email")
-    full_name = user_claims.get("name")
-
-    if not username:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Could not extract username from id_token"
-        )
-
-    # Create or update user in database
-    user = await UserRepository.get_by_username(db, username)
-
-    if not user:
-        # Create new SSO user
-        user = await UserRepository.create_user(
-            db,
-            username=username,
-            password_hash=None,  # SSO users don't have passwords
-            email=email,
-            full_name=full_name,
-            auth_method="sso"
-        )
-
-    # Check if user is active
-    if not user.is_active:
-        raise HTTPException(
-            status_code=status.HTTP_403_FORBIDDEN,
-            detail="User account is disabled"
-        )
-
-    # Create JWT tokens (for our app, not Azure tokens)
-    tokens = create_tokens_response(user.id)
-
-    # Create user session in Redis
-    redis: RedisSessionStore = request.app.state.redis
-    session_id = await redis.create_user_session(
-        user_id=user.id,
-        refresh_token=tokens["refresh_token"],
-        ip_address=request.client.host,
-        user_agent=request.headers.get("user-agent", "")
-    )
-
-    # Update last login
-    await UserRepository.update_last_login(db, user.id)
-
-    # Log action
-    await AuditLogRepository.log_action(
-        db,
-        user_id=user.id,
-        action="sso_login",
-        details=f"SSO login (client-side MSAL) from {request.client.host}"
-    )
-
-    return LoginResponse(
-        **tokens,
-        user=user.to_dict()
-    )
-
-
-# ===== User Info Endpoint =====
-
-@router.get("/me")
-async def get_current_user(
-    user_id: int = Depends(get_current_user_id),
-    db: AsyncSession = Depends(get_db)
-):
-    """
-    Get current user info from JWT token.
-    """
-    user = await UserRepository.get_by_id(db, user_id)
-
-    if not user:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail="User not found"
-        )
-
-    return user.to_dict()
-
-
-# ===== Admin Endpoints (for testing) =====
-
-@router.post("/register")
-async def register_user(
-    login_data: LoginRequest,
-    db: AsyncSession = Depends(get_db)
-):
-    """
-    Register new user (for testing/development).
-    In production, disable this or add admin auth.
-    """
-    # Check if user already exists
-    existing_user = await UserRepository.get_by_username(db, login_data.username)
-    if existing_user:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Username already exists"
-        )
-
-    # Create new user
-    password_hashed = hash_password(login_data.password)
-    user = await UserRepository.create_user(
-        db,
-        username=login_data.username,
-        password_hash=password_hashed,
-        email=None,
-        full_name=None,
-        auth_method="local"
-    )
-
-    return {
-        "message": "User created successfully",
-        "user": user.to_dict()
-    }
--- a/backend/app/api/files.py
+++ b/backend/app/api/files.py
@ -1,316 +0,0 @@
-"""
-File API Endpoints
-Handles file upload, download, and session management.
-"""
-
-from fastapi import APIRouter, UploadFile, File, Form, Depends, HTTPException, Request, status
-from fastapi.responses import FileResponse, StreamingResponse
-from sqlalchemy.ext.asyncio import AsyncSession
-from typing import List, Optional
-from pathlib import Path
-
-from app.core.auth import get_current_user_id
-from app.core.database import get_db, AuditLogRepository
-from app.core.redis_client import RedisSessionStore
-from app.services.file_service import get_file_service, FileService
-from app.services.metadata_service import get_metadata_service, MetadataService
-from app.processors.file_detector import FileDetector
-from app.models.file import (
-    UploadSessionResponse,
-    FileUploadResponse,
-    BatchDownloadRequest
-)
-
-
-router = APIRouter()
-
-
-@router.post("/upload", response_model=UploadSessionResponse)
-async def upload_files(
-    files: List[UploadFile] = File(...),
-    metadata_source: str = Form(...),
-    import_session_id: Optional[str] = Form(None),
-    excel_session_id: Optional[str] = Form(None),
-    template_name: Optional[str] = Form(None),
-    request: Request = None,
-    user_id: int = Depends(get_current_user_id),
-    db: AsyncSession = Depends(get_db),
-    file_service: FileService = Depends(get_file_service),
-    metadata_service: MetadataService = Depends(get_metadata_service)
-):
-    """
-    Upload files and generate metadata.
-
-    Args:
-        files: List of files to upload
-        metadata_source: Source of metadata ('manual', 'ai', 'excel', 'import', 'template')
-        import_session_id: Import session ID (for 'import' source)
-        excel_session_id: Excel session ID (for 'excel' source)
-        template_name: Template name (for 'template' source)
-    """
-    if not files:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="No files provided"
-        )
-
-    # Get import metadata if import source
-    import_metadata = None
-    if metadata_source == "import" and import_session_id:
-        redis: RedisSessionStore = request.app.state.redis
-        import_session = await redis.get_import_session(import_session_id)
-        if import_session:
-            import_metadata = import_session.get("metadata", {})
-
-    # Process each file
-    file_results = []
-
-    for uploaded_file in files:
-        try:
-            # Save file
-            file_info = await file_service.save_upload(uploaded_file, user_id)
-
-            # Detect file type
-            file_type = FileDetector.detect_file_type(file_info["filepath"])
-            file_type_str = FileDetector.get_file_type_name(file_type)
-
-            # Extract current metadata
-            current_metadata = await metadata_service.extract_current_metadata(
-                file_info["filepath"]
-            )
-
-            # Generate suggested metadata
-            suggested_metadata = await metadata_service.generate_metadata(
-                filepath=file_info["filepath"],
-                filename=file_info["filename"],
-                source=metadata_source,
-                import_metadata=import_metadata,
-                template_name=template_name
-            )
-
-            # Build file response
-            file_result = FileUploadResponse(
-                file_id=file_info["file_id"],
-                filename=file_info["filename"],
-                filepath=file_info["filepath"],
-                file_type=file_type_str,
-                size=file_info["size"],
-                uploaded_at=file_info["uploaded_at"],
-                current_metadata=current_metadata,
-                suggested_metadata=suggested_metadata,
-                metadata_source=metadata_source
-            )
-
-            file_results.append(file_result)
-
-        except Exception as e:
-            print(f"Error processing file {uploaded_file.filename}: {e}")
-            # Continue with other files
-            continue
-
-    if not file_results:
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail="Failed to process any files"
-        )
-
-    # Create file session in Redis
-    redis: RedisSessionStore = request.app.state.redis
-    session_id = await redis.create_file_session(
-        user_id=user_id,
-        files_data=[file.dict() for file in file_results],
-        metadata_source=metadata_source,
-        ttl=3600  # 1 hour
-    )
-
-    # Log action
-    await AuditLogRepository.log_action(
-        db,
-        user_id=user_id,
-        action="file_upload",
-        details=f"Uploaded {len(file_results)} files with {metadata_source} metadata"
-    )
-
-    return UploadSessionResponse(
-        success=True,
-        session_id=session_id,
-        files=file_results,
-        message=f"Uploaded {len(file_results)} files successfully"
-    )
-
-
-@router.get("/{file_id}/download")
-async def download_file(
-    file_id: str,
-    request: Request,
-    user_id: int = Depends(get_current_user_id),
-    db: AsyncSession = Depends(get_db),
-    file_service: FileService = Depends(get_file_service)
-):
-    """
-    Download a single file by file_id.
-    """
-    # Get all file sessions for user (simplified - in production use better lookup)
-    redis: RedisSessionStore = request.app.state.redis
-
-    # Search through file sessions to find the file
-    # Note: This is simplified. In production, you'd want a better indexing strategy
-    pattern = f"file_session:*"
-    session_keys = await redis.get_all_sessions(pattern)
-
-    file_path = None
-    filename = None
-
-    for session_key in session_keys:
-        session_data = await redis.redis.get(session_key)
-        if session_data:
-            import json
-            session = json.loads(session_data)
-
-            # Check if this session belongs to the user
-            if session.get("user_id") != user_id:
-                continue
-
-            # Search for file with matching file_id
-            for file_info in session.get("files", []):
-                if file_info.get("file_id") == file_id:
-                    file_path = file_info.get("filepath")
-                    filename = file_info.get("filename")
-                    break
-
-            if file_path:
-                break
-
-    if not file_path or not file_service.file_exists(file_path):
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail="File not found or access denied"
-        )
-
-    # Log action
-    await AuditLogRepository.log_action(
-        db,
-        user_id=user_id,
-        action="file_download",
-        details=f"Downloaded file: {filename}"
-    )
-
-    return FileResponse(
-        path=file_path,
-        filename=filename,
-        media_type="application/octet-stream"
-    )
-
-
-@router.post("/download-batch")
-async def download_batch(
-    download_request: BatchDownloadRequest,
-    request: Request,
-    user_id: int = Depends(get_current_user_id),
-    db: AsyncSession = Depends(get_db),
-    file_service: FileService = Depends(get_file_service)
-):
-    """
-    Download multiple files as ZIP archive.
-    """
-    # Get file session
-    redis: RedisSessionStore = request.app.state.redis
-    session_data = await redis.get_file_session(download_request.session_id)
-
-    if not session_data or session_data.get("user_id") != user_id:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail="Session not found or access denied"
-        )
-
-    # Get files from session
-    all_files = session_data.get("files", [])
-
-    # Filter by file_indices
-    selected_files = [
-        all_files[i] for i in download_request.file_indices
-        if i < len(all_files)
-    ]
-
-    if not selected_files:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="No valid files selected"
-        )
-
-    # Create ZIP archive
-    from datetime import datetime
-    zip_filename = f"oliver_metadata_files_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
-
-    zip_path = await file_service.create_zip_archive(
-        files=selected_files,
-        output_filename=zip_filename
-    )
-
-    # Log action
-    await AuditLogRepository.log_action(
-        db,
-        user_id=user_id,
-        action="batch_download",
-        details=f"Downloaded {len(selected_files)} files as ZIP"
-    )
-
-    return FileResponse(
-        path=str(zip_path),
-        filename=zip_filename,
-        media_type="application/zip"
-    )
-
-
-@router.delete("/session/{session_id}")
-async def cleanup_session(
-    session_id: str,
-    request: Request,
-    user_id: int = Depends(get_current_user_id),
-    db: AsyncSession = Depends(get_db),
-    file_service: FileService = Depends(get_file_service)
-):
-    """
-    Cleanup session - delete files and session data.
-    """
-    # Get file session
-    redis: RedisSessionStore = request.app.state.redis
-    session_data = await redis.get_file_session(session_id)
-
-    if not session_data or session_data.get("user_id") != user_id:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail="Session not found or access denied"
-        )
-
-    # Delete all files in session
-    files = session_data.get("files", [])
-    deleted_count = file_service.cleanup_session_files(files)
-
-    # Delete session from Redis
-    await redis.delete_file_session(session_id)
-
-    # Log action
-    await AuditLogRepository.log_action(
-        db,
-        user_id=user_id,
-        action="session_cleanup",
-        details=f"Cleaned up session {session_id}, deleted {deleted_count} files"
-    )
-
-    return {
-        "success": True,
-        "message": f"Session cleaned up, deleted {deleted_count} files"
-    }
-
-
-@router.get("/stats")
-async def get_storage_stats(
-    user_id: int = Depends(get_current_user_id),
-    file_service: FileService = Depends(get_file_service)
-):
-    """
-    Get storage statistics (admin/debug endpoint).
-    """
-    stats = file_service.get_storage_stats()
-    return stats
--- a/backend/app/api/import_api.py
+++ b/backend/app/api/import_api.py
@ -1,216 +0,0 @@
-"""
-Import API Endpoints
-Handles CSV/Excel/JSON import with column mapping.
-"""
-
-from fastapi import APIRouter, UploadFile, File, Depends, HTTPException, Request, status
-from sqlalchemy.ext.asyncio import AsyncSession
-from pathlib import Path
-import secrets
-
-from app.core.auth import get_current_user_id
-from app.core.database import get_db, AuditLogRepository
-from app.core.redis_client import RedisSessionStore
-from app.services.file_service import get_file_service, FileService
-from app.processors.metadata_importer import MetadataImporter
-from app.models.file import (
-    ImportFileResponse,
-    ImportMappingConfig,
-    ExcelSheetPreviewRequest
-)
-
-
-router = APIRouter()
-
-
-@router.post("/file", response_model=ImportFileResponse)
-async def upload_import_file(
-    import_file: UploadFile = File(...),
-    request: Request = None,
-    user_id: int = Depends(get_current_user_id),
-    db: AsyncSession = Depends(get_db),
-    file_service: FileService = Depends(get_file_service)
-):
-    """
-    Upload CSV/Excel/JSON file for metadata import.
-    """
-    # Save import file
-    file_info = await file_service.save_upload(import_file, user_id)
-
-    # Detect file type
-    file_ext = Path(file_info["filename"]).suffix.lower()
-    import_type = file_ext.replace('.', '')  # csv, xlsx, json
-
-    # Preview file structure
-    importer = MetadataImporter()
-    try:
-        columns, sample_data, suggestions = importer.preview_file_structure(file_info["filepath"])
-
-        # For Excel files, get sheet names
-        sheet_names = None
-        if import_type == 'xlsx':
-            import openpyxl
-            wb = openpyxl.load_workbook(file_info["filepath"])
-            sheet_names = wb.sheetnames
-
-        # Create import session in Redis
-        redis: RedisSessionStore = request.app.state.redis
-        import_session_id = await redis.create_import_session(
-            user_id=user_id,
-            import_type=import_type,
-            filename=file_info["filename"],
-            filepath=file_info["filepath"]
-        )
-
-        # Log action
-        await AuditLogRepository.log_action(
-            db,
-            user_id=user_id,
-            action="import_upload",
-            details=f"Uploaded {import_type} import file: {file_info['filename']}"
-        )
-
-        # Clean sample data - replace NaN with None for JSON serialization
-        clean_sample_data = None
-        if sample_data:
-            import json
-            import numpy as np
-            clean_sample_data = []
-            for row in sample_data[:5]:
-                clean_row = {}
-                for key, value in row.items():
-                    # Replace NaN/Inf with None
-                    if isinstance(value, float) and (np.isnan(value) or np.isinf(value)):
-                        clean_row[key] = None
-                    else:
-                        clean_row[key] = value
-                clean_sample_data.append(clean_row)
-
-        return ImportFileResponse(
-            success=True,
-            import_session_id=import_session_id,
-            filename=file_info["filename"],
-            import_type=import_type,
-            columns=columns,
-            sheet_names=sheet_names,
-            sample_data=clean_sample_data,
-            row_count=len(sample_data) if sample_data else 0
-        )
-
-    except Exception as e:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail=f"Failed to parse import file: {str(e)}"
-        )
-
-
-@router.post("/excel/preview")
-async def preview_excel_sheet(
-    preview_request: ExcelSheetPreviewRequest,
-    request: Request,
-    user_id: int = Depends(get_current_user_id)
-):
-    """
-    Preview specific Excel sheet.
-    """
-    # Get import session
-    redis: RedisSessionStore = request.app.state.redis
-    session_data = await redis.get_import_session(preview_request.excel_session_id)
-
-    if not session_data or session_data.get("user_id") != user_id:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail="Import session not found"
-        )
-
-    # Preview sheet
-    importer = MetadataImporter()
-    try:
-        import pandas as pd
-        import numpy as np
-        df = pd.read_excel(session_data["filepath"], sheet_name=preview_request.sheet_name)
-
-        # Clean sample data - replace NaN with None
-        sample_rows = df.head(5).to_dict('records')
-        clean_sample_data = []
-        for row in sample_rows:
-            clean_row = {}
-            for key, value in row.items():
-                if isinstance(value, float) and (np.isnan(value) or np.isinf(value)):
-                    clean_row[key] = None
-                else:
-                    clean_row[key] = value
-            clean_sample_data.append(clean_row)
-
-        return {
-            "success": True,
-            "columns": df.columns.tolist(),
-            "sample_data": clean_sample_data,
-            "row_count": len(df)
-        }
-    except Exception as e:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail=f"Failed to preview sheet: {str(e)}"
-        )
-
-
-@router.post("/configure")
-async def configure_import_mapping(
-    mapping_config: ImportMappingConfig,
-    request: Request,
-    user_id: int = Depends(get_current_user_id),
-    db: AsyncSession = Depends(get_db)
-):
-    """
-    Configure column mapping for import file.
-    """
-    # Get import session
-    redis: RedisSessionStore = request.app.state.redis
-    session_data = await redis.get_import_session(mapping_config.import_session_id)
-
-    if not session_data or session_data.get("user_id") != user_id:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail="Import session not found"
-        )
-
-    # Build column mapping dict
-    column_mapping = {
-        m.source_column: m.target_field
-        for m in mapping_config.column_mappings
-    }
-
-    # Import metadata with mapping
-    importer = MetadataImporter()
-    try:
-        metadata_map = importer.import_with_mapping(
-            session_data["filepath"],
-            column_mapping,
-            sheet_name=mapping_config.sheet_name
-        )
-
-        # Store metadata in session
-        await redis.update_import_metadata(
-            mapping_config.import_session_id,
-            metadata_map
-        )
-
-        # Log action
-        await AuditLogRepository.log_action(
-            db,
-            user_id=user_id,
-            action="import_configure",
-            details=f"Configured import mapping: {len(metadata_map)} records"
-        )
-
-        return {
-            "success": True,
-            "message": f"Import configured with {len(metadata_map)} records"
-        }
-
-    except Exception as e:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail=f"Failed to configure import: {str(e)}"
-        )
--- a/backend/app/api/metadata.py
+++ b/backend/app/api/metadata.py
@ -1,171 +0,0 @@
-"""
-Metadata API Endpoints
-Handles metadata updates and verification.
-"""
-
-from fastapi import APIRouter, Depends, HTTPException, Request, status
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.core.auth import get_current_user_id
-from app.core.database import get_db, AuditLogRepository
-from app.core.redis_client import RedisSessionStore
-from app.services.metadata_service import get_metadata_service, MetadataService
-from app.models.file import (
-    FileMetadataUpdate,
-    BatchMetadataUpdate,
-    MetadataUpdateResponse
-)
-
-
-router = APIRouter()
-
-
-@router.put("/{file_id}")
-async def update_file_metadata(
-    file_id: str,
-    update_data: FileMetadataUpdate,
-    request: Request,
-    user_id: int = Depends(get_current_user_id),
-    db: AsyncSession = Depends(get_db),
-    metadata_service: MetadataService = Depends(get_metadata_service)
-):
-    """
-    Update metadata for a single file.
-    """
-    # Get file session
-    redis: RedisSessionStore = request.app.state.redis
-    session_data = await redis.get_file_session(update_data.session_id)
-
-    if not session_data or session_data.get("user_id") != user_id:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail="Session not found or access denied"
-        )
-
-    # Get file from session
-    files = session_data.get("files", [])
-    if update_data.file_index >= len(files):
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Invalid file index"
-        )
-
-    file_info = files[update_data.file_index]
-
-    if file_info.get("file_id") != file_id:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="File ID mismatch"
-        )
-
-    # Update metadata
-    success, message = await metadata_service.update_file_metadata(
-        filepath=file_info["filepath"],
-        metadata=update_data.metadata.dict(exclude_none=True)
-    )
-
-    if not success:
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=message
-        )
-
-    # Update session with new metadata
-    file_info["suggested_metadata"] = update_data.metadata.dict(exclude_none=True)
-    files[update_data.file_index] = file_info
-    await redis.update_file_session(update_data.session_id, files)
-
-    # Log action
-    await AuditLogRepository.log_action(
-        db,
-        user_id=user_id,
-        action="metadata_update",
-        details=f"Updated metadata for file: {file_info['filename']}"
-    )
-
-    return MetadataUpdateResponse(
-        success=True,
-        file_id=file_id,
-        filename=file_info["filename"],
-        verified="verified" in message.lower(),
-        message=message
-    )
-
-
-@router.post("/batch-update")
-async def batch_update_metadata(
-    update_data: BatchMetadataUpdate,
-    request: Request,
-    user_id: int = Depends(get_current_user_id),
-    db: AsyncSession = Depends(get_db),
-    metadata_service: MetadataService = Depends(get_metadata_service)
-):
-    """
-    Update metadata for multiple files with same metadata.
-    """
-    # Get file session
-    redis: RedisSessionStore = request.app.state.redis
-    session_data = await redis.get_file_session(update_data.session_id)
-
-    if not session_data or session_data.get("user_id") != user_id:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail="Session not found or access denied"
-        )
-
-    # Get files from session
-    files = session_data.get("files", [])
-
-    # Update each file
-    results = []
-    metadata_dict = update_data.metadata.dict(exclude_none=True)
-
-    for file_index in update_data.file_indices:
-        if file_index >= len(files):
-            continue
-
-        file_info = files[file_index]
-
-        try:
-            # Update metadata
-            success, message = await metadata_service.update_file_metadata(
-                filepath=file_info["filepath"],
-                metadata=metadata_dict
-            )
-
-            results.append({
-                "file_id": file_info["file_id"],
-                "filename": file_info["filename"],
-                "success": success,
-                "message": message
-            })
-
-            # Update session
-            if success:
-                file_info["suggested_metadata"] = metadata_dict
-                files[file_index] = file_info
-
-        except Exception as e:
-            results.append({
-                "file_id": file_info.get("file_id"),
-                "filename": file_info.get("filename"),
-                "success": False,
-                "message": str(e)
-            })
-
-    # Update session with new metadata
-    await redis.update_file_session(update_data.session_id, files)
-
-    # Log action
-    await AuditLogRepository.log_action(
-        db,
-        user_id=user_id,
-        action="batch_metadata_update",
-        details=f"Updated metadata for {len(update_data.file_indices)} files"
-    )
-
-    return {
-        "success": True,
-        "results": results,
-        "message": f"Updated {len(results)} files"
-    }
--- a/backend/app/api/templates.py
+++ b/backend/app/api/templates.py
@ -1,198 +0,0 @@
-"""
-Templates API Endpoints
-Handles template CRUD operations and application.
-"""
-
-from fastapi import APIRouter, Depends, HTTPException, Request, status
-from sqlalchemy.ext.asyncio import AsyncSession
-from typing import List
-
-from app.core.auth import get_current_user_id
-from app.core.database import get_db, AuditLogRepository
-from app.services.metadata_service import get_metadata_service, MetadataService
-from app.models.file import (
-    TemplateCreate,
-    TemplateResponse,
-    TemplateApply,
-    TemplatePreview
-)
-
-
-router = APIRouter()
-
-
-@router.get("/", response_model=List[TemplateResponse])
-async def list_templates(
-    metadata_service: MetadataService = Depends(get_metadata_service),
-    user_id: int = Depends(get_current_user_id)
-):
-    """List all available templates."""
-    templates = metadata_service.template_manager.list_templates()
-    return [TemplateResponse(**template) for template in templates]
-
-
-@router.post("/", status_code=status.HTTP_201_CREATED)
-async def create_template(
-    template_data: TemplateCreate,
-    db: AsyncSession = Depends(get_db),
-    metadata_service: MetadataService = Depends(get_metadata_service),
-    user_id: int = Depends(get_current_user_id)
-):
-    """Create a new template."""
-    template = {
-        "name": template_data.name,
-        "title": template_data.title,
-        "subject": template_data.subject,
-        "keywords": template_data.keywords,
-        "description": template_data.description
-    }
-
-    metadata_service.template_manager.save_template(template)
-
-    await AuditLogRepository.log_action(
-        db,
-        user_id=user_id,
-        action="template_create",
-        details=f"Created template: {template_data.name}"
-    )
-
-    return {"success": True, "message": "Template created", "template": template}
-
-
-@router.get("/{template_name}", response_model=TemplateResponse)
-async def get_template(
-    template_name: str,
-    metadata_service: MetadataService = Depends(get_metadata_service),
-    user_id: int = Depends(get_current_user_id)
-):
-    """Get template by name."""
-    template = metadata_service.template_manager.load_template(template_name)
-    if not template:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail=f"Template '{template_name}' not found"
-        )
-    return TemplateResponse(**template)
-
-
-@router.delete("/{template_name}")
-async def delete_template(
-    template_name: str,
-    db: AsyncSession = Depends(get_db),
-    metadata_service: MetadataService = Depends(get_metadata_service),
-    user_id: int = Depends(get_current_user_id)
-):
-    """Delete template."""
-    success = metadata_service.template_manager.delete_template(template_name)
-
-    if not success:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail=f"Template '{template_name}' not found"
-        )
-
-    await AuditLogRepository.log_action(
-        db,
-        user_id=user_id,
-        action="template_delete",
-        details=f"Deleted template: {template_name}"
-    )
-
-    return {"success": True, "message": "Template deleted"}
-
-
-@router.post("/preview")
-async def preview_template(
-    preview_data: TemplatePreview,
-    metadata_service: MetadataService = Depends(get_metadata_service),
-    user_id: int = Depends(get_current_user_id)
-):
-    """Preview template output."""
-    template = {
-        "title": preview_data.title,
-        "subject": preview_data.subject,
-        "keywords": preview_data.keywords
-    }
-
-    result = metadata_service.template_manager.apply_template(
-        template=template,
-        filename=preview_data.sample_filename,
-        user="user",
-        custom_vars=preview_data.custom_vars or {}
-    )
-
-    return {"preview": result}
-
-
-@router.post("/apply")
-async def apply_template(
-    apply_data: TemplateApply,
-    request: Request,
-    db: AsyncSession = Depends(get_db),
-    metadata_service: MetadataService = Depends(get_metadata_service),
-    user_id: int = Depends(get_current_user_id)
-):
-    """
-    Apply template to files in session with variable substitution.
-
-    Loads template, applies to each file with variable substitution,
-    updates session with suggested metadata.
-    """
-    # Load template
-    template = metadata_service.template_manager.load_template(apply_data.template_name)
-    if not template:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail=f"Template '{apply_data.template_name}' not found"
-        )
-
-    # Get file session from Redis
-    redis = request.app.state.redis
-    file_session = await redis.get_file_session(apply_data.session_id)
-    if not file_session:
-        raise HTTPException(
-            status_code=status.HTTP_404_NOT_FOUND,
-            detail="Session not found or expired"
-        )
-
-    files = file_session.get("files", [])
-    results = []
-
-    # Apply template to each selected file
-    for file_index in apply_data.file_indices:
-        if file_index >= len(files):
-            results.append({"index": file_index, "success": False, "error": "Invalid file index"})
-            continue
-
-        file_info = files[file_index]
-        filename = file_info.get("filename", "")
-
-        # Apply template with variable substitution
-        try:
-            metadata = metadata_service.template_manager.apply_template(
-                template=template,
-                filename=filename,
-                user=f"user_{user_id}",
-                custom_vars=apply_data.custom_vars or {}
-            )
-
-            # Update file's suggested metadata in session
-            file_info["suggested_metadata"] = metadata
-            results.append({"index": file_index, "success": True, "metadata": metadata})
-
-        except Exception as e:
-            results.append({"index": file_index, "success": False, "error": str(e)})
-
-    # Update session with modified files
-    file_session["files"] = files
-    await redis.update_file_session(apply_data.session_id, file_session)
-
-    # Log action
-    await AuditLogRepository.log_action(
-        db,
-        user_id=user_id,
-        action="template_apply",
-        details=f"Applied template '{apply_data.template_name}' to {len(apply_data.file_indices)} files"
-    )
-
-    return {"success": True, "results": results}
--- a/backend/app/core/auth.py
+++ b/backend/app/core/auth.py
@ -1,311 +0,0 @@
-"""
-JWT Authentication
-Replaces Flask session-based auth with JWT tokens + Redis refresh tokens.
-"""
-
-from datetime import datetime, timedelta
-from typing import Optional
-from jose import JWTError, jwt
-from passlib.context import CryptContext
-from fastapi import Depends, HTTPException, status
-from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
-import os
-
-# Password hashing
-pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
-
-# JWT Configuration
-SECRET_KEY = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
-ALGORITHM = "HS256"
-ACCESS_TOKEN_EXPIRE_MINUTES = 30
-REFRESH_TOKEN_EXPIRE_DAYS = 7
-
-# Security scheme
-security = HTTPBearer()
-
-
-# ===== Password Hashing =====
-
-def hash_password(password: str) -> str:
-    """
-    Hash a password using bcrypt.
-
-    Args:
-        password: Plain text password
-
-    Returns:
-        Hashed password
-    """
-    return pwd_context.hash(password)
-
-
-def verify_password(plain_password: str, hashed_password: str) -> bool:
-    """
-    Verify a password against its hash.
-
-    Args:
-        plain_password: Plain text password
-        hashed_password: Hashed password from database
-
-    Returns:
-        True if password matches, False otherwise
-    """
-    return pwd_context.verify(plain_password, hashed_password)
-
-
-# ===== JWT Token Creation =====
-
-def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
-    """
-    Create JWT access token (short-lived, 30 minutes).
-
-    Args:
-        data: Payload data (typically {"sub": user_id})
-        expires_delta: Optional custom expiration time
-
-    Returns:
-        JWT token string
-    """
-    to_encode = data.copy()
-
-    if expires_delta:
-        expire = datetime.utcnow() + expires_delta
-    else:
-        expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
-
-    to_encode.update({
-        "exp": expire,
-        "type": "access"
-    })
-
-    encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
-    return encoded_jwt
-
-
-def create_refresh_token(user_id: int) -> str:
-    """
-    Create JWT refresh token (long-lived, 7 days).
-    Stored in Redis for validation.
-
-    Args:
-        user_id: User ID from database
-
-    Returns:
-        JWT refresh token string
-    """
-    expire = datetime.utcnow() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
-
-    to_encode = {
-        "sub": str(user_id),
-        "exp": expire,
-        "type": "refresh"
-    }
-
-    encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
-    return encoded_jwt
-
-
-# ===== JWT Token Validation =====
-
-def decode_token(token: str) -> dict:
-    """
-    Decode and validate JWT token.
-
-    Args:
-        token: JWT token string
-
-    Returns:
-        Decoded payload
-
-    Raises:
-        HTTPException: If token is invalid or expired
-    """
-    try:
-        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
-        return payload
-    except JWTError as e:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail=f"Invalid token: {str(e)}",
-            headers={"WWW-Authenticate": "Bearer"},
-        )
-
-
-def verify_access_token(token: str) -> int:
-    """
-    Verify access token and extract user ID.
-
-    Args:
-        token: JWT access token
-
-    Returns:
-        user_id: User ID from token
-
-    Raises:
-        HTTPException: If token is invalid or not an access token
-    """
-    payload = decode_token(token)
-
-    # Check token type
-    if payload.get("type") != "access":
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Invalid token type",
-            headers={"WWW-Authenticate": "Bearer"},
-        )
-
-    # Extract user ID
-    user_id = payload.get("sub")
-    if user_id is None:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Invalid token payload",
-            headers={"WWW-Authenticate": "Bearer"},
-        )
-
-    return int(user_id)
-
-
-def verify_refresh_token(token: str) -> int:
-    """
-    Verify refresh token and extract user ID.
-
-    Args:
-        token: JWT refresh token
-
-    Returns:
-        user_id: User ID from token
-
-    Raises:
-        HTTPException: If token is invalid or not a refresh token
-    """
-    payload = decode_token(token)
-
-    # Check token type
-    if payload.get("type") != "refresh":
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Invalid token type",
-            headers={"WWW-Authenticate": "Bearer"},
-        )
-
-    # Extract user ID
-    user_id = payload.get("sub")
-    if user_id is None:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Invalid token payload",
-            headers={"WWW-Authenticate": "Bearer"},
-        )
-
-    return int(user_id)
-
-
-# ===== FastAPI Dependencies =====
-
-async def get_current_user_id(
-    credentials: HTTPAuthorizationCredentials = Depends(security)
-) -> int:
-    """
-    FastAPI dependency to get current user ID from JWT token.
-    Use this to protect endpoints: @router.get("/protected", dependencies=[Depends(get_current_user_id)])
-
-    Args:
-        credentials: HTTP Bearer credentials from Authorization header
-
-    Returns:
-        user_id: Current user's ID
-
-    Raises:
-        HTTPException: If token is invalid
-    """
-    token = credentials.credentials
-    user_id = verify_access_token(token)
-    return user_id
-
-
-# ===== Helper Functions =====
-
-def create_tokens_response(user_id: int) -> dict:
-    """
-    Create both access and refresh tokens for login response.
-
-    Args:
-        user_id: User ID from database
-
-    Returns:
-        Dict with access_token, refresh_token, token_type
-    """
-    access_token = create_access_token({"sub": str(user_id)})
-    refresh_token = create_refresh_token(user_id)
-
-    return {
-        "access_token": access_token,
-        "refresh_token": refresh_token,
-        "token_type": "bearer",
-        "expires_in": ACCESS_TOKEN_EXPIRE_MINUTES * 60  # seconds
-    }
-
-
-# ===== Azure AD ID Token Validation =====
-
-def validate_azure_id_token(id_token: str, client_id: str, tenant_id: str) -> dict:
-    """
-    Validate Azure AD id_token (JWT from Microsoft).
-
-    This validates the JWT signature using Microsoft's public keys,
-    verifies the issuer and audience, and extracts user claims.
-
-    Args:
-        id_token: ID token JWT string from Azure AD
-        client_id: Azure application client ID (audience)
-        tenant_id: Azure tenant ID
-
-    Returns:
-        Decoded token payload with user claims (email, name, etc.)
-
-    Raises:
-        HTTPException: If token is invalid, expired, or signature verification fails
-    """
-    import jwt
-    from jwt import PyJWKClient
-
-    try:
-        # Get Microsoft's public signing keys
-        jwks_url = f"https://login.microsoftonline.com/{tenant_id}/discovery/v2.0/keys"
-        jwks_client = PyJWKClient(jwks_url)
-
-        # Get the signing key from the JWT header
-        signing_key = jwks_client.get_signing_key_from_jwt(id_token)
-
-        # Decode and validate the token
-        decoded = jwt.decode(
-            id_token,
-            signing_key.key,
-            algorithms=["RS256"],
-            audience=client_id,
-            issuer=f"https://login.microsoftonline.com/{tenant_id}/v2.0"
-        )
-
-        return decoded
-
-    except jwt.ExpiredSignatureError:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="ID token has expired"
-        )
-    except jwt.InvalidAudienceError:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Invalid token audience (client ID mismatch)"
-        )
-    except jwt.InvalidIssuerError:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail="Invalid token issuer (tenant ID mismatch)"
-        )
-    except Exception as e:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED,
-            detail=f"ID token validation failed: {str(e)}"
-        )
--- a/backend/app/core/database.py
+++ b/backend/app/core/database.py
@ -1,229 +0,0 @@
-"""
-Database Models and Session Management
-Uses SQLAlchemy async ORM for database operations.
-Keeps existing schema: users, audit_log tables.
-"""
-
-from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
-from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
-from sqlalchemy import String, Integer, Boolean, DateTime, Text, func, select
-from datetime import datetime
-from typing import Optional
-import os
-
-
-# Database URL from environment
-DATABASE_URL = os.getenv(
-    "DATABASE_URL",
-    "sqlite+aiosqlite:///./oliver_metadata.db"
-)
-
-# Create async engine
-engine = create_async_engine(
-    DATABASE_URL,
-    echo=os.getenv("DEBUG") == "true",  # Log SQL queries in debug mode
-    future=True
-)
-
-# Create async session factory
-AsyncSessionLocal = async_sessionmaker(
-    engine,
-    class_=AsyncSession,
-    expire_on_commit=False,
-    autocommit=False,
-    autoflush=False
-)
-
-
-# Base class for models
-class Base(DeclarativeBase):
-    pass
-
-
-# ===== Models =====
-
-class User(Base):
-    """User model - keeps existing schema from Flask app"""
-    __tablename__ = "users"
-
-    id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
-    username: Mapped[str] = mapped_column(String(100), unique=True, nullable=False, index=True)
-    password_hash: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)  # Nullable for SSO users
-    email: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
-    full_name: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
-    auth_method: Mapped[str] = mapped_column(String(20), default="local", nullable=False)  # 'local' or 'sso'
-    is_active: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
-    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), nullable=False)
-    last_login: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
-
-    def to_dict(self):
-        """Convert model to dict for JSON serialization"""
-        return {
-            "id": self.id,
-            "username": self.username,
-            "email": self.email,
-            "full_name": self.full_name,
-            "auth_method": self.auth_method,
-            "is_active": self.is_active,
-            "created_at": self.created_at.isoformat() if self.created_at else None,
-            "last_login": self.last_login.isoformat() if self.last_login else None,
-        }
-
-
-class AuditLog(Base):
-    """Audit log model - tracks user actions"""
-    __tablename__ = "audit_log"
-
-    id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
-    user_id: Mapped[int] = mapped_column(Integer, nullable=False, index=True)
-    action: Mapped[str] = mapped_column(String(100), nullable=False, index=True)
-    details: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
-    timestamp: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), nullable=False, index=True)
-
-    def to_dict(self):
-        """Convert model to dict for JSON serialization"""
-        return {
-            "id": self.id,
-            "user_id": self.user_id,
-            "action": self.action,
-            "details": self.details,
-            "timestamp": self.timestamp.isoformat() if self.timestamp else None,
-        }
-
-
-# ===== Database Initialization =====
-
-async def init_db():
-    """
-    Initialize database - create tables if they don't exist.
-    Called on application startup.
-    """
-    async with engine.begin() as conn:
-        # Create all tables
-        await conn.run_sync(Base.metadata.create_all)
-
-
-# ===== Database Session Dependency =====
-
-async def get_db() -> AsyncSession:
-    """
-    FastAPI dependency to get database session.
-    Use as: db: AsyncSession = Depends(get_db)
-    """
-    async with AsyncSessionLocal() as session:
-        try:
-            yield session
-        finally:
-            await session.close()
-
-
-# ===== Database Helper Functions =====
-
-class UserRepository:
-    """Repository pattern for User operations"""
-
-    @staticmethod
-    async def get_by_id(db: AsyncSession, user_id: int) -> Optional[User]:
-        """Get user by ID"""
-        result = await db.execute(select(User).where(User.id == user_id))
-        return result.scalar_one_or_none()
-
-    @staticmethod
-    async def get_by_username(db: AsyncSession, username: str) -> Optional[User]:
-        """Get user by username"""
-        result = await db.execute(select(User).where(User.username == username))
-        return result.scalar_one_or_none()
-
-    @staticmethod
-    async def get_by_email(db: AsyncSession, email: str) -> Optional[User]:
-        """Get user by email"""
-        result = await db.execute(select(User).where(User.email == email))
-        return result.scalar_one_or_none()
-
-    @staticmethod
-    async def create_user(
-        db: AsyncSession,
-        username: str,
-        password_hash: Optional[str],
-        email: Optional[str],
-        full_name: Optional[str],
-        auth_method: str = "local"
-    ) -> User:
-        """Create new user"""
-        user = User(
-            username=username,
-            password_hash=password_hash,
-            email=email,
-            full_name=full_name,
-            auth_method=auth_method,
-            is_active=True
-        )
-        db.add(user)
-        await db.commit()
-        await db.refresh(user)
-        return user
-
-    @staticmethod
-    async def update_last_login(db: AsyncSession, user_id: int):
-        """Update user's last login timestamp"""
-        result = await db.execute(select(User).where(User.id == user_id))
-        user = result.scalar_one_or_none()
-        if user:
-            user.last_login = datetime.utcnow()
-            await db.commit()
-
-    @staticmethod
-    async def get_all_users(db: AsyncSession) -> list[User]:
-        """Get all users"""
-        result = await db.execute(select(User))
-        return list(result.scalars().all())
-
-
-class AuditLogRepository:
-    """Repository pattern for AuditLog operations"""
-
-    @staticmethod
-    async def log_action(
-        db: AsyncSession,
-        user_id: int,
-        action: str,
-        details: Optional[str] = None
-    ) -> AuditLog:
-        """Create audit log entry"""
-        log_entry = AuditLog(
-            user_id=user_id,
-            action=action,
-            details=details
-        )
-        db.add(log_entry)
-        await db.commit()
-        await db.refresh(log_entry)
-        return log_entry
-
-    @staticmethod
-    async def get_user_activity(
-        db: AsyncSession,
-        user_id: int,
-        limit: int = 100
-    ) -> list[AuditLog]:
-        """Get user activity logs"""
-        result = await db.execute(
-            select(AuditLog)
-            .where(AuditLog.user_id == user_id)
-            .order_by(AuditLog.timestamp.desc())
-            .limit(limit)
-        )
-        return list(result.scalars().all())
-
-    @staticmethod
-    async def get_all_activity(
-        db: AsyncSession,
-        limit: int = 1000
-    ) -> list[AuditLog]:
-        """Get all activity logs"""
-        result = await db.execute(
-            select(AuditLog)
-            .order_by(AuditLog.timestamp.desc())
-            .limit(limit)
-        )
-        return list(result.scalars().all())
--- a/backend/app/core/redis_client.py
+++ b/backend/app/core/redis_client.py
@ -1,341 +0,0 @@
-"""
-Redis Session Store
-Replaces in-memory session dictionaries with persistent Redis storage.
-Solves the main problem: sessions lost on restart.
-"""
-
-from redis.asyncio import Redis
-from typing import Optional, Dict, Any
-import json
-import secrets
-
-
-class RedisSessionStore:
-    """
-    Redis-based session storage for:
-    1. User login sessions (JWT refresh tokens)
-    2. File processing sessions (uploaded files + metadata)
-    3. Import sessions (Excel/CSV metadata lookups)
-    """
-
-    def __init__(self, redis_url: str):
-        """
-        Initialize Redis connection.
-
-        Args:
-            redis_url: Redis connection string (e.g., "redis://localhost:6379/0")
-        """
-        self.redis = Redis.from_url(redis_url, decode_responses=True)
-
-    async def close(self):
-        """Close Redis connection"""
-        await self.redis.close()
-
-    # ===== User Session Methods =====
-
-    async def create_user_session(
-        self,
-        user_id: int,
-        refresh_token: str,
-        ip_address: str,
-        user_agent: str,
-        ttl: int = 7 * 86400  # 7 days
-    ) -> str:
-        """
-        Create a new user login session.
-
-        Args:
-            user_id: User ID from database
-            refresh_token: JWT refresh token
-            ip_address: Client IP address
-            user_agent: Client user agent string
-            ttl: Time to live in seconds (default: 7 days)
-
-        Returns:
-            session_id: Unique session identifier
-        """
-        session_id = secrets.token_urlsafe(32)
-
-        session_data = {
-            "user_id": user_id,
-            "refresh_token": refresh_token,
-            "ip_address": ip_address,
-            "user_agent": user_agent
-        }
-
-        await self.redis.setex(
-            f"user_session:{session_id}",
-            ttl,
-            json.dumps(session_data)
-        )
-
-        return session_id
-
-    async def get_user_session(self, session_id: str) -> Optional[Dict[str, Any]]:
-        """
-        Retrieve user session data.
-
-        Args:
-            session_id: Session identifier
-
-        Returns:
-            Session data dict or None if not found/expired
-        """
-        data = await self.redis.get(f"user_session:{session_id}")
-        return json.loads(data) if data else None
-
-    async def delete_user_session(self, session_id: str) -> bool:
-        """
-        Delete user session (logout).
-
-        Args:
-            session_id: Session identifier
-
-        Returns:
-            True if deleted, False if not found
-        """
-        result = await self.redis.delete(f"user_session:{session_id}")
-        return result > 0
-
-    # ===== File Processing Session Methods =====
-
-    async def create_file_session(
-        self,
-        user_id: int,
-        files_data: list[Dict[str, Any]],
-        metadata_source: str,
-        ttl: int = 3600  # 1 hour
-    ) -> str:
-        """
-        Create file processing session (replaces in-memory sessions dict).
-
-        Args:
-            user_id: User ID who uploaded files
-            files_data: List of file info dicts (filename, filepath, metadata, etc.)
-            metadata_source: Source of metadata ('excel', 'ai', 'manual', 'import', 'template')
-            ttl: Time to live in seconds (default: 1 hour)
-
-        Returns:
-            session_id: Unique session identifier
-        """
-        session_id = secrets.token_urlsafe(16)
-
-        session_data = {
-            "user_id": user_id,
-            "files": files_data,
-            "metadata_source": metadata_source
-        }
-
-        await self.redis.setex(
-            f"file_session:{session_id}",
-            ttl,
-            json.dumps(session_data)
-        )
-
-        return session_id
-
-    async def get_file_session(self, session_id: str) -> Optional[Dict[str, Any]]:
-        """
-        Retrieve file processing session.
-
-        Args:
-            session_id: Session identifier
-
-        Returns:
-            Session data dict or None if not found/expired
-        """
-        data = await self.redis.get(f"file_session:{session_id}")
-        return json.loads(data) if data else None
-
-    async def update_file_session(
-        self,
-        session_id: str,
-        files_data: list[Dict[str, Any]]
-    ) -> bool:
-        """
-        Update file session with new metadata (after user edits).
-
-        Args:
-            session_id: Session identifier
-            files_data: Updated file data list
-
-        Returns:
-            True if updated, False if session not found
-        """
-        # Get current session to preserve TTL
-        current_data = await self.get_file_session(session_id)
-        if not current_data:
-            return False
-
-        # Update files data
-        current_data["files"] = files_data
-
-        # Get remaining TTL
-        ttl = await self.redis.ttl(f"file_session:{session_id}")
-        if ttl <= 0:
-            ttl = 3600  # Default 1 hour if expired
-
-        # Save with preserved TTL
-        await self.redis.setex(
-            f"file_session:{session_id}",
-            ttl,
-            json.dumps(current_data)
-        )
-
-        return True
-
-    async def delete_file_session(self, session_id: str) -> bool:
-        """
-        Delete file processing session (cleanup after download).
-
-        Args:
-            session_id: Session identifier
-
-        Returns:
-            True if deleted, False if not found
-        """
-        result = await self.redis.delete(f"file_session:{session_id}")
-        return result > 0
-
-    # ===== Import Session Methods =====
-
-    async def create_import_session(
-        self,
-        user_id: int,
-        import_type: str,  # 'excel' or 'csv' or 'json'
-        filename: str,
-        filepath: str,
-        metadata: Optional[Dict[str, Any]] = None,
-        ttl: int = 3600  # 1 hour
-    ) -> str:
-        """
-        Create import session for Excel/CSV/JSON metadata lookup.
-
-        Args:
-            user_id: User ID who uploaded import file
-            import_type: Type of import file
-            filename: Original filename
-            filepath: Path to uploaded file
-            metadata: Optional metadata map (after configuration)
-            ttl: Time to live in seconds (default: 1 hour)
-
-        Returns:
-            session_id: Unique session identifier
-        """
-        session_id = secrets.token_urlsafe(16)
-
-        session_data = {
-            "user_id": user_id,
-            "import_type": import_type,
-            "filename": filename,
-            "filepath": filepath,
-            "metadata": metadata or {}
-        }
-
-        await self.redis.setex(
-            f"import_session:{session_id}",
-            ttl,
-            json.dumps(session_data)
-        )
-
-        return session_id
-
-    async def get_import_session(self, session_id: str) -> Optional[Dict[str, Any]]:
-        """
-        Retrieve import session.
-
-        Args:
-            session_id: Session identifier
-
-        Returns:
-            Session data dict or None if not found/expired
-        """
-        data = await self.redis.get(f"import_session:{session_id}")
-        return json.loads(data) if data else None
-
-    async def update_import_metadata(
-        self,
-        session_id: str,
-        metadata: Dict[str, Any]
-    ) -> bool:
-        """
-        Update import session with configured metadata mappings.
-
-        Args:
-            session_id: Session identifier
-            metadata: Metadata lookup map (filename -> metadata dict)
-
-        Returns:
-            True if updated, False if session not found
-        """
-        current_data = await self.get_import_session(session_id)
-        if not current_data:
-            return False
-
-        current_data["metadata"] = metadata
-
-        ttl = await self.redis.ttl(f"import_session:{session_id}")
-        if ttl <= 0:
-            ttl = 3600
-
-        await self.redis.setex(
-            f"import_session:{session_id}",
-            ttl,
-            json.dumps(current_data)
-        )
-
-        return True
-
-    # ===== Utility Methods =====
-
-    async def ping(self) -> bool:
-        """
-        Check if Redis is connected.
-
-        Returns:
-            True if connected, False otherwise
-        """
-        try:
-            await self.redis.ping()
-            return True
-        except Exception:
-            return False
-
-    async def get_all_sessions(self, pattern: str = "*") -> list[str]:
-        """
-        Get all session keys matching pattern (for debugging).
-
-        Args:
-            pattern: Redis key pattern (e.g., "file_session:*")
-
-        Returns:
-            List of session keys
-        """
-        cursor = 0
-        keys = []
-        while True:
-            cursor, batch = await self.redis.scan(cursor, match=pattern, count=100)
-            keys.extend(batch)
-            if cursor == 0:
-                break
-        return keys
-
-    async def cleanup_expired_sessions(self):
-        """
-        Cleanup expired sessions (Redis does this automatically with TTL,
-        but this can be called for manual cleanup if needed).
-        """
-        # Redis automatically removes expired keys, but we can force cleanup
-        # This is mainly for monitoring/logging purposes
-        patterns = ["user_session:*", "file_session:*", "import_session:*"]
-        total_cleaned = 0
-
-        for pattern in patterns:
-            keys = await self.get_all_sessions(pattern)
-            for key in keys:
-                ttl = await self.redis.ttl(key)
-                if ttl <= 0:
-                    await self.redis.delete(key)
-                    total_cleaned += 1
-
-        return total_cleaned
--- a/backend/app/main.py
+++ b/backend/app/main.py
@ -1,143 +0,0 @@
-"""
-Oliver Metadata Tool - FastAPI Backend
-Main application entry point with CORS, middleware, and routers.
-"""
-
-from fastapi import FastAPI, Request
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse, FileResponse
-from fastapi.staticfiles import StaticFiles
-from fastapi.templating import Jinja2Templates
-from contextlib import asynccontextmanager
-import os
-from pathlib import Path
-
-from app.api import auth, files, metadata, templates
-from app.api import import_api
-from app.core.redis_client import RedisSessionStore
-from app.core.database import init_db
-
-# Jinja2 Templates for Flask UI compatibility
-TEMPLATE_DIR = Path(__file__).parent.parent / "templates"
-jinja_templates = Jinja2Templates(directory=str(TEMPLATE_DIR))
-
-
-# Lifespan context manager for startup/shutdown events
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Application lifespan: startup and shutdown logic"""
-    # Startup
-    print("🚀 Starting Oliver Metadata Tool API...")
-
-    # Initialize database
-    await init_db()
-    print("✅ Database initialized")
-
-    # Initialize Redis
-    redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0")
-    app.state.redis = RedisSessionStore(redis_url)
-    print(f"✅ Redis connected: {redis_url}")
-
-    yield
-
-    # Shutdown
-    print("👋 Shutting down Oliver Metadata Tool API...")
-    await app.state.redis.close()
-
-
-# Create FastAPI app
-app = FastAPI(
-    title="Oliver Metadata Tool API",
-    description="Universal metadata creation and management API for files",
-    version="4.0.0",
-    lifespan=lifespan
-)
-
-
-# CORS Configuration
-# Allow React frontend to make requests from different origin
-origins = [
-    "http://localhost:3000",  # React dev server
-    "http://localhost:5173",  # Vite dev server
-    "http://localhost:80",    # Production frontend
-    os.getenv("FRONTEND_URL", ""),  # Custom frontend URL from env
-]
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-
-# Include routers with /api prefix
-app.include_router(auth.router, prefix="/api/auth", tags=["auth"])
-app.include_router(files.router, prefix="/api/files", tags=["files"])
-app.include_router(metadata.router, prefix="/api/metadata", tags=["metadata"])
-app.include_router(templates.router, prefix="/api/templates", tags=["templates"])
-app.include_router(import_api.router, prefix="/api/import", tags=["import"])
-
-
-# Serve Flask HTML templates (hybrid mode)
-@app.get("/")
-async def root(request: Request):
-    """Serve Flask index.html template"""
-    # Check if user is authenticated (simplified for now)
-    return jinja_templates.TemplateResponse(
-        "index.html",
-        {
-            "request": request,
-            "username": None,  # Will be set by JavaScript from JWT
-            "docker_mode": os.getenv("DOCKER_MODE", "false") == "true"
-        }
-    )
-
-@app.get("/login")
-async def login_page(request: Request):
-    """Serve Flask login.html template"""
-    return jinja_templates.TemplateResponse(
-        "login.html",
-        {
-            "request": request,
-            "sso_enabled": bool(os.getenv("AZURE_CLIENT_ID"))
-        }
-    )
-
-
-# Health check endpoint
-@app.get("/health")
-async def health_check():
-    """Health check endpoint for Docker/K8s"""
-    return {
-        "status": "healthy",
-        "database": "connected",  # Will check actual DB later
-        "redis": "connected"      # Will check actual Redis later
-    }
-
-
-# Global exception handler
-@app.exception_handler(Exception)
-async def global_exception_handler(request, exc):
-    """Handle all uncaught exceptions"""
-    return JSONResponse(
-        status_code=500,
-        content={
-            "error": "Internal server error",
-            "detail": str(exc) if os.getenv("DEBUG") == "true" else "An error occurred"
-        }
-    )
-
-
-if __name__ == "__main__":
-    import uvicorn
-
-    # Run with: python -m app.main
-    uvicorn.run(
-        "app.main:app",
-        host="0.0.0.0",
-        port=8000,
-        reload=True,  # Auto-reload on code changes
-        log_level="info"
-    )
--- a/backend/app/models/file.py
+++ b/backend/app/models/file.py
@ -1,172 +0,0 @@
-"""
-Pydantic Models for File Operations
-Request/Response schemas for file upload, metadata, etc.
-"""
-
-from pydantic import BaseModel, Field
-from typing import Optional, List, Dict, Any
-from datetime import datetime
-
-
-# ===== File Upload Models =====
-
-class FileUploadResponse(BaseModel):
-    """Response after file upload"""
-    file_id: str
-    filename: str
-    filepath: str
-    file_type: str
-    size: int
-    uploaded_at: str
-    current_metadata: Dict[str, Optional[str]]
-    suggested_metadata: Dict[str, Optional[str]]
-    metadata_source: str
-
-
-class UploadSessionResponse(BaseModel):
-    """Response with session ID and uploaded files"""
-    success: bool
-    session_id: str
-    files: List[FileUploadResponse]
-    message: Optional[str] = None
-
-
-# ===== Metadata Models =====
-
-class MetadataUpdate(BaseModel):
-    """Metadata update request"""
-    title: str = Field(..., max_length=200, description="Title (required)")
-    subject: Optional[str] = Field(None, max_length=300, description="Subject")
-    keywords: Optional[str] = Field(None, max_length=500, description="Keywords")
-    author: Optional[str] = Field(None, max_length=100, description="Author")
-    copyright: Optional[str] = Field(None, max_length=150, description="Copyright")
-    comments: Optional[str] = Field(None, max_length=500, description="Comments")
-    custom_fields: Optional[Dict[str, str]] = Field(None, description="Custom metadata fields")
-
-
-class FileMetadataUpdate(BaseModel):
-    """Update metadata for a single file"""
-    session_id: str
-    file_index: int
-    metadata: MetadataUpdate
-
-
-class BatchMetadataUpdate(BaseModel):
-    """Update metadata for multiple files"""
-    session_id: str
-    file_indices: List[int]
-    metadata: MetadataUpdate
-
-
-class MetadataUpdateResponse(BaseModel):
-    """Response after metadata update"""
-    success: bool
-    file_id: str
-    filename: str
-    verified: bool
-    message: str
-
-
-# ===== Download Models =====
-
-class BatchDownloadRequest(BaseModel):
-    """Request to download multiple files as ZIP"""
-    session_id: str
-    file_indices: List[int]
-
-
-# ===== Import/Excel Models =====
-
-class ImportFileResponse(BaseModel):
-    """Response after importing metadata file"""
-    success: bool
-    import_session_id: str
-    filename: str
-    import_type: str  # 'csv', 'excel', 'json'
-    columns: Optional[List[str]] = None
-    sheet_names: Optional[List[str]] = None  # For Excel only
-    sample_data: Optional[List[Dict[str, Any]]] = None
-    row_count: Optional[int] = None
-
-
-class ColumnMapping(BaseModel):
-    """Column mapping configuration"""
-    source_column: str
-    target_field: str  # 'filename', 'title', 'subject', 'keywords', 'author', etc.
-    confidence: Optional[float] = None
-
-
-class ImportMappingConfig(BaseModel):
-    """Import mapping configuration"""
-    import_session_id: str
-    sheet_name: Optional[str] = None  # For Excel
-    column_mappings: List[ColumnMapping]
-
-
-class ExcelSheetPreviewRequest(BaseModel):
-    """Request to preview Excel sheet"""
-    excel_session_id: str
-    sheet_name: str
-
-
-# ===== Template Models =====
-
-class TemplateCreate(BaseModel):
-    """Create new template"""
-    name: str = Field(..., max_length=100)
-    title: str = Field(..., max_length=500)
-    subject: Optional[str] = Field(None, max_length=500)
-    keywords: Optional[str] = Field(None, max_length=500)
-    description: Optional[str] = Field(None, max_length=1000)
-
-
-class TemplateApply(BaseModel):
-    """Apply template to files"""
-    session_id: str
-    template_name: str
-    file_indices: List[int]
-    custom_vars: Optional[Dict[str, str]] = None
-
-
-class TemplatePreview(BaseModel):
-    """Preview template output"""
-    title: str
-    subject: Optional[str] = None
-    keywords: Optional[str] = None
-    sample_filename: str = "example.pdf"
-    custom_vars: Optional[Dict[str, str]] = None
-
-
-class TemplateResponse(BaseModel):
-    """Template data response"""
-    name: str
-    title: str
-    subject: Optional[str] = None
-    keywords: Optional[str] = None
-    description: Optional[str] = None
-
-
-# ===== Session Cleanup =====
-
-class SessionCleanupRequest(BaseModel):
-    """Request to cleanup session files"""
-    session_id: str
-
-
-# ===== Stats Models =====
-
-class StorageStats(BaseModel):
-    """Storage statistics"""
-    total_files: int
-    total_size_bytes: int
-    total_size_mb: float
-    total_users: int
-
-
-class UserActivity(BaseModel):
-    """User activity log entry"""
-    id: int
-    user_id: int
-    action: str
-    details: Optional[str]
-    timestamp: str
--- a/backend/app/processors/base_extractor.py
+++ b/backend/app/processors/base_extractor.py
@ -1,64 +0,0 @@
-"""Base class for all content extractors."""
-
-from abc import ABC, abstractmethod
-from typing import Dict, Optional
-
-class BaseExtractor(ABC):
-    """Abstract base class for content extractors."""
-
-    @abstractmethod
-    def extract_content(self, file_path: str) -> str:
-        """
-        Extract text content from file.
-
-        Args:
-            file_path: Path to the file
-
-        Returns:
-            Extracted text content
-        """
-        pass
-
-    @abstractmethod
-    def read_metadata(self, file_path: str) -> Dict[str, str]:
-        """
-        Read existing metadata from file.
-
-        Args:
-            file_path: Path to the file
-
-        Returns:
-            Dictionary of metadata fields
-        """
-        pass
-
-    def truncate_content(self, content: str, max_length: int = 3000) -> str:
-        """
-        Truncate content to maximum length for AI processing.
-
-        Args:
-            content: Text content
-            max_length: Maximum length
-
-        Returns:
-            Truncated content
-        """
-        if len(content) <= max_length:
-            return content
-        return content[:max_length] + "..."
-
-    def clean_text(self, text: str) -> str:
-        """
-        Clean extracted text (remove excessive whitespace, etc.).
-
-        Args:
-            text: Raw text
-
-        Returns:
-            Cleaned text
-        """
-        # Remove multiple spaces
-        text = ' '.join(text.split())
-        # Remove multiple newlines
-        text = '\n'.join(line for line in text.split('\n') if line.strip())
-        return text.strip()
--- a/backend/app/processors/base_updater.py
+++ b/backend/app/processors/base_updater.py
@ -1,60 +0,0 @@
-"""Base class for all metadata updaters."""
-
-from abc import ABC, abstractmethod
-from typing import Dict, Optional
-
-class BaseUpdater(ABC):
-    """Abstract base class for metadata updaters."""
-
-    @abstractmethod
-    def update_metadata(self, file_path: str, metadata: Dict[str, str], backup: bool = True) -> bool:
-        """
-        Update file metadata.
-
-        Args:
-            file_path: Path to the file
-            metadata: Dictionary of metadata to update
-            backup: Whether to create backup before updating
-
-        Returns:
-            True if successful, False otherwise
-        """
-        pass
-
-    @abstractmethod
-    def verify_metadata(self, file_path: str, expected_metadata: Dict[str, str]) -> bool:
-        """
-        Verify metadata was written correctly.
-
-        Args:
-            file_path: Path to the file
-            expected_metadata: Expected metadata values
-
-        Returns:
-            True if metadata matches expected values
-        """
-        pass
-
-    def validate_metadata(self, metadata: Dict[str, str]) -> bool:
-        """
-        Validate metadata before writing.
-
-        Args:
-            metadata: Metadata dictionary
-
-        Returns:
-            True if valid
-        """
-        # Check for required fields
-        required_fields = ['title']
-        for field in required_fields:
-            if field not in metadata or not metadata[field]:
-                return False
-
-        # Check field lengths
-        if len(metadata.get('title', '')) > 200:
-            return False
-        if len(metadata.get('keywords', '')) > 500:
-            return False
-
-        return True
--- a/backend/app/processors/config.py
+++ b/backend/app/processors/config.py
@ -1,70 +0,0 @@
-"""Configuration management for Oliver Metadata Tool."""
-
-import os
-import shutil
-import logging
-from pathlib import Path
-from dotenv import load_dotenv
-
-# Load environment variables
-load_dotenv()
-
-logger = logging.getLogger(__name__)
-
-class Config:
-    """Configuration class for managing settings."""
-
-    # App Info
-    APP_NAME = "Oliver Metadata Tool"
-    APP_VERSION = "3.0.0"
-    APP_DESCRIPTION = "Universal metadata creation and management tool"
-
-    # Paths
-    PROJECT_ROOT = Path(__file__).parent.parent
-    OUTPUT_DIR = PROJECT_ROOT / 'output'
-    BACKUP_DIR = OUTPUT_DIR / 'backup'
-    REPORTS_DIR = OUTPUT_DIR / 'reports'
-
-    # External tool paths (optional)
-    TESSERACT_PATH = os.getenv('TESSERACT_PATH')
-    FFMPEG_PATH = os.getenv('FFMPEG_PATH')
-
-    # Processing Settings
-    PDF_MAX_PAGES = 3  # Maximum pages to extract from PDF
-
-    # OCR Settings - languages for Tesseract (CGA region support)
-    # eng=English, chi_sim=Chinese Simplified, chi_tra=Chinese Traditional,
-    # jpn=Japanese, kor=Korean
-    OCR_LANGUAGES = os.getenv('OCR_LANGUAGES', 'eng+chi_sim+chi_tra+jpn+kor')
-
-    # AI Settings (for CLI and Web AI mode)
-    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
-    AI_MODEL = os.getenv('OPENAI_MODEL') or os.getenv('AI_MODEL', 'gpt-4o-mini')  # Support both env vars
-    MAX_TOKENS = int(os.getenv('MAX_TOKENS', '500'))
-    TEMPERATURE = float(os.getenv('TEMPERATURE', '0.5'))  # 0.5 better for factual content
-    MAX_TEXT_LENGTH = int(os.getenv('MAX_TEXT_LENGTH', '4000'))
-
-    # API Rate Limiting & Retry (from open source analysis)
-    API_TIMEOUT = int(os.getenv('API_TIMEOUT', '30'))
-    API_MAX_RETRIES = int(os.getenv('API_MAX_RETRIES', '3'))
-    API_RETRY_DELAY = float(os.getenv('API_RETRY_DELAY', '1.0'))  # exponential backoff multiplier
-
-    @classmethod
-    def ensure_directories(cls):
-        """Ensure required directories exist."""
-        cls.OUTPUT_DIR.mkdir(exist_ok=True)
-        cls.BACKUP_DIR.mkdir(exist_ok=True)
-        cls.REPORTS_DIR.mkdir(exist_ok=True)
-
-    @classmethod
-    def check_exiftool(cls):
-        """Check if ExifTool is installed."""
-        exiftool_path = shutil.which('exiftool')
-        if not exiftool_path:
-            logger.warning("⚠️  ExifTool not found. Install with: brew install exiftool (macOS) or apt-get install libimage-exiftool-perl (Linux)")
-            return False
-        logger.info(f"✓ ExifTool found at {exiftool_path}")
-        return True
-
-# Ensure directories on import
-Config.ensure_directories()
--- a/backend/app/processors/excel_metadata_lookup.py
+++ b/backend/app/processors/excel_metadata_lookup.py
@ -1,171 +0,0 @@
-"""Excel-based metadata lookup service."""
-
-import pandas as pd
-from pathlib import Path
-from typing import Dict, Optional
-from .utils import get_logger
-
-logger = get_logger(__name__)
-
-
-class ExcelMetadataLookup:
-    """Lookup metadata from Excel spreadsheet by filename."""
-
-    def __init__(self, excel_path: str):
-        """
-        Initialize the lookup service.
-
-        Args:
-            excel_path: Path to the Excel file with metadata
-        """
-        self.excel_path = Path(excel_path)
-        self.filename_to_metadata = {}
-        self._load_excel()
-
-    def _load_excel(self):
-        """Load and index the Excel file from multiple sheets."""
-        try:
-            logger.info(f"Loading metadata from: {self.excel_path}")
-
-            # Load Sheet 1: DSB Celum ID to Path mapping
-            self._load_dsb_sheet()
-
-            # Load Sheet 2: Medsurg Metadata Cheat (fallback)
-            self._load_medsurg_sheet()
-
-            logger.info(f"✅ Total loaded: {len(self.filename_to_metadata)} metadata records")
-
-        except Exception as e:
-            logger.error(f"Failed to load Excel file: {e}", exc_info=True)
-            raise
-
-    def _load_dsb_sheet(self):
-        """Load DSB Celum ID to Path mapping sheet."""
-        try:
-            df = pd.read_excel(
-                self.excel_path,
-                sheet_name="DSB Celum ID to Path mapping"
-            )
-
-            # Skip header row (first row contains template)
-            df = df[df['Celum ID'].notna()][1:]
-
-            count = 0
-            for _, row in df.iterrows():
-                filename = row.get('File Name')
-                if pd.notna(filename):
-                    # Get filename without extension for indexing
-                    filename_stem = Path(str(filename).strip()).stem.lower()
-
-                    metadata = {
-                        'celum_id': str(row['Celum ID']) if pd.notna(row.get('Celum ID')) else '',
-                        'title': str(row['Title']) if pd.notna(row.get('Title')) else '',
-                        'description': str(row['External Description/Alt Text']) if pd.notna(row.get('External Description/Alt Text')) else '',
-                        'business': str(row['Business']) if pd.notna(row.get('Business')) else '',
-                        'original_filename': str(filename).strip(),
-                        'source_sheet': 'DSB'
-                    }
-
-                    # Only add if not already exists
-                    if filename_stem not in self.filename_to_metadata:
-                        self.filename_to_metadata[filename_stem] = metadata
-                        count += 1
-
-            logger.info(f"✅ Loaded {count} records from DSB sheet")
-
-        except Exception as e:
-            logger.warning(f"Failed to load DSB sheet: {e}")
-
-    def _load_medsurg_sheet(self):
-        """Load Medsurg Metadata Cheat sheet."""
-        try:
-            df = pd.read_excel(
-                self.excel_path,
-                sheet_name="Medsurg Metadata Cheat"
-            )
-
-            # Skip header row
-            df = df[df['Celum ID'].notna()][1:]
-
-            count = 0
-            for _, row in df.iterrows():
-                # Get filename from Solventum DAM Asset Path (extract filename from path)
-                asset_path = row.get('Solventum DAM Asset Path')
-                if pd.notna(asset_path):
-                    # Extract filename from path
-                    filename = Path(str(asset_path).strip()).name
-                    filename_stem = Path(filename).stem.lower()
-
-                    metadata = {
-                        'celum_id': str(row['Celum ID']) if pd.notna(row.get('Celum ID')) else '',
-                        'title': str(row['Title']) if pd.notna(row.get('Title')) else '',
-                        'description': str(row['External Description/Alt Text']) if pd.notna(row.get('External Description/Alt Text')) else '',
-                        'business': str(row['Business']) if pd.notna(row.get('Business')) else '',
-                        'original_filename': filename,
-                        'source_sheet': 'Medsurg'
-                    }
-
-                    # Only add if not already exists (DSB has priority)
-                    if filename_stem not in self.filename_to_metadata:
-                        self.filename_to_metadata[filename_stem] = metadata
-                        count += 1
-
-            logger.info(f"✅ Loaded {count} records from Medsurg sheet")
-
-        except Exception as e:
-            logger.warning(f"Failed to load Medsurg sheet: {e}")
-
-    def lookup_by_filename(self, filename: str) -> Optional[Dict[str, str]]:
-        """
-        Lookup metadata by filename (ignoring extension).
-
-        Args:
-            filename: Name of the file (with or without extension)
-
-        Returns:
-            Dictionary with metadata fields, or None if not found
-        """
-        # Extract just the filename without path and extension
-        filename_stem = Path(filename).stem.lower()
-
-        # Direct lookup by stem (case-insensitive)
-        if filename_stem in self.filename_to_metadata:
-            result = self.filename_to_metadata[filename_stem]
-            logger.info(f"✅ Found match for: {filename} (from {result.get('source_sheet', 'unknown')} sheet)")
-            return result
-
-        logger.warning(f"⚠️ No metadata found for: {filename} (searched: {filename_stem})")
-        return None
-
-    def search_by_celum_id(self, celum_id: str) -> Optional[Dict[str, str]]:
-        """
-        Search metadata by Celum ID.
-
-        Args:
-            celum_id: Celum ID to search for
-
-        Returns:
-            Dictionary with metadata fields, or None if not found
-        """
-        celum_id = str(celum_id).strip()
-
-        for metadata in self.filename_to_metadata.values():
-            if metadata['celum_id'] == celum_id:
-                logger.info(f"✅ Found metadata for Celum ID: {celum_id}")
-                return metadata
-
-        logger.warning(f"⚠️ No metadata found for Celum ID: {celum_id}")
-        return None
-
-    def get_stats(self) -> Dict[str, int]:
-        """Get statistics about loaded metadata."""
-        dsb_count = sum(1 for m in self.filename_to_metadata.values() if m.get('source_sheet') == 'DSB')
-        medsurg_count = sum(1 for m in self.filename_to_metadata.values() if m.get('source_sheet') == 'Medsurg')
-
-        return {
-            'total_records': len(self.filename_to_metadata),
-            'dsb_records': dsb_count,
-            'medsurg_records': medsurg_count,
-            'with_title': sum(1 for m in self.filename_to_metadata.values() if m['title']),
-            'with_description': sum(1 for m in self.filename_to_metadata.values() if m['description']),
-        }
--- a/backend/app/processors/extractors/init.py
+++ b/backend/app/processors/extractors/init.py
@ -1 +0,0 @@
-"""Content extractors for different file types."""
--- a/backend/app/processors/extractors/exiftool_extractor.py
+++ b/backend/app/processors/extractors/exiftool_extractor.py
@ -1,174 +0,0 @@
-"""Unified metadata extractor using ExifTool for images, video, and PDF files."""
-
-from typing import Dict, Optional
-from pathlib import Path
-import logging
-
-try:
-    from exiftool import ExifToolHelper
-    EXIFTOOL_AVAILABLE = True
-except ImportError:
-    EXIFTOOL_AVAILABLE = False
-
-from ..base_extractor import BaseExtractor
-from ..utils import get_logger
-
-logger = get_logger(__name__)
-
-
-class ExifToolExtractor(BaseExtractor):
-    """
-    Extract metadata using ExifTool.
-
-    Supports images (JPEG, PNG, GIF, TIFF, HEIC, RAW),
-    videos (MP4, MOV, AVI, MKV), and PDF metadata extraction.
-
-    Note: This does NOT extract content (text) from files - only metadata.
-    For content extraction, use the regular extractors (PDFExtractor, ImageExtractor with OCR).
-    """
-
-    # Map ExifTool tags to our standard metadata fields
-    TAG_MAPPING = {
-        # Images (JPEG/PNG/TIFF)
-        'EXIF:ImageDescription': 'title',
-        'XMP:Description': 'subject',
-        'IPTC:Caption-Abstract': 'subject',
-        'IPTC:Headline': 'title',
-        'XMP:Title': 'title',
-        'EXIF:XPSubject': 'subject',
-        'EXIF:XPKeywords': 'keywords',
-        'IPTC:Keywords': 'keywords',
-        'XMP:Subject': 'keywords',
-
-        # PDF
-        'PDF:Title': 'title',
-        'PDF:Subject': 'subject',
-        'PDF:Keywords': 'keywords',
-
-        # Video (QuickTime/MP4)
-        'QuickTime:Title': 'title',
-        'QuickTime:Description': 'subject',
-        'QuickTime:Keywords': 'keywords',
-        'UserData:Title': 'title',
-        'UserData:Description': 'subject',
-    }
-
-    def __init__(self):
-        """Initialize ExifTool extractor."""
-        if not EXIFTOOL_AVAILABLE:
-            raise ImportError(
-                "PyExifTool not installed. Install with: pip install PyExifTool>=0.5.6\n"
-                "Also ensure ExifTool is installed on your system."
-            )
-
-    def extract_content(self, file_path: str) -> str:
-        """
-        ExifTool does not extract text content - only metadata.
-
-        This method returns empty string. For content extraction:
-        - PDFs: Use PDFExtractor
-        - Images: Use ImageExtractor with OCR
-        - Office docs: Use OfficeExtractor
-
-        Args:
-            file_path: Path to the file
-
-        Returns:
-            Empty string (ExifTool doesn't extract content)
-        """
-        logger.debug(f"ExifToolExtractor.extract_content called for {file_path} - returning empty (metadata only)")
-        return ""
-
-    def read_metadata(self, file_path: str) -> Dict[str, str]:
-        """
-        Read metadata using ExifTool.
-
-        Extracts title, subject, and keywords from various metadata fields.
-        Supports images, videos, and PDFs.
-
-        Args:
-            file_path: Path to the file
-
-        Returns:
-            Dictionary with metadata (title, subject, keywords)
-        """
-        try:
-            with ExifToolHelper() as et:
-                metadata_list = et.get_metadata([file_path])
-                if not metadata_list:
-                    logger.warning(f"No metadata returned by ExifTool for {file_path}")
-                    return {'title': '', 'subject': '', 'keywords': ''}
-
-                exif_data = metadata_list[0]
-                result = {'title': '', 'subject': '', 'keywords': ''}
-
-                # Map ExifTool tags to standard fields
-                for exif_tag, standard_key in self.TAG_MAPPING.items():
-                    if exif_tag in exif_data and exif_data[exif_tag]:
-                        value = exif_data[exif_tag]
-
-                        # Handle list values (keywords often come as arrays)
-                        if isinstance(value, list):
-                            value = ', '.join(str(v) for v in value)
-                        else:
-                            value = str(value)
-
-                        # First non-empty value wins (priority based on TAG_MAPPING order)
-                        if not result[standard_key] and value.strip():
-                            result[standard_key] = value.strip()
-
-                logger.info(f"Extracted metadata from {Path(file_path).name}: "
-                           f"title={bool(result['title'])}, "
-                           f"subject={bool(result['subject'])}, "
-                           f"keywords={bool(result['keywords'])}")
-
-                return result
-
-        except Exception as e:
-            logger.error(f"ExifTool extraction failed for {file_path}: {e}")
-            return {'title': '', 'subject': '', 'keywords': ''}
-
-    def get_all_tags(self, file_path: str) -> Dict:
-        """
-        Get all available metadata tags from a file.
-
-        Useful for debugging or exploring available metadata fields.
-
-        Args:
-            file_path: Path to the file
-
-        Returns:
-            Dictionary of all metadata tags
-        """
-        try:
-            with ExifToolHelper() as et:
-                metadata_list = et.get_metadata([file_path])
-                if metadata_list:
-                    return metadata_list[0]
-                return {}
-        except Exception as e:
-            logger.error(f"Failed to get all tags for {file_path}: {e}")
-            return {}
-
-    def get_specific_tags(self, file_path: str, tags: list) -> Dict:
-        """
-        Get specific metadata tags from a file.
-
-        More efficient than get_all_tags when you know which tags you need.
-
-        Args:
-            file_path: Path to the file
-            tags: List of tag names (e.g., ['EXIF:ImageDescription', 'PDF:Title'])
-
-        Returns:
-            Dictionary of requested tags
-        """
-        try:
-            with ExifToolHelper() as et:
-                metadata_list = et.get_tags([file_path], tags=tags)
-                if metadata_list:
-                    return metadata_list[0]
-                return {}
-        except Exception as e:
-            logger.error(f"Failed to get specific tags for {file_path}: {e}")
-            return {}
--- a/backend/app/processors/extractors/image_extractor.py
+++ b/backend/app/processors/extractors/image_extractor.py
@ -1,179 +0,0 @@
-"""Image content and metadata extractor."""
-
-import pytesseract
-import piexif
-from PIL import Image
-from typing import Dict
-import os
-
-from ..base_extractor import BaseExtractor
-from ..config import Config
-from ..utils import get_logger
-
-logger = get_logger(__name__)
-
-
-class ImageExtractor(BaseExtractor):
-    """Extractor for image files (JPEG, PNG, etc.) with OCR and EXIF metadata."""
-
-    def __init__(self):
-        """Initialize image extractor."""
-        self.tesseract_path = Config.TESSERACT_PATH
-        if self.tesseract_path and os.path.exists(self.tesseract_path):
-            pytesseract.pytesseract.pytesseract_cmd = self.tesseract_path
-        # Get OCR languages from config (supports Chinese, Japanese, Korean, etc.)
-        self.ocr_lang = Config.OCR_LANGUAGES
-
-    def extract_content(self, file_path: str) -> str:
-        """
-        Extract text content from image using OCR.
-
-        Uses pytesseract to perform optical character recognition on the image.
-        Supports multiple languages including Chinese, Japanese, Korean.
-
-        Args:
-            file_path: Path to the image file
-
-        Returns:
-            Extracted text content
-
-        Raises:
-            Exception: If extraction fails
-        """
-        try:
-            logger.info(f"Starting image OCR extraction from {file_path}")
-
-            # Open image
-            image = Image.open(file_path)
-
-            # Apply OCR with multi-language support
-            text = pytesseract.image_to_string(image, lang=self.ocr_lang)
-
-            if text and len(text.strip()) > 0:
-                cleaned_text = self.clean_text(text)
-                logger.info(f"Successfully extracted {len(cleaned_text)} characters from {file_path}")
-                return cleaned_text
-            else:
-                logger.warning(f"OCR extraction returned empty content for {file_path}")
-                return ""
-
-        except Exception as e:
-            logger.error(f"Failed to extract content from image {file_path}: {e}", exc_info=True)
-            return ""
-
-    def read_metadata(self, file_path: str) -> Dict[str, str]:
-        """
-        Read image metadata from EXIF and IPTC data.
-
-        Extracts standard image metadata fields including camera info, date taken,
-        copyright, etc.
-
-        Args:
-            file_path: Path to the image file
-
-        Returns:
-            Dictionary of metadata fields
-
-        Raises:
-            Exception: If metadata reading fails
-        """
-        metadata = {}
-
-        try:
-            # Get file extension to determine format
-            file_ext = file_path.lower().split('.')[-1]
-
-            # Try EXIF data
-            metadata = self._read_exif_metadata(file_path)
-
-            # For PNG files, try IPTC data
-            if file_ext in ['png']:
-                iptc_metadata = self._read_iptc_metadata(file_path)
-                metadata.update(iptc_metadata)
-
-            logger.info(f"Successfully read metadata from {file_path}")
-            return metadata
-
-        except Exception as e:
-            logger.error(f"Failed to read image metadata from {file_path}: {e}", exc_info=True)
-            return {}
-
-    def _read_exif_metadata(self, file_path: str) -> Dict[str, str]:
-        """
-        Read EXIF metadata from image.
-
-        Args:
-            file_path: Path to image file
-
-        Returns:
-            Dictionary of EXIF metadata
-        """
-        try:
-            # Try piexif first for JPEG
-            if file_path.lower().endswith(('.jpg', '.jpeg')):
-                try:
-                    exif_dict = piexif.load(file_path)
-                    metadata = {}
-
-                    # Extract commonly useful EXIF fields
-                    if "0th" in exif_dict:
-                        for tag, value in exif_dict["0th"].items():
-                            tag_name = piexif.TAGS["0th"][tag]["name"]
-                            try:
-                                if isinstance(value, bytes):
-                                    value = value.decode('utf-8', errors='ignore')
-                                metadata[tag_name.lower()] = str(value).strip()
-                            except Exception:
-                                pass
-
-                    return metadata
-                except Exception as e:
-                    logger.debug(f"piexif extraction failed: {e}")
-
-            # Fallback to PIL for all image types
-            image = Image.open(file_path)
-            metadata = {}
-
-            if hasattr(image, '_getexif') and image._getexif() is not None:
-                exif_data = image._getexif()
-                for tag_id, value in exif_data.items():
-                    tag_name = piexif.TAGS["0th"].get(tag_id, {}).get("name", f"tag_{tag_id}")
-                    if isinstance(value, bytes):
-                        value = value.decode('utf-8', errors='ignore')
-                    metadata[tag_name.lower()] = str(value).strip()
-
-            return metadata
-
-        except Exception as e:
-            logger.debug(f"EXIF metadata extraction failed: {e}")
-            return {}
-
-    def _read_iptc_metadata(self, file_path: str) -> Dict[str, str]:
-        """
-        Read IPTC metadata from image.
-
-        Args:
-            file_path: Path to image file
-
-        Returns:
-            Dictionary of IPTC metadata
-        """
-        try:
-            from PIL import Image
-            from PIL.PngImagePlugin import PngInfo
-
-            image = Image.open(file_path)
-            metadata = {}
-
-            # Check for PNG info
-            if hasattr(image, 'info'):
-                for key, value in image.info.items():
-                    if isinstance(value, bytes):
-                        value = value.decode('utf-8', errors='ignore')
-                    metadata[str(key).lower()] = str(value).strip()
-
-            return metadata
-
-        except Exception as e:
-            logger.debug(f"IPTC metadata extraction failed: {e}")
-            return {}
--- a/backend/app/processors/extractors/office_extractor.py
+++ b/backend/app/processors/extractors/office_extractor.py
@ -1,207 +0,0 @@
-"""Office document content and metadata extractor."""
-
-from docx import Document as DocxDocument
-from openpyxl import load_workbook
-from pptx import Presentation
-from typing import Dict
-
-from ..base_extractor import BaseExtractor
-from ..utils import get_logger
-
-logger = get_logger(__name__)
-
-
-class OfficeExtractor(BaseExtractor):
-    """Extractor for Office files (DOCX, XLSX, PPTX)."""
-
-    SUPPORTED_FORMATS = ['docx', 'xlsx', 'pptx']
-
-    def extract_content(self, file_path: str) -> str:
-        """
-        Extract text content from Office document.
-
-        Routes to appropriate extraction method based on file format.
-
-        Args:
-            file_path: Path to the Office file
-
-        Returns:
-            Extracted text content
-        """
-        try:
-            file_ext = file_path.lower().split('.')[-1]
-
-            if file_ext == 'docx':
-                return self._extract_docx_content(file_path)
-            elif file_ext == 'xlsx':
-                return self._extract_xlsx_content(file_path)
-            elif file_ext == 'pptx':
-                return self._extract_pptx_content(file_path)
-            else:
-                logger.error(f"Unsupported Office format: {file_ext}")
-                return ""
-
-        except Exception as e:
-            logger.error(f"Failed to extract content from Office file {file_path}: {e}", exc_info=True)
-            return ""
-
-    def read_metadata(self, file_path: str) -> Dict[str, str]:
-        """
-        Read metadata from Office document.
-
-        Routes to appropriate metadata reading method based on file format.
-
-        Args:
-            file_path: Path to the Office file
-
-        Returns:
-            Dictionary of metadata fields
-        """
-        try:
-            file_ext = file_path.lower().split('.')[-1]
-
-            if file_ext == 'docx':
-                return self._read_docx_metadata(file_path)
-            elif file_ext == 'xlsx':
-                return self._read_xlsx_metadata(file_path)
-            elif file_ext == 'pptx':
-                return self._read_pptx_metadata(file_path)
-            else:
-                logger.error(f"Unsupported Office format: {file_ext}")
-                return {}
-
-        except Exception as e:
-            logger.error(f"Failed to read metadata from Office file {file_path}: {e}", exc_info=True)
-            return {}
-
-    def _extract_docx_content(self, file_path: str) -> str:
-        """Extract text content from DOCX file."""
-        try:
-            logger.info(f"Extracting content from DOCX: {file_path}")
-            doc = DocxDocument(file_path)
-            paragraphs = [para.text for para in doc.paragraphs if para.text.strip()]
-            content = "\n".join(paragraphs)
-            cleaned_content = self.clean_text(content)
-            logger.info(f"Successfully extracted {len(cleaned_content)} characters from DOCX")
-            return cleaned_content
-        except Exception as e:
-            logger.error(f"Failed to extract DOCX content: {e}", exc_info=True)
-            return ""
-
-    def _extract_xlsx_content(self, file_path: str) -> str:
-        """Extract text content from XLSX file."""
-        try:
-            logger.info(f"Extracting content from XLSX: {file_path}")
-            workbook = load_workbook(file_path)
-            content_parts = []
-
-            for sheet_name in workbook.sheetnames:
-                sheet = workbook[sheet_name]
-                content_parts.append(f"Sheet: {sheet_name}")
-
-                for row in sheet.iter_rows(values_only=True):
-                    row_text = " | ".join(str(cell) if cell is not None else "" for cell in row)
-                    if row_text.strip():
-                        content_parts.append(row_text)
-
-            content = "\n".join(content_parts)
-            cleaned_content = self.clean_text(content)
-            logger.info(f"Successfully extracted {len(cleaned_content)} characters from XLSX")
-            return cleaned_content
-        except Exception as e:
-            logger.error(f"Failed to extract XLSX content: {e}", exc_info=True)
-            return ""
-
-    def _extract_pptx_content(self, file_path: str) -> str:
-        """Extract text content from PPTX file."""
-        try:
-            logger.info(f"Extracting content from PPTX: {file_path}")
-            presentation = Presentation(file_path)
-            content_parts = []
-
-            for slide_num, slide in enumerate(presentation.slides, 1):
-                content_parts.append(f"Slide {slide_num}:")
-
-                for shape in slide.shapes:
-                    if hasattr(shape, "text") and shape.text.strip():
-                        content_parts.append(shape.text)
-
-            content = "\n".join(content_parts)
-            cleaned_content = self.clean_text(content)
-            logger.info(f"Successfully extracted {len(cleaned_content)} characters from PPTX")
-            return cleaned_content
-        except Exception as e:
-            logger.error(f"Failed to extract PPTX content: {e}", exc_info=True)
-            return ""
-
-    def _read_docx_metadata(self, file_path: str) -> Dict[str, str]:
-        """Read metadata from DOCX file."""
-        try:
-            logger.info(f"Reading metadata from DOCX: {file_path}")
-            doc = DocxDocument(file_path)
-            core_props = doc.core_properties
-
-            metadata = {
-                'title': getattr(core_props, 'title', '') or '',
-                'subject': getattr(core_props, 'subject', '') or '',
-                'keywords': getattr(core_props, 'keywords', '') or '',
-                'author': getattr(core_props, 'author', '') or '',
-                'comments': getattr(core_props, 'comments', '') or '',
-                'category': getattr(core_props, 'category', '') or '',
-            }
-
-            # Remove empty values
-            metadata = {k: v for k, v in metadata.items() if v}
-            logger.info(f"Successfully read metadata from DOCX")
-            return metadata
-        except Exception as e:
-            logger.error(f"Failed to read DOCX metadata: {e}", exc_info=True)
-            return {}
-
-    def _read_xlsx_metadata(self, file_path: str) -> Dict[str, str]:
-        """Read metadata from XLSX file."""
-        try:
-            logger.info(f"Reading metadata from XLSX: {file_path}")
-            workbook = load_workbook(file_path)
-            props = workbook.properties
-
-            metadata = {
-                'title': getattr(props, 'title', '') or '',
-                'subject': getattr(props, 'subject', '') or '',
-                'keywords': getattr(props, 'keywords', '') or '',
-                'author': getattr(props, 'author', '') or '',
-                'comments': getattr(props, 'comments', '') or '',
-                'category': getattr(props, 'category', '') or '',
-            }
-
-            # Remove empty values
-            metadata = {k: v for k, v in metadata.items() if v}
-            logger.info(f"Successfully read metadata from XLSX")
-            return metadata
-        except Exception as e:
-            logger.error(f"Failed to read XLSX metadata: {e}", exc_info=True)
-            return {}
-
-    def _read_pptx_metadata(self, file_path: str) -> Dict[str, str]:
-        """Read metadata from PPTX file."""
-        try:
-            logger.info(f"Reading metadata from PPTX: {file_path}")
-            presentation = Presentation(file_path)
-            core_props = presentation.core_properties
-
-            metadata = {
-                'title': getattr(core_props, 'title', '') or '',
-                'subject': getattr(core_props, 'subject', '') or '',
-                'keywords': getattr(core_props, 'keywords', '') or '',
-                'author': getattr(core_props, 'author', '') or '',
-                'comments': getattr(core_props, 'comments', '') or '',
-                'category': getattr(core_props, 'category', '') or '',
-            }
-
-            # Remove empty values
-            metadata = {k: v for k, v in metadata.items() if v}
-            logger.info(f"Successfully read metadata from PPTX")
-            return metadata
-        except Exception as e:
-            logger.error(f"Failed to read PPTX metadata: {e}", exc_info=True)
-            return {}
--- a/backend/app/processors/extractors/pdf_extractor.py
+++ b/backend/app/processors/extractors/pdf_extractor.py
@ -1,228 +0,0 @@
-"""PDF content extractor."""
-
-import pypdf
-import pdfplumber
-from pdf2image import convert_from_path
-import pytesseract
-from typing import Dict
-from pathlib import Path
-import os
-
-from ..base_extractor import BaseExtractor
-from ..config import Config
-from ..utils import get_logger
-
-logger = get_logger(__name__)
-
-
-class PDFExtractor(BaseExtractor):
-    """Extractor for PDF files with fallback to OCR."""
-
-    def __init__(self):
-        """Initialize PDF extractor."""
-        self.tesseract_path = Config.TESSERACT_PATH
-        if self.tesseract_path and os.path.exists(self.tesseract_path):
-            pytesseract.pytesseract.pytesseract_cmd = self.tesseract_path
-        self.max_pages = Config.PDF_MAX_PAGES
-
-    def extract_content(self, file_path: str) -> str:
-        """
-        Extract text content from PDF using multiple fallback strategies.
-
-        First tries pypdf, then pdfplumber, then OCR if both fail.
-        Limits extraction to the first MAX_PDF_PAGES pages.
-
-        Args:
-            file_path: Path to the PDF file
-
-        Returns:
-            Extracted text content
-
-        Raises:
-            Exception: If all extraction methods fail
-        """
-        try:
-            logger.info(f"Starting PDF extraction from {file_path}")
-
-            # Strategy 1: Try pypdf
-            content = self._extract_with_pypdf(file_path)
-            if content and len(content.strip()) > 100:
-                logger.info(f"Successfully extracted {len(content)} characters using pypdf")
-                return self.clean_text(content)
-
-            logger.debug("pypdf returned minimal content, trying pdfplumber")
-
-            # Strategy 2: Try pdfplumber
-            content = self._extract_with_pdfplumber(file_path)
-            if content and len(content.strip()) > 100:
-                logger.info(f"Successfully extracted {len(content)} characters using pdfplumber")
-                return self.clean_text(content)
-
-            logger.debug("pdfplumber returned minimal content, attempting OCR")
-
-            # Strategy 3: Try OCR as last resort
-            content = self._extract_with_ocr(file_path)
-            if content and len(content.strip()) > 50:
-                logger.info(f"Successfully extracted {len(content)} characters using OCR")
-                return self.clean_text(content)
-
-            logger.warning(f"All extraction methods returned minimal content for {file_path}")
-            return ""
-
-        except Exception as e:
-            logger.error(f"Failed to extract PDF content from {file_path}: {e}", exc_info=True)
-            return ""
-
-    def _extract_with_pypdf(self, file_path: str) -> str:
-        """
-        Extract text using pypdf library.
-
-        Args:
-            file_path: Path to PDF file
-
-        Returns:
-            Extracted text
-        """
-        try:
-            content = []
-            with open(file_path, 'rb') as f:
-                pdf_reader = pypdf.PdfReader(f)
-                num_pages = min(len(pdf_reader.pages), self.max_pages)
-
-                for page_num in range(num_pages):
-                    try:
-                        page = pdf_reader.pages[page_num]
-                        text = page.extract_text()
-                        if text:
-                            content.append(text)
-                    except Exception as e:
-                        logger.debug(f"Error extracting page {page_num} with pypdf: {e}")
-                        continue
-
-            return "\n".join(content)
-
-        except Exception as e:
-            logger.debug(f"pypdf extraction failed: {e}")
-            return ""
-
-    def _extract_with_pdfplumber(self, file_path: str) -> str:
-        """
-        Extract text using pdfplumber library.
-
-        Args:
-            file_path: Path to PDF file
-
-        Returns:
-            Extracted text
-        """
-        try:
-            content = []
-            with pdfplumber.open(file_path) as pdf:
-                num_pages = min(len(pdf.pages), self.max_pages)
-
-                for page_num in range(num_pages):
-                    try:
-                        page = pdf.pages[page_num]
-                        text = page.extract_text()
-                        if text:
-                            content.append(text)
-                    except Exception as e:
-                        logger.debug(f"Error extracting page {page_num} with pdfplumber: {e}")
-                        continue
-
-            return "\n".join(content)
-
-        except Exception as e:
-            logger.debug(f"pdfplumber extraction failed: {e}")
-            return ""
-
-    def _extract_with_ocr(self, file_path: str) -> str:
-        """
-        Extract text using OCR via pdf2image and pytesseract.
-
-        Args:
-            file_path: Path to PDF file
-
-        Returns:
-            Extracted text
-        """
-        try:
-            content = []
-
-            # Convert PDF pages to images
-            images = convert_from_path(file_path)
-
-            # Limit to max_pages
-            images = images[:self.max_pages]
-
-            # Get OCR languages from config (supports Chinese, Japanese, Korean, etc.)
-            ocr_lang = Config.OCR_LANGUAGES
-
-            # Apply OCR to each image
-            for page_num, image in enumerate(images):
-                try:
-                    text = pytesseract.image_to_string(image, lang=ocr_lang)
-                    if text:
-                        content.append(text)
-                except Exception as e:
-                    logger.debug(f"Error running OCR on page {page_num}: {e}")
-                    continue
-
-            return "\n".join(content)
-
-        except Exception as e:
-            logger.debug(f"OCR extraction failed: {e}")
-            return ""
-
-    def read_metadata(self, file_path: str) -> Dict[str, str]:
-        """
-        Read PDF metadata from document properties.
-
-        Extracts standard PDF metadata fields: Title, Subject, Keywords, Author, Creator.
-
-        Args:
-            file_path: Path to PDF file
-
-        Returns:
-            Dictionary of metadata fields with lowercase keys
-
-        Raises:
-            Exception: If metadata reading fails
-        """
-        metadata = {}
-
-        try:
-            with open(file_path, 'rb') as f:
-                pdf_reader = pypdf.PdfReader(f)
-
-                # Get document information
-                doc_info = pdf_reader.metadata
-
-                if doc_info:
-                    # Map PDF metadata fields to standardized keys
-                    field_mapping = {
-                        '/Title': 'title',
-                        '/Subject': 'subject',
-                        '/Keywords': 'keywords',
-                        '/Author': 'author',
-                        '/Creator': 'creator',
-                    }
-
-                    for pdf_field, standard_field in field_mapping.items():
-                        try:
-                            value = doc_info.get(pdf_field)
-                            if value:
-                                # Convert bytes to string if necessary
-                                if isinstance(value, bytes):
-                                    value = value.decode('utf-8', errors='ignore')
-                                metadata[standard_field] = str(value).strip()
-                        except Exception as e:
-                            logger.debug(f"Error reading field {pdf_field}: {e}")
-                            continue
-
-            logger.info(f"Successfully read metadata from {file_path}")
-            return metadata
-
-        except Exception as e:
-            logger.error(f"Failed to read PDF metadata from {file_path}: {e}", exc_info=True)
-            return {}
--- a/backend/app/processors/extractors/video_extractor.py
+++ b/backend/app/processors/extractors/video_extractor.py
@ -1,153 +0,0 @@
-"""Video metadata extractor."""
-
-from typing import Dict
-
-from ..base_extractor import BaseExtractor
-from ..utils import get_logger
-
-logger = get_logger(__name__)
-
-
-class VideoExtractor(BaseExtractor):
-    """Extractor for video files (MP4, MOV, AVI) - metadata extraction only."""
-
-    SUPPORTED_FORMATS = ['mp4', 'mov', 'avi', 'mkv', 'flv', 'wmv', 'webm']
-
-    def extract_content(self, file_path: str) -> str:
-        """
-        Extract text content from video (not supported).
-
-        Video files cannot be easily processed for text content without expensive
-        OCR/speech-to-text processing. This method returns empty string.
-
-        Args:
-            file_path: Path to the video file
-
-        Returns:
-            Empty string (not supported for video)
-        """
-        logger.info(f"Text extraction not supported for video files: {file_path}")
-        return ""
-
-    def read_metadata(self, file_path: str) -> Dict[str, str]:
-        """
-        Read metadata from video file using mutagen.
-
-        Extracts standard video metadata tags.
-
-        Args:
-            file_path: Path to the video file
-
-        Returns:
-            Dictionary of metadata fields
-        """
-        try:
-            logger.info(f"Reading metadata from video: {file_path}")
-            metadata = self._read_with_mutagen(file_path)
-            logger.info(f"Successfully read metadata from video")
-            return metadata
-
-        except Exception as e:
-            logger.error(f"Failed to read video metadata from {file_path}: {e}", exc_info=True)
-            return {}
-
-    def _read_with_mutagen(self, file_path: str) -> Dict[str, str]:
-        """
-        Read video metadata using mutagen.
-
-        Args:
-            file_path: Path to video file
-
-        Returns:
-            Dictionary of metadata
-        """
-        try:
-            from mutagen import File
-        except ImportError:
-            logger.warning("mutagen not installed, attempting pymediainfo fallback")
-            return self._read_with_pymediainfo(file_path)
-
-        try:
-            audio = File(file_path)
-            metadata = {}
-
-            if audio is not None:
-                # Extract common tags
-                tag_mapping = {
-                    'TIT2': 'title',
-                    '\xa9nam': 'title',
-                    'Title': 'title',
-                    'TIT3': 'subtitle',
-                    '\xa9cmt': 'comments',
-                    'Comments': 'comments',
-                    'TPE1': 'artist',
-                    '\xa9ART': 'artist',
-                    'Artist': 'artist',
-                    'TALB': 'album',
-                    '\xa9alb': 'album',
-                    'Album': 'album',
-                    'TXXX:KEYWORDS': 'keywords',
-                    'TXXX:Description': 'description',
-                }
-
-                for key, value in audio.items():
-                    # Check direct mapping
-                    if key in tag_mapping:
-                        standard_key = tag_mapping[key]
-                        if isinstance(value, list):
-                            value = value[0] if value else ""
-                        if value:
-                            metadata[standard_key] = str(value).strip()
-
-                    # Generic fallback for other tags
-                    elif isinstance(value, (list, tuple)):
-                        if value:
-                            metadata[key.lower()] = str(value[0]).strip()
-                    else:
-                        metadata[key.lower()] = str(value).strip()
-
-            return metadata
-
-        except Exception as e:
-            logger.debug(f"Mutagen extraction failed: {e}")
-            return self._read_with_pymediainfo(file_path)
-
-    def _read_with_pymediainfo(self, file_path: str) -> Dict[str, str]:
-        """
-        Read video metadata using pymediainfo.
-
-        Args:
-            file_path: Path to video file
-
-        Returns:
-            Dictionary of metadata
-        """
-        try:
-            from pymediainfo import MediaInfo
-        except ImportError:
-            logger.warning("pymediainfo not installed, cannot extract video metadata")
-            return {}
-
-        try:
-            media_info = MediaInfo.parse(file_path)
-            metadata = {}
-
-            # Extract from general track
-            for track in media_info.tracks:
-                if track.track_type == "General":
-                    if hasattr(track, 'title') and track.title:
-                        metadata['title'] = track.title
-                    if hasattr(track, 'comment') and track.comment:
-                        metadata['comments'] = track.comment
-                    if hasattr(track, 'performer') and track.performer:
-                        metadata['artist'] = track.performer
-                    if hasattr(track, 'description') and track.description:
-                        metadata['description'] = track.description
-
-                    break
-
-            return metadata
-
-        except Exception as e:
-            logger.debug(f"pymediainfo extraction failed: {e}")
-            return {}
--- a/backend/app/processors/field_mapper.py
+++ b/backend/app/processors/field_mapper.py
@ -1,409 +0,0 @@
-"""Field mapping with automatic detection and manual override."""
-
-import json
-from typing import Dict, List, Optional, Tuple
-from difflib import SequenceMatcher
-from pathlib import Path
-from .utils import get_logger
-
-logger = get_logger(__name__)
-
-
-class FieldMapper:
-    """Map source fields to standard metadata fields with fuzzy matching."""
-
-    # Standard metadata fields used in Oliver Metadata Tool
-    STANDARD_FIELDS = ['title', 'subject', 'keywords', 'description']
-
-    # Common aliases for fuzzy matching (case-insensitive)
-    FIELD_ALIASES = {
-        'title': [
-            'title', 'name', 'heading', 'filename', 'file_name', 'document_title',
-            'asset_title', 'resource_title', 'object_name', 'label'
-        ],
-        'subject': [
-            'subject', 'description', 'summary', 'abstract', 'alt_text',
-            'external_description', 'caption', 'about', 'overview', 'details',
-            'desc', 'long_description', 'content'
-        ],
-        'keywords': [
-            'keywords', 'tags', 'categories', 'labels', 'subjects', 'topics',
-            'taxonomy', 'classification', 'key_words', 'search_terms'
-        ],
-        'description': [
-            'description', 'desc', 'summary', 'notes', 'comments', 'remarks',
-            'details', 'about', 'information', 'info'
-        ]
-    }
-
-    # Similarity threshold for fuzzy matching (0.0 to 1.0)
-    SIMILARITY_THRESHOLD = 0.6
-
-    def __init__(self, presets_path: Optional[str] = None):
-        """
-        Initialize field mapper.
-
-        Args:
-            presets_path: Path to JSON file for saving/loading mapping presets
-        """
-        self.presets_path = presets_path or 'field_mapping_presets.json'
-
-    def auto_map(self, source_fields: List[str], strict: bool = False) -> Dict[str, Tuple[str, float]]:
-        """
-        Automatically map source fields to standard fields using fuzzy matching.
-
-        Args:
-            source_fields: List of field names from source data
-            strict: If True, only accept matches above high confidence threshold (0.8)
-
-        Returns:
-            Dictionary mapping {source_field: (target_field, confidence_score)}
-            Example: {'File Name': ('title', 0.85), 'Alt Text': ('subject', 0.92)}
-        """
-        mapping = {}
-        threshold = 0.8 if strict else self.SIMILARITY_THRESHOLD
-
-        for source_field in source_fields:
-            best_match = self._find_best_match(source_field, threshold)
-            if best_match:
-                target_field, score = best_match
-                mapping[source_field] = (target_field, score)
-                logger.info(f"Auto-mapped '{source_field}' -> '{target_field}' (confidence: {score:.2f})")
-
-        return mapping
-
-    def _find_best_match(self, source_field: str, threshold: float = 0.6) -> Optional[Tuple[str, float]]:
-        """
-        Find best matching standard field for source field.
-
-        Args:
-            source_field: Source field name
-            threshold: Minimum similarity score (0.0 to 1.0)
-
-        Returns:
-            Tuple of (target_field, confidence_score) or None
-        """
-        source_lower = source_field.lower().replace(' ', '_').replace('-', '_')
-        best_score = 0.0
-        best_field = None
-
-        for standard_field, aliases in self.FIELD_ALIASES.items():
-            for alias in aliases:
-                # Calculate similarity score
-                score = SequenceMatcher(None, source_lower, alias).ratio()
-
-                # Exact match bonus
-                if source_lower == alias:
-                    score = 1.0
-
-                # Substring match bonus
-                elif alias in source_lower or source_lower in alias:
-                    score = max(score, 0.85)
-
-                if score > best_score and score >= threshold:
-                    best_score = score
-                    best_field = standard_field
-
-        if best_field:
-            return (best_field, best_score)
-        return None
-
-    def validate_mapping(self, mapping: Dict[str, str]) -> Dict[str, List[str]]:
-        """
-        Validate a field mapping configuration.
-
-        Args:
-            mapping: Dictionary mapping {source_field: target_field}
-
-        Returns:
-            Dictionary with validation results:
-            {
-                'valid': [list of valid mappings],
-                'invalid': [list of invalid mappings],
-                'warnings': [list of warnings]
-            }
-        """
-        result = {
-            'valid': [],
-            'invalid': [],
-            'warnings': []
-        }
-
-        # Track which target fields are used
-        target_usage = {}
-
-        for source_field, target_field in mapping.items():
-            # Check if target field is valid
-            if target_field not in self.STANDARD_FIELDS:
-                result['invalid'].append(
-                    f"'{target_field}' is not a valid target field (source: '{source_field}')"
-                )
-                continue
-
-            result['valid'].append(f"'{source_field}' -> '{target_field}'")
-
-            # Track multiple sources mapping to same target
-            if target_field in target_usage:
-                target_usage[target_field].append(source_field)
-            else:
-                target_usage[target_field] = [source_field]
-
-        # Warn about multiple sources mapping to same target
-        for target_field, sources in target_usage.items():
-            if len(sources) > 1:
-                result['warnings'].append(
-                    f"Multiple source fields map to '{target_field}': {', '.join(sources)}"
-                )
-
-        return result
-
-    def apply_mapping(self, data: Dict[str, str], mapping: Dict[str, str]) -> Dict[str, str]:
-        """
-        Apply field mapping to transform source data to standard format.
-
-        Args:
-            data: Source data dictionary
-            mapping: Field mapping {source_field: target_field}
-
-        Returns:
-            Transformed data with standard field names
-        """
-        result = {field: '' for field in self.STANDARD_FIELDS}
-
-        for source_field, target_field in mapping.items():
-            if source_field in data and target_field in self.STANDARD_FIELDS:
-                value = data[source_field]
-
-                # Handle multiple values mapping to same target (concatenate)
-                if result[target_field]:
-                    result[target_field] += f"; {value}"
-                else:
-                    result[target_field] = value
-
-        return result
-
-    def save_preset(self, name: str, mapping: Dict[str, str], description: str = ""):
-        """
-        Save mapping preset to file.
-
-        Args:
-            name: Preset name
-            mapping: Field mapping dictionary
-            description: Optional description
-        """
-        presets = self._load_presets()
-
-        presets[name] = {
-            'mapping': mapping,
-            'description': description,
-            'created_at': self._get_timestamp()
-        }
-
-        try:
-            with open(self.presets_path, 'w') as f:
-                json.dump(presets, f, indent=2)
-            logger.info(f"Saved mapping preset: {name}")
-        except Exception as e:
-            logger.error(f"Failed to save preset '{name}': {e}")
-            raise
-
-    def load_preset(self, name: str) -> Optional[Dict[str, str]]:
-        """
-        Load mapping preset from file.
-
-        Args:
-            name: Preset name
-
-        Returns:
-            Mapping dictionary or None if not found
-        """
-        presets = self._load_presets()
-
-        if name in presets:
-            logger.info(f"Loaded mapping preset: {name}")
-            return presets[name].get('mapping', {})
-
-        logger.warning(f"Preset not found: {name}")
-        return None
-
-    def list_presets(self) -> List[Dict[str, str]]:
-        """
-        List all saved presets.
-
-        Returns:
-            List of preset information dictionaries
-        """
-        presets = self._load_presets()
-
-        return [
-            {
-                'name': name,
-                'description': data.get('description', ''),
-                'created_at': data.get('created_at', ''),
-                'fields': len(data.get('mapping', {}))
-            }
-            for name, data in presets.items()
-        ]
-
-    def delete_preset(self, name: str) -> bool:
-        """
-        Delete a mapping preset.
-
-        Args:
-            name: Preset name
-
-        Returns:
-            True if deleted, False if not found
-        """
-        presets = self._load_presets()
-
-        if name in presets:
-            del presets[name]
-
-            try:
-                with open(self.presets_path, 'w') as f:
-                    json.dump(presets, f, indent=2)
-                logger.info(f"Deleted mapping preset: {name}")
-                return True
-            except Exception as e:
-                logger.error(f"Failed to delete preset '{name}': {e}")
-                raise
-
-        return False
-
-    def suggest_mapping(self, source_fields: List[str]) -> Dict:
-        """
-        Generate mapping suggestions with confidence scores and alternatives.
-
-        Args:
-            source_fields: List of source field names
-
-        Returns:
-            Dictionary with suggestions:
-            {
-                'source_field': {
-                    'best_match': 'target_field',
-                    'confidence': 0.85,
-                    'alternatives': [
-                        {'field': 'other_target', 'confidence': 0.65},
-                        ...
-                    ]
-                }
-            }
-        """
-        suggestions = {}
-
-        for source_field in source_fields:
-            # Find all potential matches
-            matches = self._find_all_matches(source_field)
-
-            if matches:
-                best_match = matches[0]
-                suggestions[source_field] = {
-                    'best_match': best_match[0],
-                    'confidence': best_match[1],
-                    'alternatives': [
-                        {'field': field, 'confidence': score}
-                        for field, score in matches[1:3]  # Top 2 alternatives
-                    ]
-                }
-            else:
-                suggestions[source_field] = {
-                    'best_match': None,
-                    'confidence': 0.0,
-                    'alternatives': []
-                }
-
-        return suggestions
-
-    def _find_all_matches(self, source_field: str, min_threshold: float = 0.4) -> List[Tuple[str, float]]:
-        """
-        Find all matching standard fields above threshold, sorted by score.
-
-        Args:
-            source_field: Source field name
-            min_threshold: Minimum similarity score
-
-        Returns:
-            List of (target_field, score) tuples sorted by score descending
-        """
-        source_lower = source_field.lower().replace(' ', '_').replace('-', '_')
-        matches = []
-
-        for standard_field, aliases in self.FIELD_ALIASES.items():
-            best_score = 0.0
-
-            for alias in aliases:
-                score = SequenceMatcher(None, source_lower, alias).ratio()
-
-                # Exact match
-                if source_lower == alias:
-                    score = 1.0
-                # Substring match
-                elif alias in source_lower or source_lower in alias:
-                    score = max(score, 0.85)
-
-                best_score = max(best_score, score)
-
-            if best_score >= min_threshold:
-                matches.append((standard_field, best_score))
-
-        # Sort by score descending
-        matches.sort(key=lambda x: x[1], reverse=True)
-        return matches
-
-    def _load_presets(self) -> Dict:
-        """Load all presets from file."""
-        if Path(self.presets_path).exists():
-            try:
-                with open(self.presets_path, 'r') as f:
-                    return json.load(f)
-            except Exception as e:
-                logger.error(f"Failed to load presets: {e}")
-                return {}
-        return {}
-
-    def _get_timestamp(self) -> str:
-        """Get current timestamp as ISO format string."""
-        from datetime import datetime
-        return datetime.now().isoformat()
-
-    def get_unmapped_fields(self, source_fields: List[str], mapping: Dict[str, str]) -> List[str]:
-        """
-        Get list of source fields that are not mapped.
-
-        Args:
-            source_fields: All source field names
-            mapping: Current mapping dictionary
-
-        Returns:
-            List of unmapped source fields
-        """
-        return [field for field in source_fields if field not in mapping]
-
-    def get_mapping_coverage(self, source_fields: List[str], mapping: Dict[str, str]) -> Dict:
-        """
-        Calculate mapping coverage statistics.
-
-        Args:
-            source_fields: All source field names
-            mapping: Current mapping dictionary
-
-        Returns:
-            Statistics dictionary with coverage info
-        """
-        total_fields = len(source_fields)
-        mapped_fields = len(mapping)
-        unmapped = self.get_unmapped_fields(source_fields, mapping)
-
-        # Count unique target fields used
-        unique_targets = len(set(mapping.values()))
-
-        return {
-            'total_source_fields': total_fields,
-            'mapped_fields': mapped_fields,
-            'unmapped_fields': len(unmapped),
-            'coverage_percent': (mapped_fields / total_fields * 100) if total_fields > 0 else 0,
-            'unique_targets_used': unique_targets,
-            'unmapped_field_list': unmapped
-        }
--- a/backend/app/processors/file_detector.py
+++ b/backend/app/processors/file_detector.py
@ -1,97 +0,0 @@
-"""File type detection and routing."""
-
-from enum import Enum
-from pathlib import Path
-from typing import Optional
-import mimetypes
-
-class FileType(Enum):
-    """Supported file types."""
-    PDF = "pdf"
-    IMAGE = "image"
-    OFFICE_DOC = "office_doc"
-    OFFICE_SHEET = "office_sheet"
-    OFFICE_PRESENTATION = "office_presentation"
-    VIDEO = "video"
-    UNSUPPORTED = "unsupported"
-
-class FileDetector:
-    """Detect file type and route to appropriate handlers."""
-
-    # File extension mappings
-    PDF_EXTENSIONS = {'.pdf'}
-    IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.tiff', '.tif', '.bmp', '.webp'}
-    OFFICE_DOC_EXTENSIONS = {'.docx'}
-    OFFICE_SHEET_EXTENSIONS = {'.xlsx'}
-    OFFICE_PRESENTATION_EXTENSIONS = {'.pptx'}
-    VIDEO_EXTENSIONS = {'.mp4', '.mov', '.avi', '.mkv', '.m4v', '.wmv'}
-
-    @classmethod
-    def detect_file_type(cls, file_path: str) -> FileType:
-        """
-        Detect file type based on extension and MIME type.
-
-        Args:
-            file_path: Path to the file
-
-        Returns:
-            FileType enum value
-        """
-        path = Path(file_path)
-
-        if not path.exists():
-            raise FileNotFoundError(f"File not found: {file_path}")
-
-        extension = path.suffix.lower()
-
-        # Check by extension first
-        if extension in cls.PDF_EXTENSIONS:
-            return FileType.PDF
-        elif extension in cls.IMAGE_EXTENSIONS:
-            return FileType.IMAGE
-        elif extension in cls.OFFICE_DOC_EXTENSIONS:
-            return FileType.OFFICE_DOC
-        elif extension in cls.OFFICE_SHEET_EXTENSIONS:
-            return FileType.OFFICE_SHEET
-        elif extension in cls.OFFICE_PRESENTATION_EXTENSIONS:
-            return FileType.OFFICE_PRESENTATION
-        elif extension in cls.VIDEO_EXTENSIONS:
-            return FileType.VIDEO
-
-        # Fallback to MIME type check
-        mime_type, _ = mimetypes.guess_type(str(path))
-        if mime_type:
-            if 'pdf' in mime_type:
-                return FileType.PDF
-            elif 'image' in mime_type:
-                return FileType.IMAGE
-            elif 'video' in mime_type:
-                return FileType.VIDEO
-            elif 'officedocument.wordprocessingml' in mime_type:
-                return FileType.OFFICE_DOC
-            elif 'officedocument.spreadsheetml' in mime_type:
-                return FileType.OFFICE_SHEET
-            elif 'officedocument.presentationml' in mime_type:
-                return FileType.OFFICE_PRESENTATION
-
-        return FileType.UNSUPPORTED
-
-    @classmethod
-    def is_supported(cls, file_path: str) -> bool:
-        """Check if file type is supported."""
-        file_type = cls.detect_file_type(file_path)
-        return file_type != FileType.UNSUPPORTED
-
-    @classmethod
-    def get_file_type_name(cls, file_type: FileType) -> str:
-        """Get human-readable file type name."""
-        type_names = {
-            FileType.PDF: "PDF Document",
-            FileType.IMAGE: "Image",
-            FileType.OFFICE_DOC: "Word Document",
-            FileType.OFFICE_SHEET: "Excel Spreadsheet",
-            FileType.OFFICE_PRESENTATION: "PowerPoint Presentation",
-            FileType.VIDEO: "Video",
-            FileType.UNSUPPORTED: "Unsupported File"
-        }
-        return type_names.get(file_type, "Unknown")
--- a/backend/app/processors/metadata_analyzer.py
+++ b/backend/app/processors/metadata_analyzer.py
@ -1,424 +0,0 @@
-"""AI-powered metadata analysis using OpenAI GPT with production-ready features."""
-
-import json
-from openai import OpenAI
-from typing import Dict, Optional
-from .config import Config
-from .file_detector import FileType
-from .utils import get_logger, sanitize_metadata_value
-
-# Production-ready imports
-try:
-    import tiktoken
-    TIKTOKEN_AVAILABLE = True
-except ImportError:
-    TIKTOKEN_AVAILABLE = False
-
-try:
-    from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
-    TENACITY_AVAILABLE = True
-except ImportError:
-    TENACITY_AVAILABLE = False
-
-logger = get_logger(__name__)
-
-class MetadataAnalyzer:
-    """Analyze content and generate metadata using OpenAI GPT with production-ready error handling."""
-
-    # Valid OpenAI models (as of January 2026)
-    VALID_MODELS = [
-        # GPT-5 models (2026 release)
-        'gpt-5', 'gpt-5-mini', 'gpt-5-nano',
-        'gpt-5-mini-2025-08-07', 'gpt-5-nano-2025-08-07',
-        # GPT-4 models
-        'gpt-4o', 'gpt-4o-mini', 'gpt-4o-mini-2024-07-18',
-        'gpt-4-turbo', 'gpt-4', 'gpt-3.5-turbo',
-        # Reasoning models
-        'o1', 'o1-mini', 'o1-preview'
-    ]
-
-    def __init__(self):
-        """Initialize the analyzer with OpenAI client."""
-        if not Config.OPENAI_API_KEY:
-            raise ValueError("OpenAI API key not configured")
-
-        self.client = OpenAI(api_key=Config.OPENAI_API_KEY)
-        self.model = Config.AI_MODEL
-
-        # Validate model name
-        if not self._is_valid_model(self.model):
-            logger.warning(f"⚠️  Model '{self.model}' may not be valid. Valid models: {', '.join(self.VALID_MODELS)}")
-            logger.warning(f"⚠️  Using fallback model: gpt-4o-mini")
-            self.model = 'gpt-4o-mini'
-
-        self.max_tokens = Config.MAX_TOKENS
-        self.temperature = Config.TEMPERATURE
-
-        logger.info(f"Initialized MetadataAnalyzer with model: {self.model}")
-
-        # Initialize tiktoken encoding for proper token counting
-        if TIKTOKEN_AVAILABLE:
-            try:
-                self.encoding = tiktoken.encoding_for_model(self.model)
-            except KeyError:
-                # Fallback for models not in tiktoken registry
-                self.encoding = tiktoken.get_encoding("cl100k_base")
-        else:
-            self.encoding = None
-            logger.warning("tiktoken not available - using character-based truncation")
-
-    def _count_tokens(self, text: str) -> int:
-        """Count tokens using tiktoken (proper tokenization)."""
-        if self.encoding:
-            return len(self.encoding.encode(text))
-        else:
-            # Fallback: rough estimate (1 token ≈ 4 characters)
-            return len(text) // 4
-
-    def _truncate_content(self, content: str, max_tokens: int = 3000) -> str:
-        """Intelligently truncate content to fit token limit."""
-        if not self.encoding:
-            # Character-based fallback
-            max_chars = max_tokens * 4
-            if len(content) <= max_chars:
-                return content
-            return content[:max_chars]
-
-        tokens = self.encoding.encode(content)
-        if len(tokens) <= max_tokens:
-            return content
-
-        # Truncate and decode back
-        truncated_tokens = tokens[:max_tokens]
-        return self.encoding.decode(truncated_tokens)
-
-    def _is_valid_model(self, model: str) -> bool:
-        """Check if model name is valid."""
-        # Exact match
-        if model in self.VALID_MODELS:
-            return True
-        # Check if it starts with a valid prefix (for dated versions)
-        for valid_model in self.VALID_MODELS:
-            if model.startswith(valid_model):
-                return True
-        return False
-
-    def _is_new_model(self) -> bool:
-        """
-        Check if model is a new generation model.
-        New models (GPT-5, GPT-4o, o1) use max_completion_tokens and don't support custom temperature.
-        """
-        new_models = ['gpt-5', 'gpt-4o', 'gpt-4-turbo', 'o1']
-        return any(self.model.startswith(prefix) for prefix in new_models)
-
-    def _get_api_params(self) -> dict:
-        """
-        Get the correct API parameters based on model.
-        Newer models (GPT-5, GPT-4o, o1) use max_completion_tokens and don't support custom temperature.
-        Older models (GPT-3.5-turbo) use max_tokens and support temperature.
-        """
-        params = {}
-
-        # Token parameter
-        if self._is_new_model():
-            params['max_completion_tokens'] = self.max_tokens
-            # New models (GPT-5, GPT-4o, o1) don't support custom temperature (only default value 1)
-            logger.debug(f"Using max_completion_tokens for {self.model}")
-        else:
-            params['max_tokens'] = self.max_tokens
-            params['temperature'] = self.temperature
-            logger.debug(f"Using max_tokens + temperature for {self.model}")
-
-        return params
-
-    def _call_openai_api(self, messages: list) -> dict:
-        """
-        Call OpenAI API with automatic retry on failures.
-        Uses tenacity for exponential backoff if available.
-        """
-        # Get the correct API parameters
-        api_params = self._get_api_params()
-
-        if TENACITY_AVAILABLE:
-            # Use retry decorator dynamically
-            retry_decorator = retry(
-                stop=stop_after_attempt(Config.API_MAX_RETRIES),
-                wait=wait_exponential(multiplier=Config.API_RETRY_DELAY, min=2, max=10),
-                retry=retry_if_exception_type((Exception,)),
-                reraise=True
-            )
-
-            @retry_decorator
-            def _api_call():
-                return self.client.chat.completions.create(
-                    model=self.model,
-                    messages=messages,
-                    timeout=Config.API_TIMEOUT,
-                    **api_params
-                )
-
-            return _api_call()
-        else:
-            # Fallback: simple retry without exponential backoff
-            import time
-            last_error = None
-
-            for attempt in range(Config.API_MAX_RETRIES):
-                try:
-                    return self.client.chat.completions.create(
-                        model=self.model,
-                        messages=messages,
-                        timeout=Config.API_TIMEOUT,
-                        **api_params
-                    )
-                except Exception as e:
-                    last_error = e
-                    if attempt < Config.API_MAX_RETRIES - 1:
-                        wait_time = Config.API_RETRY_DELAY * (2 ** attempt)
-                        logger.warning(f"API call failed (attempt {attempt + 1}/{Config.API_MAX_RETRIES}), retrying in {wait_time}s: {e}")
-                        time.sleep(wait_time)
-
-            raise last_error
-
-    def analyze_content(self, content: str, filename: str, file_type: FileType) -> Dict[str, str]:
-        """
-        Analyze content and generate appropriate metadata with production-ready error handling.
-
-        Args:
-            content: Extracted text content
-            filename: Original filename
-            file_type: Type of file
-
-        Returns:
-            Dictionary with metadata (title, subject, keywords, _tokens_used, _confidence)
-        """
-        try:
-            # Truncate content if needed with proper token counting
-            content_tokens = self._count_tokens(content)
-            if content_tokens > Config.MAX_TEXT_LENGTH:
-                content = self._truncate_content(content, Config.MAX_TEXT_LENGTH)
-                logger.info(f"Truncated content from {content_tokens} to {self._count_tokens(content)} tokens")
-
-            # Generate prompt based on file type
-            prompt = self._create_prompt(content, filename, file_type)
-
-            # Count total tokens before API call
-            prompt_tokens = self._count_tokens(prompt)
-            logger.info(f"API call for {filename}: {prompt_tokens} prompt tokens")
-
-            # Call API with retry logic
-            response = self._call_openai_api([
-                {"role": "system", "content": "You are a metadata expert who generates professional, accurate metadata for documents in English."},
-                {"role": "user", "content": prompt}
-            ])
-
-            # Parse response with detailed logging
-            logger.info(f"API Response for {filename}:")
-            logger.info(f"  - Model used: {response.model}")
-            logger.info(f"  - Finish reason: {response.choices[0].finish_reason}")
-            logger.info(f"  - Tokens: prompt={response.usage.prompt_tokens}, completion={response.usage.completion_tokens}, total={response.usage.total_tokens}")
-
-            metadata_text = response.choices[0].message.content
-            logger.info(f"  - Content length: {len(metadata_text) if metadata_text else 0} chars")
-            logger.info(f"  - Content preview: {metadata_text[:200] if metadata_text else '(empty)'}")
-
-            # Check if content is None or empty
-            if not metadata_text or len(metadata_text.strip()) == 0:
-                logger.error(f"❌ API returned empty content for {filename}!")
-                logger.error(f"   This usually means:")
-                logger.error(f"   1. Invalid model name: {self.model}")
-                logger.error(f"   2. Model doesn't support this request type")
-                logger.error(f"   3. Content was filtered/refused")
-                logger.error(f"   Using fallback metadata instead.")
-                return self._generate_fallback_metadata(filename, file_type)
-
-            metadata = self._parse_metadata_response(metadata_text)
-
-            # Sanitize metadata values
-            metadata = {
-                key: sanitize_metadata_value(value)
-                for key, value in metadata.items()
-            }
-
-            # Add metadata about the generation
-            metadata['_tokens_used'] = response.usage.total_tokens
-            metadata['_confidence'] = 0.9  # Could calculate based on response
-
-            logger.info(f"Generated metadata for {filename} (tokens used: {metadata['_tokens_used']})")
-            return metadata
-
-        except Exception as e:
-            logger.error(f"Error analyzing content for {filename}: {e}")
-            # Return fallback metadata with error info
-            fallback = self._generate_fallback_metadata(filename, file_type)
-            fallback['_ai_error'] = str(e)
-            fallback['_tokens_used'] = 0
-            return fallback
-
-    def _create_prompt(self, content: str, filename: str, file_type: FileType) -> str:
-        """Create AI prompt based on file type."""
-        file_type_descriptions = {
-            FileType.PDF: "PDF document",
-            FileType.IMAGE: "image file",
-            FileType.OFFICE_DOC: "Word document",
-            FileType.OFFICE_SHEET: "Excel spreadsheet",
-            FileType.OFFICE_PRESENTATION: "PowerPoint presentation",
-            FileType.VIDEO: "video file"
-        }
-
-        file_desc = file_type_descriptions.get(file_type, "file")
-
-        prompt = f"""Analyze the following {file_desc} content and generate professional metadata in English.
-
-Filename: {filename}
-Content: {content}
-
-Generate metadata with these fields:
-1. Title: A concise, professional title (50-100 characters) that clearly describes the document/content
-2. Subject: A brief description (1-2 sentences) of the document's purpose and content
-3. Keywords: 5-10 relevant keywords separated by commas (include product names, categories, topics)
-
-Rules:
- All text MUST be in English
- Title should identify the main product/service and document type (e.g., "guide", "brochure", "manual")
- Subject should explain what the document is about and its purpose
- Keywords should be searchable terms relevant to the content
- Be professional and concise
- Return ONLY a JSON object with fields: title, subject, keywords
-
-Example output format:
-{{
-  "title": "3M Filtek Universal Restorative - Shade Selection Guide",
-  "subject": "Shade selection guide for 3M Filtek Universal Restorative dental material",
-  "keywords": "Filtek, Universal Restorative, shade selection, dental, restorative material, 3M, dentistry, composite"
-}}
-
-Return only the JSON object, no additional text."""
-
-        return prompt
-
-    def _parse_metadata_response(self, response_text: str) -> Dict[str, str]:
-        """Parse AI response into metadata dictionary."""
-        try:
-            # Try to parse as JSON first
-            response_text = response_text.strip()
-            logger.info(f"Parsing response (length={len(response_text)}): {response_text[:200]}")
-
-            # Remove markdown code blocks if present
-            if response_text.startswith('```'):
-                lines = response_text.split('\n')
-                # Find first and last code block markers
-                start_idx = 0
-                end_idx = len(lines)
-                for i, line in enumerate(lines):
-                    if line.startswith('```'):
-                        if start_idx == 0:
-                            start_idx = i + 1
-                        else:
-                            end_idx = i
-                            break
-                response_text = '\n'.join(lines[start_idx:end_idx])
-
-            # Try to find JSON object in text
-            # Look for { ... } pattern
-            start = response_text.find('{')
-            end = response_text.rfind('}')
-            if start != -1 and end != -1:
-                json_str = response_text[start:end+1]
-                metadata = json.loads(json_str)
-            else:
-                metadata = json.loads(response_text)
-
-            # Ensure all required fields are present
-            required_fields = ['title', 'subject', 'keywords']
-            for field in required_fields:
-                if field not in metadata:
-                    metadata[field] = ""
-
-            # Validate that we got actual content
-            if not metadata.get('title') or len(metadata.get('title', '').strip()) < 3:
-                logger.warning("JSON parsed but title is empty or too short, using text parsing")
-                return self._parse_metadata_text(response_text)
-
-            return metadata
-
-        except (json.JSONDecodeError, ValueError, KeyError) as e:
-            logger.warning(f"Failed to parse JSON response ({str(e)}), using text parsing")
-            return self._parse_metadata_text(response_text)
-
-    def _parse_metadata_text(self, text: str) -> Dict[str, str]:
-        """Parse metadata from plain text response."""
-        metadata = {
-            'title': '',
-            'subject': '',
-            'keywords': ''
-        }
-
-        # Improved text parsing
-        lines = text.split('\n')
-
-        for line in lines:
-            line = line.strip()
-            if not line or line.startswith('#') or line.startswith('//'):
-                continue
-
-            # Remove quotes and extra whitespace
-            line_clean = line.strip('"\'')
-
-            # Look for field indicators (case insensitive)
-            line_lower = line_clean.lower()
-
-            if ':' in line_clean:
-                parts = line_clean.split(':', 1)
-                key = parts[0].strip().lower()
-                value = parts[1].strip().strip('",\'')
-
-                if 'title' in key and not metadata['title']:
-                    metadata['title'] = value
-                elif 'subject' in key and not metadata['subject']:
-                    metadata['subject'] = value
-                elif 'keyword' in key and not metadata['keywords']:
-                    metadata['keywords'] = value
-
-        # If still empty, try to extract from unstructured text
-        if not metadata['title']:
-            # Look for first substantial line as title
-            for line in lines:
-                line = line.strip().strip('"\'')
-                if len(line) > 10 and not line.lower().startswith(('title', 'subject', 'keyword')):
-                    metadata['title'] = line[:200]  # Limit length
-                    break
-
-        logger.info(f"Text parsing result: title='{metadata['title'][:50]}...', subject='{metadata['subject'][:50]}...'")
-        return metadata
-
-    def _generate_fallback_metadata(self, filename: str, file_type: FileType) -> Dict[str, str]:
-        """Generate basic metadata based on filename when AI fails."""
-        # Remove extension and clean filename
-        from pathlib import Path
-        clean_name = Path(filename).stem.replace('_', ' ').replace('-', ' ')
-
-        return {
-            'title': clean_name,
-            'subject': f"{clean_name} - {FileType(file_type).value}",
-            'keywords': clean_name.replace(' ', ', ')
-        }
-
-    def generate_metadata_for_pdf(self, text: str) -> Dict[str, str]:
-        """Specialized metadata generation for PDF documents."""
-        # Wrapper for PDF-specific logic if needed
-        return self.analyze_content(text, "document.pdf", FileType.PDF)
-
-    def generate_metadata_for_image(self, text: str) -> Dict[str, str]:
-        """Specialized metadata generation for images."""
-        return self.analyze_content(text, "image.jpg", FileType.IMAGE)
-
-    def generate_metadata_for_office(self, text: str) -> Dict[str, str]:
-        """Specialized metadata generation for Office documents."""
-        return self.analyze_content(text, "document.docx", FileType.OFFICE_DOC)
-
-    def generate_metadata_for_video(self, metadata: Dict[str, str]) -> Dict[str, str]:
-        """Specialized metadata generation for videos."""
-        # For videos, we might use existing metadata as input
-        text = f"Video title: {metadata.get('title', 'N/A')}"
-        return self.analyze_content(text, "video.mp4", FileType.VIDEO)
--- a/backend/app/processors/metadata_importer.py
+++ b/backend/app/processors/metadata_importer.py
@ -1,427 +0,0 @@
-"""Metadata importer for external files (CSV, Excel, JSON)."""
-
-import pandas as pd
-import json
-from pathlib import Path
-from typing import Dict, Optional, List, Tuple
-from .utils import get_logger
-from .field_mapper import FieldMapper
-
-logger = get_logger(__name__)
-
-
-class MetadataImporter:
-    """Import metadata from various file formats (CSV, Excel, JSON)."""
-
-    def import_from_csv(self, csv_path: str) -> Dict[str, Dict]:
-        """
-        Import metadata from CSV file.
-        Expected columns: filename, title, subject/description, keywords
-
-        Args:
-            csv_path: Path to CSV file
-
-        Returns:
-            Dictionary mapping filename stems to metadata dicts
-        """
-        try:
-            df = pd.read_csv(csv_path, encoding='utf-8')
-            logger.info(f"Loaded CSV with {len(df)} rows from {csv_path}")
-            return self._parse_dataframe(df)
-
-        except UnicodeDecodeError:
-            # Try alternative encodings
-            for encoding in ['latin1', 'iso-8859-1', 'cp1252']:
-                try:
-                    df = pd.read_csv(csv_path, encoding=encoding)
-                    logger.info(f"Loaded CSV with {len(df)} rows using {encoding} encoding")
-                    return self._parse_dataframe(df)
-                except Exception:
-                    continue
-
-            raise ValueError(f"Could not read CSV file with any supported encoding")
-
-        except Exception as e:
-            logger.error(f"Error importing from CSV: {e}")
-            raise
-
-    def import_from_excel(self, excel_path: str, sheet_name: Optional[str] = None) -> Dict[str, Dict]:
-        """
-        Import metadata from Excel file.
-
-        Args:
-            excel_path: Path to Excel file (.xlsx, .xls)
-            sheet_name: Name of sheet to read (None = first sheet)
-
-        Returns:
-            Dictionary mapping filename stems to metadata dicts
-        """
-        try:
-            # Read Excel file
-            if sheet_name:
-                df = pd.read_excel(excel_path, sheet_name=sheet_name)
-                logger.info(f"Loaded Excel sheet '{sheet_name}' with {len(df)} rows")
-            else:
-                df = pd.read_excel(excel_path)
-                logger.info(f"Loaded Excel with {len(df)} rows from first sheet")
-
-            return self._parse_dataframe(df)
-
-        except Exception as e:
-            logger.error(f"Error importing from Excel: {e}")
-            raise
-
-    def import_from_json(self, json_path: str) -> Dict[str, Dict]:
-        """
-        Import metadata from JSON file.
-
-        Expected format:
-        {
-            "filename.pdf": {"title": "...", "subject": "...", "keywords": "..."},
-            "image.jpg": {"title": "...", "subject": "...", "keywords": "..."}
-        }
-
-        Or array format:
-        [
-            {"filename": "file.pdf", "title": "...", "subject": "...", "keywords": "..."},
-            {"filename": "image.jpg", "title": "...", "subject": "...", "keywords": "..."}
-        ]
-
-        Args:
-            json_path: Path to JSON file
-
-        Returns:
-            Dictionary mapping filename stems to metadata dicts
-        """
-        try:
-            with open(json_path, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-
-            metadata_map = {}
-
-            if isinstance(data, dict):
-                # Object format: {"filename": {metadata}}
-                for filename, metadata in data.items():
-                    filename_stem = Path(filename).stem.lower()
-                    metadata_map[filename_stem] = self._normalize_metadata(metadata)
-
-            elif isinstance(data, list):
-                # Array format: [{filename, metadata}]
-                for item in data:
-                    if not isinstance(item, dict):
-                        continue
-
-                    # Find filename field
-                    filename = None
-                    for key in ['filename', 'file', 'name', 'file_name']:
-                        if key in item:
-                            filename = item[key]
-                            break
-
-                    if not filename:
-                        logger.warning(f"Skipping item without filename: {item}")
-                        continue
-
-                    filename_stem = Path(filename).stem.lower()
-                    metadata_map[filename_stem] = self._normalize_metadata(item)
-
-            else:
-                raise ValueError("JSON must be an object or array")
-
-            logger.info(f"Loaded {len(metadata_map)} metadata records from JSON")
-            return metadata_map
-
-        except Exception as e:
-            logger.error(f"Error importing from JSON: {e}")
-            raise
-
-    def _parse_dataframe(self, df: pd.DataFrame) -> Dict[str, Dict]:
-        """
-        Parse pandas DataFrame into metadata map.
-
-        Args:
-            df: DataFrame with metadata
-
-        Returns:
-            Dictionary mapping filename stems to metadata dicts
-        """
-        metadata_map = {}
-
-        # Detect filename column (try common names)
-        filename_col = self._detect_column(df, ['filename', 'file', 'name', 'file_name', 'path'])
-
-        if not filename_col:
-            raise ValueError("Could not find filename column in data. Tried: filename, file, name, file_name, path")
-
-        # Detect metadata columns
-        title_col = self._detect_column(df, ['title', 'heading', 'name', 'document_title'])
-        subject_col = self._detect_column(df, ['subject', 'description', 'summary', 'desc', 'external_description', 'alt_text'])
-        keywords_col = self._detect_column(df, ['keywords', 'tags', 'categories', 'labels'])
-
-        logger.info(f"Detected columns - filename: {filename_col}, title: {title_col}, subject: {subject_col}, keywords: {keywords_col}")
-
-        # Parse rows
-        for _, row in df.iterrows():
-            filename = str(row.get(filename_col, '')).strip()
-            if not filename or pd.isna(filename):
-                continue
-
-            filename_stem = Path(filename).stem.lower()
-
-            metadata_map[filename_stem] = {
-                'title': self._get_value(row, title_col),
-                'subject': self._get_value(row, subject_col),
-                'keywords': self._get_value(row, keywords_col)
-            }
-
-        logger.info(f"Parsed {len(metadata_map)} metadata records from DataFrame")
-        return metadata_map
-
-    def _detect_column(self, df: pd.DataFrame, candidates: List[str]) -> Optional[str]:
-        """
-        Detect column name from a list of candidates (case-insensitive).
-
-        Args:
-            df: DataFrame to search
-            candidates: List of possible column names
-
-        Returns:
-            Actual column name if found, None otherwise
-        """
-        # Create lowercase mapping
-        col_map = {col.lower(): col for col in df.columns}
-
-        # Try each candidate
-        for candidate in candidates:
-            if candidate.lower() in col_map:
-                return col_map[candidate.lower()]
-
-        return None
-
-    def _get_value(self, row: pd.Series, column: Optional[str]) -> str:
-        """
-        Get value from row, handling None column and NaN values.
-
-        Args:
-            row: DataFrame row
-            column: Column name (can be None)
-
-        Returns:
-            String value or empty string
-        """
-        if column is None:
-            return ''
-
-        value = row.get(column, '')
-
-        if pd.isna(value):
-            return ''
-
-        return str(value).strip()
-
-    def _normalize_metadata(self, metadata: Dict) -> Dict[str, str]:
-        """
-        Normalize metadata dictionary to standard format.
-
-        Args:
-            metadata: Raw metadata dict
-
-        Returns:
-            Normalized metadata with title, subject, keywords keys
-        """
-        normalized = {
-            'title': '',
-            'subject': '',
-            'keywords': ''
-        }
-
-        # Map title
-        for key in ['title', 'heading', 'name', 'document_title']:
-            if key in metadata and metadata[key]:
-                normalized['title'] = str(metadata[key]).strip()
-                break
-
-        # Map subject/description
-        for key in ['subject', 'description', 'summary', 'desc', 'external_description', 'alt_text']:
-            if key in metadata and metadata[key]:
-                normalized['subject'] = str(metadata[key]).strip()
-                break
-
-        # Map keywords
-        for key in ['keywords', 'tags', 'categories', 'labels']:
-            if key in metadata and metadata[key]:
-                value = metadata[key]
-                # Handle arrays
-                if isinstance(value, list):
-                    normalized['keywords'] = ', '.join(str(v) for v in value)
-                else:
-                    normalized['keywords'] = str(value).strip()
-                break
-
-        return normalized
-
-    def get_metadata_for_file(self, metadata_map: Dict[str, Dict], filename: str) -> Optional[Dict[str, str]]:
-        """
-        Get metadata for a specific file from imported map.
-
-        Args:
-            metadata_map: Dictionary returned by import_* methods
-            filename: Filename to look up (with or without extension)
-
-        Returns:
-            Metadata dict if found, None otherwise
-        """
-        filename_stem = Path(filename).stem.lower()
-        return metadata_map.get(filename_stem)
-
-    def validate_import(self, metadata_map: Dict[str, Dict]) -> Dict:
-        """
-        Validate imported metadata and return statistics.
-
-        Args:
-            metadata_map: Dictionary returned by import_* methods
-
-        Returns:
-            Statistics about the import
-        """
-        stats = {
-            'total_records': len(metadata_map),
-            'with_title': 0,
-            'with_subject': 0,
-            'with_keywords': 0,
-            'empty_records': 0
-        }
-
-        for metadata in metadata_map.values():
-            if metadata.get('title'):
-                stats['with_title'] += 1
-            if metadata.get('subject'):
-                stats['with_subject'] += 1
-            if metadata.get('keywords'):
-                stats['with_keywords'] += 1
-
-            if not any([metadata.get('title'), metadata.get('subject'), metadata.get('keywords')]):
-                stats['empty_records'] += 1
-
-        return stats
-
-    def preview_file_structure(self, file_path: str, file_type: str = 'auto') -> Tuple[List[str], List[Dict], Dict]:
-        """
-        Preview file structure and suggest field mappings without importing.
-
-        Args:
-            file_path: Path to file (CSV, Excel, JSON)
-            file_type: File type ('csv', 'excel', 'json', or 'auto')
-
-        Returns:
-            Tuple of (column_names, sample_rows, suggested_mapping)
-        """
-        if file_type == 'auto':
-            ext = Path(file_path).suffix.lower()
-            if ext == '.csv':
-                file_type = 'csv'
-            elif ext in ['.xlsx', '.xls']:
-                file_type = 'excel'
-            elif ext == '.json':
-                file_type = 'json'
-            else:
-                raise ValueError(f"Unsupported file type: {ext}")
-
-        # Load file
-        if file_type == 'csv':
-            df = pd.read_csv(file_path, encoding='utf-8', nrows=10)
-        elif file_type == 'excel':
-            df = pd.read_excel(file_path, nrows=10)
-        elif file_type == 'json':
-            with open(file_path, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-                if isinstance(data, list) and len(data) > 0:
-                    df = pd.DataFrame(data[:10])
-                elif isinstance(data, dict):
-                    # Convert dict to list
-                    items = [{'filename': k, **v} for k, v in list(data.items())[:10]]
-                    df = pd.DataFrame(items)
-                else:
-                    raise ValueError("JSON format not supported for preview")
-
-        # Get column names
-        columns = df.columns.tolist()
-
-        # Get sample rows
-        sample_rows = df.head(5).to_dict('records')
-
-        # Suggest field mapping
-        mapper = FieldMapper()
-        suggestions = mapper.suggest_mapping(columns)
-
-        return (columns, sample_rows, suggestions)
-
-    def import_with_mapping(self, file_path: str, mapping: Dict[str, str], file_type: str = 'auto') -> Dict[str, Dict]:
-        """
-        Import file with custom field mapping.
-
-        Args:
-            file_path: Path to file
-            mapping: Field mapping {source_field: target_field}
-            file_type: File type ('csv', 'excel', 'json', or 'auto')
-
-        Returns:
-            Dictionary mapping filename stems to metadata dicts
-        """
-        # Load file
-        if file_type == 'auto':
-            ext = Path(file_path).suffix.lower()
-            if ext == '.csv':
-                file_type = 'csv'
-            elif ext in ['.xlsx', '.xls']:
-                file_type = 'excel'
-            elif ext == '.json':
-                file_type = 'json'
-
-        if file_type == 'csv':
-            df = pd.read_csv(file_path, encoding='utf-8')
-        elif file_type == 'excel':
-            df = pd.read_excel(file_path)
-        elif file_type == 'json':
-            with open(file_path, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-                if isinstance(data, list):
-                    df = pd.DataFrame(data)
-                elif isinstance(data, dict):
-                    items = [{'filename': k, **v} for k, v in data.items()]
-                    df = pd.DataFrame(items)
-
-        # Apply field mapper
-        mapper = FieldMapper()
-        metadata_map = {}
-
-        # Find filename column
-        filename_col = None
-        for col in df.columns:
-            if col.lower() in ['filename', 'file', 'name', 'file_name']:
-                filename_col = col
-                break
-
-        if not filename_col:
-            raise ValueError("Could not find filename column")
-
-        # Process each row
-        for _, row in df.iterrows():
-            filename = str(row.get(filename_col, '')).strip()
-            if not filename or pd.isna(filename):
-                continue
-
-            filename_stem = Path(filename).stem.lower()
-
-            # Apply mapping to transform row data
-            row_dict = row.to_dict()
-            metadata = mapper.apply_mapping(row_dict, mapping)
-
-            metadata_map[filename_stem] = {
-                'title': str(metadata.get('title', '')).strip(),
-                'subject': str(metadata.get('subject', '')).strip(),
-                'keywords': str(metadata.get('keywords', '')).strip()
-            }
-
-        logger.info(f"Imported {len(metadata_map)} records with custom mapping")
-        return metadata_map
--- a/backend/app/processors/template_manager.py
+++ b/backend/app/processors/template_manager.py
@ -1,410 +0,0 @@
-"""Metadata template manager with variable substitution."""
-
-import json
-from pathlib import Path
-from typing import Dict, List, Optional
-from datetime import datetime
-from .utils import get_logger
-
-logger = get_logger(__name__)
-
-
-class TemplateManager:
-    """Manage metadata templates with variable substitution."""
-
-    # Available variables for substitution
-    AVAILABLE_VARIABLES = {
-        '{filename}': 'Original filename without extension',
-        '{date}': 'Current date (YYYY-MM-DD)',
-        '{datetime}': 'Current date and time',
-        '{user}': 'Current username',
-        '{year}': 'Current year',
-        '{month}': 'Current month',
-        '{day}': 'Current day'
-    }
-
-    def __init__(self, templates_path: Optional[str] = None):
-        """
-        Initialize template manager.
-
-        Args:
-            templates_path: Path to JSON file for storing templates
-        """
-        self.templates_path = templates_path or 'metadata_templates.json'
-
-    def create_template(
-        self,
-        name: str,
-        title_template: str,
-        subject_template: str,
-        keywords_template: str,
-        description: str = ''
-    ) -> Dict:
-        """
-        Create a new metadata template.
-
-        Args:
-            name: Template name
-            title_template: Title template with variables (e.g., "{filename} - Product Guide")
-            subject_template: Subject template with variables
-            keywords_template: Keywords template with variables
-            description: Optional description of template usage
-
-        Returns:
-            Template dictionary
-        """
-        template = {
-            'name': name,
-            'description': description,
-            'title': title_template,
-            'subject': subject_template,
-            'keywords': keywords_template,
-            'created_at': self._get_timestamp(),
-            'updated_at': self._get_timestamp()
-        }
-
-        # Validate template
-        validation = self.validate_template(template)
-        if validation['invalid']:
-            logger.warning(f"Template '{name}' has invalid variables: {validation['invalid']}")
-
-        return template
-
-    def save_template(self, template: Dict) -> bool:
-        """
-        Save template to storage.
-
-        Args:
-            template: Template dictionary
-
-        Returns:
-            True if successful
-        """
-        try:
-            templates = self._load_templates()
-            template['updated_at'] = self._get_timestamp()
-            templates[template['name']] = template
-
-            with open(self.templates_path, 'w', encoding='utf-8') as f:
-                json.dump(templates, f, indent=2, ensure_ascii=False)
-
-            logger.info(f"Saved template: {template['name']}")
-            return True
-
-        except Exception as e:
-            logger.error(f"Failed to save template '{template['name']}': {e}")
-            return False
-
-    def load_template(self, name: str) -> Optional[Dict]:
-        """
-        Load template by name.
-
-        Args:
-            name: Template name
-
-        Returns:
-            Template dictionary or None if not found
-        """
-        templates = self._load_templates()
-        template = templates.get(name)
-
-        if template:
-            logger.info(f"Loaded template: {name}")
-        else:
-            logger.warning(f"Template not found: {name}")
-
-        return template
-
-    def list_templates(self) -> List[Dict]:
-        """
-        List all available templates.
-
-        Returns:
-            List of template summaries
-        """
-        templates = self._load_templates()
-
-        return [
-            {
-                'name': name,
-                'description': data.get('description', ''),
-                'created_at': data.get('created_at', ''),
-                'updated_at': data.get('updated_at', ''),
-                'variables_used': self._extract_variables(data)
-            }
-            for name, data in templates.items()
-        ]
-
-    def delete_template(self, name: str) -> bool:
-        """
-        Delete a template.
-
-        Args:
-            name: Template name
-
-        Returns:
-            True if deleted, False if not found
-        """
-        templates = self._load_templates()
-
-        if name in templates:
-            del templates[name]
-
-            try:
-                with open(self.templates_path, 'w', encoding='utf-8') as f:
-                    json.dump(templates, f, indent=2, ensure_ascii=False)
-
-                logger.info(f"Deleted template: {name}")
-                return True
-            except Exception as e:
-                logger.error(f"Failed to delete template '{name}': {e}")
-                return False
-
-        logger.warning(f"Template not found: {name}")
-        return False
-
-    def apply_template(
-        self,
-        template: Dict,
-        filename: str,
-        user: str = 'Unknown',
-        custom_vars: Optional[Dict[str, str]] = None
-    ) -> Dict[str, str]:
-        """
-        Apply template to generate metadata for a file.
-
-        Args:
-            template: Template dictionary
-            filename: Filename to process
-            user: Username for {user} variable
-            custom_vars: Additional custom variables (e.g., {'product_line': 'Dental'})
-
-        Returns:
-            Dictionary with title, subject, keywords
-        """
-        # Build variable substitution map
-        variables = self._build_variable_map(filename, user, custom_vars)
-
-        # Apply substitutions
-        metadata = {
-            'title': self._substitute_variables(template.get('title', ''), variables),
-            'subject': self._substitute_variables(template.get('subject', ''), variables),
-            'keywords': self._substitute_variables(template.get('keywords', ''), variables)
-        }
-
-        logger.info(f"Applied template '{template['name']}' to {filename}")
-        return metadata
-
-    def validate_template(self, template: Dict) -> Dict[str, List[str]]:
-        """
-        Validate template for correct variable usage.
-
-        Args:
-            template: Template dictionary
-
-        Returns:
-            Dictionary with 'valid' and 'invalid' variable lists
-        """
-        result = {
-            'valid': [],
-            'invalid': []
-        }
-
-        # Extract all variables from template
-        all_text = (
-            template.get('title', '') +
-            template.get('subject', '') +
-            template.get('keywords', '')
-        )
-
-        # Find all {variable} patterns
-        import re
-        variables = re.findall(r'\{[^}]+\}', all_text)
-
-        for var in variables:
-            if var in self.AVAILABLE_VARIABLES:
-                if var not in result['valid']:
-                    result['valid'].append(var)
-            else:
-                if var not in result['invalid']:
-                    result['invalid'].append(var)
-
-        return result
-
-    def _load_templates(self) -> Dict:
-        """Load all templates from file."""
-        if Path(self.templates_path).exists():
-            try:
-                with open(self.templates_path, 'r', encoding='utf-8') as f:
-                    return json.load(f)
-            except Exception as e:
-                logger.error(f"Failed to load templates: {e}")
-                return {}
-        return {}
-
-    def _get_timestamp(self) -> str:
-        """Get current timestamp as ISO format string."""
-        return datetime.now().isoformat()
-
-    def _build_variable_map(
-        self,
-        filename: str,
-        user: str,
-        custom_vars: Optional[Dict[str, str]]
-    ) -> Dict[str, str]:
-        """
-        Build variable substitution map.
-
-        Args:
-            filename: Filename (with or without extension)
-            user: Username
-            custom_vars: Custom variables
-
-        Returns:
-            Dictionary mapping variable names to values
-        """
-        # Get filename without extension
-        filename_stem = Path(filename).stem
-
-        # Current date/time
-        now = datetime.now()
-
-        variables = {
-            '{filename}': filename_stem,
-            '{date}': now.strftime('%Y-%m-%d'),
-            '{datetime}': now.strftime('%Y-%m-%d %H:%M:%S'),
-            '{user}': user,
-            '{year}': str(now.year),
-            '{month}': now.strftime('%m'),
-            '{day}': now.strftime('%d')
-        }
-
-        # Add custom variables
-        if custom_vars:
-            for key, value in custom_vars.items():
-                # Ensure custom variables are wrapped in {}
-                var_key = f'{{{key}}}' if not key.startswith('{') else key
-                variables[var_key] = value
-
-        return variables
-
-    def _substitute_variables(self, template_text: str, variables: Dict[str, str]) -> str:
-        """
-        Substitute variables in template text.
-
-        Args:
-            template_text: Text with {variable} placeholders
-            variables: Variable substitution map
-
-        Returns:
-            Text with variables replaced
-        """
-        result = template_text
-
-        for var, value in variables.items():
-            result = result.replace(var, value)
-
-        return result
-
-    def _extract_variables(self, template: Dict) -> List[str]:
-        """
-        Extract all variables used in a template.
-
-        Args:
-            template: Template dictionary
-
-        Returns:
-            List of variable names (e.g., ['{filename}', '{date}'])
-        """
-        import re
-        all_text = (
-            template.get('title', '') +
-            template.get('subject', '') +
-            template.get('keywords', '')
-        )
-
-        variables = re.findall(r'\{[^}]+\}', all_text)
-        return list(set(variables))
-
-    def get_available_variables(self) -> Dict[str, str]:
-        """
-        Get list of available variables with descriptions.
-
-        Returns:
-            Dictionary mapping variable names to descriptions
-        """
-        return self.AVAILABLE_VARIABLES.copy()
-
-    def preview_template(
-        self,
-        template: Dict,
-        sample_filename: str = 'example.pdf',
-        user: str = 'User',
-        custom_vars: Optional[Dict[str, str]] = None
-    ) -> Dict[str, str]:
-        """
-        Preview template output with sample data.
-
-        Args:
-            template: Template dictionary
-            sample_filename: Sample filename for preview
-            user: Sample username
-            custom_vars: Sample custom variables
-
-        Returns:
-            Preview metadata
-        """
-        return self.apply_template(template, sample_filename, user, custom_vars)
-
-    def export_template(self, name: str, export_path: str) -> bool:
-        """
-        Export single template to JSON file.
-
-        Args:
-            name: Template name
-            export_path: Path to save template
-
-        Returns:
-            True if successful
-        """
-        template = self.load_template(name)
-        if not template:
-            return False
-
-        try:
-            with open(export_path, 'w', encoding='utf-8') as f:
-                json.dump(template, f, indent=2, ensure_ascii=False)
-
-            logger.info(f"Exported template '{name}' to {export_path}")
-            return True
-
-        except Exception as e:
-            logger.error(f"Failed to export template '{name}': {e}")
-            return False
-
-    def import_template(self, import_path: str) -> Optional[Dict]:
-        """
-        Import template from JSON file.
-
-        Args:
-            import_path: Path to template JSON file
-
-        Returns:
-            Imported template dictionary or None
-        """
-        try:
-            with open(import_path, 'r', encoding='utf-8') as f:
-                template = json.load(f)
-
-            # Validate required fields
-            required_fields = ['name', 'title', 'subject', 'keywords']
-            if not all(field in template for field in required_fields):
-                logger.error(f"Invalid template file: missing required fields")
-                return None
-
-            logger.info(f"Imported template from {import_path}")
-            return template
-
-        except Exception as e:
-            logger.error(f"Failed to import template: {e}")
-            return None
--- a/backend/app/processors/updaters/init.py
+++ b/backend/app/processors/updaters/init.py
@ -1 +0,0 @@
-"""Metadata updaters for different file types."""
--- a/backend/app/processors/updaters/exiftool_updater.py
+++ b/backend/app/processors/updaters/exiftool_updater.py
@ -1,223 +0,0 @@
-"""Unified metadata updater using ExifTool for images, video, and PDF files."""
-
-from typing import Dict
-from pathlib import Path
-import logging
-
-try:
-    from exiftool import ExifToolHelper
-    EXIFTOOL_AVAILABLE = True
-except ImportError:
-    EXIFTOOL_AVAILABLE = False
-
-from ..base_updater import BaseUpdater
-from ..utils import get_logger, create_backup
-
-logger = get_logger(__name__)
-
-
-class ExifToolUpdater(BaseUpdater):
-    """
-    Update metadata using ExifTool.
-
-    Supports images (JPEG, PNG, GIF, TIFF, HEIC, RAW),
-    videos (MP4, MOV, AVI, MKV), and PDF files.
-
-    Provides a unified API for metadata updates across all supported formats.
-    """
-
-    def __init__(self):
-        """Initialize ExifTool updater."""
-        if not EXIFTOOL_AVAILABLE:
-            raise ImportError(
-                "PyExifTool not installed. Install with: pip install PyExifTool>=0.5.6\n"
-                "Also ensure ExifTool is installed on your system."
-            )
-
-    def update_metadata(self, file_path: str, metadata: Dict[str, str], backup: bool = True) -> bool:
-        """
-        Update file metadata using ExifTool.
-
-        Writes title, subject, and keywords to appropriate metadata fields
-        based on file type (images use EXIF/IPTC/XMP, PDFs use PDF fields, etc.).
-
-        Args:
-            file_path: Path to the file
-            metadata: Dictionary with 'title', 'subject', 'keywords' keys
-            backup: Whether to create backup before updating (default: True)
-
-        Returns:
-            True if successful, False otherwise
-        """
-        try:
-            # Validate metadata
-            if not self.validate_metadata(metadata):
-                logger.error(f"Invalid metadata for {file_path}")
-                return False
-
-            # Create backup if requested
-            if backup:
-                backup_path = create_backup(file_path)
-                if not backup_path:
-                    logger.warning(f"Failed to create backup for {file_path}, proceeding anyway")
-
-            # Build ExifTool tags dict
-            updates = {}
-
-            # Determine file type and set appropriate tags
-            file_ext = Path(file_path).suffix.lower()
-
-            if self._is_image(file_ext):
-                updates = self._build_image_tags(metadata)
-            elif self._is_video(file_ext):
-                updates = self._build_video_tags(metadata)
-            elif self._is_pdf(file_ext):
-                updates = self._build_pdf_tags(metadata)
-            else:
-                logger.warning(f"Unknown file type {file_ext}, trying generic metadata tags")
-                updates = self._build_generic_tags(metadata)
-
-            # Apply updates using ExifTool
-            if not updates:
-                logger.warning(f"No metadata tags to update for {file_path}")
-                return True
-
-            with ExifToolHelper() as et:
-                et.set_tags(
-                    [file_path],
-                    tags=updates,
-                    params=["-overwrite_original", "-P"]  # Preserve file modification date
-                )
-
-            logger.info(f"Successfully updated metadata for {Path(file_path).name}")
-
-            # Verify the update
-            if self.verify_update(file_path, metadata):
-                logger.info(f"Metadata verification passed for {Path(file_path).name}")
-                return True
-            else:
-                logger.warning(f"Metadata verification failed for {Path(file_path).name}, but update succeeded")
-                return True  # Still return True as update itself worked
-
-        except Exception as e:
-            logger.error(f"Failed to update metadata for {file_path}: {e}")
-            return False
-
-    def verify_update(self, file_path: str, expected_metadata: Dict[str, str]) -> bool:
-        """
-        Verify that metadata was successfully written to the file.
-
-        Args:
-            file_path: Path to the file
-            expected_metadata: Metadata that was supposed to be written
-
-        Returns:
-            True if verification passes, False otherwise
-        """
-        try:
-            from .exiftool_extractor import ExifToolExtractor
-            extractor = ExifToolExtractor()
-            actual_metadata = extractor.read_metadata(file_path)
-
-            # Check each field (allow partial matches for verification)
-            for key in ['title', 'subject', 'keywords']:
-                expected = expected_metadata.get(key, '').strip()
-                actual = actual_metadata.get(key, '').strip()
-
-                if expected and expected not in actual:
-                    logger.warning(f"Verification mismatch for {key}: expected '{expected}', got '{actual}'")
-                    return False
-
-            return True
-
-        except Exception as e:
-            logger.error(f"Verification failed for {file_path}: {e}")
-            return False
-
-    def _is_image(self, ext: str) -> bool:
-        """Check if file extension is an image format."""
-        image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.tif', '.tiff', '.bmp', '.webp', '.heic', '.heif'}
-        return ext in image_exts
-
-    def _is_video(self, ext: str) -> bool:
-        """Check if file extension is a video format."""
-        video_exts = {'.mp4', '.mov', '.avi', '.mkv', '.m4v', '.wmv', '.flv', '.webm'}
-        return ext in video_exts
-
-    def _is_pdf(self, ext: str) -> bool:
-        """Check if file extension is PDF."""
-        return ext == '.pdf'
-
-    def _build_image_tags(self, metadata: Dict[str, str]) -> Dict[str, str]:
-        """
-        Build ExifTool tags for image files.
-
-        Uses EXIF, IPTC, and XMP tags for maximum compatibility.
-        """
-        tags = {}
-
-        if metadata.get('title'):
-            tags['EXIF:ImageDescription'] = metadata['title']
-            tags['IPTC:Headline'] = metadata['title']
-            tags['XMP:Title'] = metadata['title']
-
-        if metadata.get('subject'):
-            tags['EXIF:XPSubject'] = metadata['subject']
-            tags['IPTC:Caption-Abstract'] = metadata['subject']
-            tags['XMP:Description'] = metadata['subject']
-
-        if metadata.get('keywords'):
-            tags['EXIF:XPKeywords'] = metadata['keywords']
-            tags['IPTC:Keywords'] = metadata['keywords']
-            tags['XMP:Subject'] = metadata['keywords']
-
-        return tags
-
-    def _build_video_tags(self, metadata: Dict[str, str]) -> Dict[str, str]:
-        """Build ExifTool tags for video files."""
-        tags = {}
-
-        if metadata.get('title'):
-            tags['QuickTime:Title'] = metadata['title']
-            tags['UserData:Title'] = metadata['title']
-
-        if metadata.get('subject'):
-            tags['QuickTime:Description'] = metadata['subject']
-            tags['UserData:Description'] = metadata['subject']
-
-        if metadata.get('keywords'):
-            tags['QuickTime:Keywords'] = metadata['keywords']
-
-        return tags
-
-    def _build_pdf_tags(self, metadata: Dict[str, str]) -> Dict[str, str]:
-        """Build ExifTool tags for PDF files."""
-        tags = {}
-
-        if metadata.get('title'):
-            tags['PDF:Title'] = metadata['title']
-
-        if metadata.get('subject'):
-            tags['PDF:Subject'] = metadata['subject']
-
-        if metadata.get('keywords'):
-            tags['PDF:Keywords'] = metadata['keywords']
-
-        return tags
-
-    def _build_generic_tags(self, metadata: Dict[str, str]) -> Dict[str, str]:
-        """Build generic metadata tags for unknown file types."""
-        tags = {}
-
-        # Try common tags that might work
-        if metadata.get('title'):
-            tags['Title'] = metadata['title']
-
-        if metadata.get('subject'):
-            tags['Description'] = metadata['subject']
-            tags['Subject'] = metadata['subject']
-
-        if metadata.get('keywords'):
-            tags['Keywords'] = metadata['keywords']
-
-        return tags
--- a/backend/app/processors/updaters/image_updater.py
+++ b/backend/app/processors/updaters/image_updater.py
@ -1,221 +0,0 @@
-"""Image metadata updater."""
-
-import piexif
-from PIL import Image
-from PIL.PngImagePlugin import PngInfo
-from typing import Dict
-from pathlib import Path
-
-from ..base_updater import BaseUpdater
-from ..utils import get_logger, create_backup, sanitize_metadata_value
-
-logger = get_logger(__name__)
-
-
-class ImageUpdater(BaseUpdater):
-    """Updater for image file metadata (JPEG, PNG)."""
-
-    SUPPORTED_FORMATS = ['jpg', 'jpeg', 'png', 'gif', 'bmp']
-
-    def update_metadata(self, file_path: str, metadata: Dict[str, str], backup: bool = True) -> bool:
-        """
-        Update image metadata using EXIF for JPEG and PIL for PNG.
-
-        Args:
-            file_path: Path to the image file
-            metadata: Dictionary with 'title', 'subject', 'keywords' keys
-            backup: Whether to create backup before updating
-
-        Returns:
-            True if successful, False otherwise
-        """
-        try:
-            # Validate metadata
-            if not self.validate_metadata(metadata):
-                logger.error(f"Invalid metadata for {file_path}")
-                return False
-
-            # Check file format
-            file_ext = file_path.lower().split('.')[-1]
-            if file_ext not in self.SUPPORTED_FORMATS:
-                logger.error(f"Unsupported image format: {file_ext}")
-                return False
-
-            # Create backup if requested
-            if backup:
-                backup_path = create_backup(file_path)
-                if not backup_path:
-                    logger.warning(f"Failed to create backup for {file_path}, proceeding anyway")
-
-            # Route to appropriate update method
-            if file_ext in ['jpg', 'jpeg']:
-                success = self._update_jpeg_metadata(file_path, metadata)
-            elif file_ext == 'png':
-                success = self._update_png_metadata(file_path, metadata)
-            else:
-                # For GIF, BMP and other formats - skip metadata update
-                # These formats don't support metadata in the same way
-                logger.warning(f"Metadata update not supported for {file_ext} format")
-                return True  # Return success to not block the workflow
-
-            if success:
-                logger.info(f"Successfully updated metadata for {file_path}")
-            else:
-                logger.error(f"Failed to update metadata for {file_path}")
-
-            return success
-
-        except Exception as e:
-            logger.error(f"Failed to update image metadata for {file_path}: {e}", exc_info=True)
-            return False
-
-    def _update_jpeg_metadata(self, file_path: str, metadata: Dict[str, str]) -> bool:
-        """
-        Update JPEG metadata using EXIF.
-
-        Args:
-            file_path: Path to JPEG file
-            metadata: Metadata dictionary
-
-        Returns:
-            True if successful
-        """
-        try:
-            # Sanitize metadata
-            title = sanitize_metadata_value(metadata.get('title', ''), max_length=200)
-            subject = sanitize_metadata_value(metadata.get('subject', ''), max_length=300)
-            keywords = sanitize_metadata_value(metadata.get('keywords', ''), max_length=500)
-
-            # Read existing EXIF
-            try:
-                exif_dict = piexif.load(file_path)
-            except (piexif.InvalidImageDataError, FileNotFoundError):
-                exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}}
-
-            # Update metadata fields
-            exif_dict["0th"][piexif.ImageIFD.ImageDescription] = title.encode('utf-8')
-            exif_dict["0th"][piexif.ImageIFD.XPSubject] = subject.encode('utf-8')
-            exif_dict["0th"][piexif.ImageIFD.XPKeywords] = keywords.encode('utf-8')
-
-            # Encode EXIF data
-            exif_bytes = piexif.dump(exif_dict)
-
-            # Open image and save with new EXIF
-            image = Image.open(file_path)
-            image.save(file_path, exif=exif_bytes)
-
-            logger.debug(f"Updated JPEG metadata - Title: {title}, Subject: {subject}, Keywords: {keywords}")
-            return True
-
-        except Exception as e:
-            logger.error(f"Failed to update JPEG metadata: {e}", exc_info=True)
-            return False
-
-    def _update_png_metadata(self, file_path: str, metadata: Dict[str, str]) -> bool:
-        """
-        Update PNG metadata using PIL.
-
-        Args:
-            file_path: Path to PNG file
-            metadata: Metadata dictionary
-
-        Returns:
-            True if successful
-        """
-        try:
-            # Sanitize metadata
-            title = sanitize_metadata_value(metadata.get('title', ''), max_length=200)
-            subject = sanitize_metadata_value(metadata.get('subject', ''), max_length=300)
-            keywords = sanitize_metadata_value(metadata.get('keywords', ''), max_length=500)
-
-            # Open image
-            image = Image.open(file_path)
-
-            # Create metadata dictionary
-            pnginfo = PngInfo()
-            pnginfo.add_text("Title", title)
-            pnginfo.add_text("Subject", subject)
-            pnginfo.add_text("Keywords", keywords)
-
-            # Save image with new metadata
-            image.save(file_path, pnginfo=pnginfo)
-
-            logger.debug(f"Updated PNG metadata - Title: {title}, Subject: {subject}, Keywords: {keywords}")
-            return True
-
-        except Exception as e:
-            logger.error(f"Failed to update PNG metadata: {e}", exc_info=True)
-            return False
-
-    def verify_metadata(self, file_path: str, expected_metadata: Dict[str, str]) -> bool:
-        """
-        Verify that metadata was written correctly to image.
-
-        Args:
-            file_path: Path to the image file
-            expected_metadata: Expected metadata values
-
-        Returns:
-            True if metadata matches expected values, False otherwise
-        """
-        try:
-            file_ext = file_path.lower().split('.')[-1]
-
-            if file_ext in ['jpg', 'jpeg']:
-                return self._verify_jpeg_metadata(file_path, expected_metadata)
-            else:
-                return self._verify_png_metadata(file_path, expected_metadata)
-
-        except Exception as e:
-            logger.error(f"Failed to verify image metadata for {file_path}: {e}", exc_info=True)
-            return False
-
-    def _verify_jpeg_metadata(self, file_path: str, expected_metadata: Dict[str, str]) -> bool:
-        """Verify JPEG metadata."""
-        try:
-            exif_dict = piexif.load(file_path)
-
-            expected_title = sanitize_metadata_value(expected_metadata.get('title', ''), max_length=200)
-            expected_subject = sanitize_metadata_value(expected_metadata.get('subject', ''), max_length=300)
-            expected_keywords = sanitize_metadata_value(expected_metadata.get('keywords', ''), max_length=500)
-
-            # Check fields
-            actual_title = exif_dict["0th"].get(piexif.ImageIFD.ImageDescription, b"").decode('utf-8', errors='ignore')
-            actual_subject = exif_dict["0th"].get(piexif.ImageIFD.XPSubject, b"").decode('utf-8', errors='ignore')
-            actual_keywords = exif_dict["0th"].get(piexif.ImageIFD.XPKeywords, b"").decode('utf-8', errors='ignore')
-
-            if actual_title == expected_title and actual_subject == expected_subject and actual_keywords == expected_keywords:
-                logger.info(f"Metadata verification successful for {file_path}")
-                return True
-            else:
-                logger.warning(f"Metadata verification failed for {file_path}")
-                return False
-
-        except Exception as e:
-            logger.debug(f"JPEG metadata verification failed: {e}")
-            return False
-
-    def _verify_png_metadata(self, file_path: str, expected_metadata: Dict[str, str]) -> bool:
-        """Verify PNG metadata."""
-        try:
-            image = Image.open(file_path)
-
-            expected_title = sanitize_metadata_value(expected_metadata.get('title', ''), max_length=200)
-            expected_subject = sanitize_metadata_value(expected_metadata.get('subject', ''), max_length=300)
-            expected_keywords = sanitize_metadata_value(expected_metadata.get('keywords', ''), max_length=500)
-
-            # Check metadata
-            actual_title = image.info.get('Title', '').strip()
-            actual_subject = image.info.get('Subject', '').strip()
-            actual_keywords = image.info.get('Keywords', '').strip()
-
-            if actual_title == expected_title and actual_subject == expected_subject and actual_keywords == expected_keywords:
-                logger.info(f"Metadata verification successful for {file_path}")
-                return True
-            else:
-                logger.warning(f"Metadata verification failed for {file_path}")
-                return False
-
-        except Exception as e:
-            logger.debug(f"PNG metadata verification failed: {e}")
-            return False
--- a/backend/app/processors/updaters/office_updater.py
+++ b/backend/app/processors/updaters/office_updater.py
@ -1,253 +0,0 @@
-"""Office document metadata updater."""
-
-from docx import Document as DocxDocument
-from openpyxl import load_workbook
-from pptx import Presentation
-from typing import Dict
-
-from ..base_updater import BaseUpdater
-from ..utils import get_logger, create_backup, sanitize_metadata_value
-
-logger = get_logger(__name__)
-
-
-class OfficeUpdater(BaseUpdater):
-    """Updater for Office file metadata (DOCX, XLSX, PPTX)."""
-
-    SUPPORTED_FORMATS = ['docx', 'xlsx', 'pptx']
-
-    def update_metadata(self, file_path: str, metadata: Dict[str, str], backup: bool = True) -> bool:
-        """
-        Update Office document metadata.
-
-        Updates core properties (title, subject, keywords) for DOCX, XLSX, and PPTX files.
-
-        Args:
-            file_path: Path to the Office file
-            metadata: Dictionary with 'title', 'subject', 'keywords' keys
-            backup: Whether to create backup before updating
-
-        Returns:
-            True if successful, False otherwise
-        """
-        try:
-            # Validate metadata
-            if not self.validate_metadata(metadata):
-                logger.error(f"Invalid metadata for {file_path}")
-                return False
-
-            # Check file format
-            file_ext = file_path.lower().split('.')[-1]
-            if file_ext not in self.SUPPORTED_FORMATS:
-                logger.error(f"Unsupported Office format: {file_ext}")
-                return False
-
-            # Create backup if requested
-            if backup:
-                backup_path = create_backup(file_path)
-                if not backup_path:
-                    logger.warning(f"Failed to create backup for {file_path}, proceeding anyway")
-
-            # Route to appropriate update method
-            if file_ext == 'docx':
-                success = self._update_docx_metadata(file_path, metadata)
-            elif file_ext == 'xlsx':
-                success = self._update_xlsx_metadata(file_path, metadata)
-            elif file_ext == 'pptx':
-                success = self._update_pptx_metadata(file_path, metadata)
-            else:
-                return False
-
-            if success:
-                logger.info(f"Successfully updated metadata for {file_path}")
-            else:
-                logger.error(f"Failed to update metadata for {file_path}")
-
-            return success
-
-        except Exception as e:
-            logger.error(f"Failed to update Office metadata for {file_path}: {e}", exc_info=True)
-            return False
-
-    def _update_docx_metadata(self, file_path: str, metadata: Dict[str, str]) -> bool:
-        """Update DOCX metadata."""
-        try:
-            # Sanitize metadata
-            title = sanitize_metadata_value(metadata.get('title', ''), max_length=200)
-            subject = sanitize_metadata_value(metadata.get('subject', ''), max_length=300)
-            keywords = sanitize_metadata_value(metadata.get('keywords', ''), max_length=500)
-
-            # Open document
-            doc = DocxDocument(file_path)
-            core_props = doc.core_properties
-
-            # Update properties
-            core_props.title = title
-            core_props.subject = subject
-            core_props.keywords = keywords
-
-            # Save document
-            doc.save(file_path)
-
-            logger.debug(f"Updated DOCX metadata - Title: {title}, Subject: {subject}, Keywords: {keywords}")
-            return True
-
-        except Exception as e:
-            logger.error(f"Failed to update DOCX metadata: {e}", exc_info=True)
-            return False
-
-    def _update_xlsx_metadata(self, file_path: str, metadata: Dict[str, str]) -> bool:
-        """Update XLSX metadata."""
-        try:
-            # Sanitize metadata
-            title = sanitize_metadata_value(metadata.get('title', ''), max_length=200)
-            subject = sanitize_metadata_value(metadata.get('subject', ''), max_length=300)
-            keywords = sanitize_metadata_value(metadata.get('keywords', ''), max_length=500)
-
-            # Open workbook
-            workbook = load_workbook(file_path)
-            props = workbook.properties
-
-            # Update properties
-            props.title = title
-            props.subject = subject
-            props.keywords = keywords
-
-            # Save workbook
-            workbook.save(file_path)
-
-            logger.debug(f"Updated XLSX metadata - Title: {title}, Subject: {subject}, Keywords: {keywords}")
-            return True
-
-        except Exception as e:
-            logger.error(f"Failed to update XLSX metadata: {e}", exc_info=True)
-            return False
-
-    def _update_pptx_metadata(self, file_path: str, metadata: Dict[str, str]) -> bool:
-        """Update PPTX metadata."""
-        try:
-            # Sanitize metadata
-            title = sanitize_metadata_value(metadata.get('title', ''), max_length=200)
-            subject = sanitize_metadata_value(metadata.get('subject', ''), max_length=300)
-            keywords = sanitize_metadata_value(metadata.get('keywords', ''), max_length=500)
-
-            # Open presentation
-            presentation = Presentation(file_path)
-            core_props = presentation.core_properties
-
-            # Update properties
-            core_props.title = title
-            core_props.subject = subject
-            core_props.keywords = keywords
-
-            # Save presentation
-            presentation.save(file_path)
-
-            logger.debug(f"Updated PPTX metadata - Title: {title}, Subject: {subject}, Keywords: {keywords}")
-            return True
-
-        except Exception as e:
-            logger.error(f"Failed to update PPTX metadata: {e}", exc_info=True)
-            return False
-
-    def verify_metadata(self, file_path: str, expected_metadata: Dict[str, str]) -> bool:
-        """
-        Verify that metadata was written correctly to Office document.
-
-        Args:
-            file_path: Path to the Office file
-            expected_metadata: Expected metadata values
-
-        Returns:
-            True if metadata matches expected values, False otherwise
-        """
-        try:
-            file_ext = file_path.lower().split('.')[-1]
-
-            if file_ext == 'docx':
-                return self._verify_docx_metadata(file_path, expected_metadata)
-            elif file_ext == 'xlsx':
-                return self._verify_xlsx_metadata(file_path, expected_metadata)
-            elif file_ext == 'pptx':
-                return self._verify_pptx_metadata(file_path, expected_metadata)
-            else:
-                return False
-
-        except Exception as e:
-            logger.error(f"Failed to verify Office metadata for {file_path}: {e}", exc_info=True)
-            return False
-
-    def _verify_docx_metadata(self, file_path: str, expected_metadata: Dict[str, str]) -> bool:
-        """Verify DOCX metadata."""
-        try:
-            doc = DocxDocument(file_path)
-            core_props = doc.core_properties
-
-            expected_title = sanitize_metadata_value(expected_metadata.get('title', ''), max_length=200)
-            expected_subject = sanitize_metadata_value(expected_metadata.get('subject', ''), max_length=300)
-            expected_keywords = sanitize_metadata_value(expected_metadata.get('keywords', ''), max_length=500)
-
-            actual_title = (core_props.title or '').strip()
-            actual_subject = (core_props.subject or '').strip()
-            actual_keywords = (core_props.keywords or '').strip()
-
-            if actual_title == expected_title and actual_subject == expected_subject and actual_keywords == expected_keywords:
-                logger.info(f"Metadata verification successful for {file_path}")
-                return True
-            else:
-                logger.warning(f"Metadata verification failed for {file_path}")
-                return False
-
-        except Exception as e:
-            logger.debug(f"DOCX metadata verification failed: {e}")
-            return False
-
-    def _verify_xlsx_metadata(self, file_path: str, expected_metadata: Dict[str, str]) -> bool:
-        """Verify XLSX metadata."""
-        try:
-            workbook = load_workbook(file_path)
-            props = workbook.properties
-
-            expected_title = sanitize_metadata_value(expected_metadata.get('title', ''), max_length=200)
-            expected_subject = sanitize_metadata_value(expected_metadata.get('subject', ''), max_length=300)
-            expected_keywords = sanitize_metadata_value(expected_metadata.get('keywords', ''), max_length=500)
-
-            actual_title = (props.title or '').strip()
-            actual_subject = (props.subject or '').strip()
-            actual_keywords = (props.keywords or '').strip()
-
-            if actual_title == expected_title and actual_subject == expected_subject and actual_keywords == expected_keywords:
-                logger.info(f"Metadata verification successful for {file_path}")
-                return True
-            else:
-                logger.warning(f"Metadata verification failed for {file_path}")
-                return False
-
-        except Exception as e:
-            logger.debug(f"XLSX metadata verification failed: {e}")
-            return False
-
-    def _verify_pptx_metadata(self, file_path: str, expected_metadata: Dict[str, str]) -> bool:
-        """Verify PPTX metadata."""
-        try:
-            presentation = Presentation(file_path)
-            core_props = presentation.core_properties
-
-            expected_title = sanitize_metadata_value(expected_metadata.get('title', ''), max_length=200)
-            expected_subject = sanitize_metadata_value(expected_metadata.get('subject', ''), max_length=300)
-            expected_keywords = sanitize_metadata_value(expected_metadata.get('keywords', ''), max_length=500)
-
-            actual_title = (core_props.title or '').strip()
-            actual_subject = (core_props.subject or '').strip()
-            actual_keywords = (core_props.keywords or '').strip()
-
-            if actual_title == expected_title and actual_subject == expected_subject and actual_keywords == expected_keywords:
-                logger.info(f"Metadata verification successful for {file_path}")
-                return True
-            else:
-                logger.warning(f"Metadata verification failed for {file_path}")
-                return False
-
-        except Exception as e:
-            logger.debug(f"PPTX metadata verification failed: {e}")
-            return False
--- a/backend/app/processors/updaters/pdf_updater.py
+++ b/backend/app/processors/updaters/pdf_updater.py
@ -1,132 +0,0 @@
-"""PDF metadata updater."""
-
-import pypdf
-from typing import Dict
-from pathlib import Path
-
-from ..base_updater import BaseUpdater
-from ..utils import get_logger, create_backup, sanitize_metadata_value
-
-logger = get_logger(__name__)
-
-
-class PDFUpdater(BaseUpdater):
-    """Updater for PDF file metadata."""
-
-    def update_metadata(self, file_path: str, metadata: Dict[str, str], backup: bool = True) -> bool:
-        """
-        Update PDF metadata fields.
-
-        Updates /Title, /Subject, /Keywords fields in the PDF document information dictionary.
-
-        Args:
-            file_path: Path to the PDF file
-            metadata: Dictionary with 'title', 'subject', 'keywords' keys
-            backup: Whether to create backup before updating
-
-        Returns:
-            True if successful, False otherwise
-        """
-        try:
-            # Validate metadata
-            if not self.validate_metadata(metadata):
-                logger.error(f"Invalid metadata for {file_path}")
-                return False
-
-            # Create backup if requested
-            if backup:
-                backup_path = create_backup(file_path)
-                if not backup_path:
-                    logger.warning(f"Failed to create backup for {file_path}, proceeding anyway")
-
-            # Sanitize metadata values
-            title = sanitize_metadata_value(metadata.get('title', ''), max_length=200)
-            subject = sanitize_metadata_value(metadata.get('subject', ''), max_length=300)
-            keywords = sanitize_metadata_value(metadata.get('keywords', ''), max_length=500)
-
-            # Read existing PDF
-            with open(file_path, 'rb') as f:
-                pdf_reader = pypdf.PdfReader(f)
-                pdf_writer = pypdf.PdfWriter()
-
-                # Copy all pages
-                for page in pdf_reader.pages:
-                    pdf_writer.add_page(page)
-
-                # Update metadata
-                pdf_writer.add_metadata({
-                    '/Title': title,
-                    '/Subject': subject,
-                    '/Keywords': keywords,
-                })
-
-            # Write updated PDF
-            with open(file_path, 'wb') as f:
-                pdf_writer.write(f)
-
-            logger.info(f"Successfully updated metadata for {file_path}")
-            logger.debug(f"Updated fields - Title: {title}, Subject: {subject}, Keywords: {keywords}")
-
-            return True
-
-        except Exception as e:
-            logger.error(f"Failed to update PDF metadata for {file_path}: {e}", exc_info=True)
-            return False
-
-    def verify_metadata(self, file_path: str, expected_metadata: Dict[str, str]) -> bool:
-        """
-        Verify that metadata was written correctly to PDF.
-
-        Checks if the written metadata matches the expected values.
-
-        Args:
-            file_path: Path to the PDF file
-            expected_metadata: Expected metadata values
-
-        Returns:
-            True if metadata matches expected values, False otherwise
-        """
-        try:
-            # Read the updated PDF
-            with open(file_path, 'rb') as f:
-                pdf_reader = pypdf.PdfReader(f)
-                doc_info = pdf_reader.metadata
-
-                if not doc_info:
-                    logger.warning(f"No metadata found in {file_path}")
-                    return False
-
-                # Check each expected field
-                expected_title = sanitize_metadata_value(expected_metadata.get('title', ''), max_length=200)
-                expected_subject = sanitize_metadata_value(expected_metadata.get('subject', ''), max_length=300)
-                expected_keywords = sanitize_metadata_value(expected_metadata.get('keywords', ''), max_length=500)
-
-                # Get actual values and handle bytes
-                actual_title = doc_info.get('/Title')
-                if isinstance(actual_title, bytes):
-                    actual_title = actual_title.decode('utf-8', errors='ignore')
-                actual_title = str(actual_title).strip() if actual_title else ""
-
-                actual_subject = doc_info.get('/Subject')
-                if isinstance(actual_subject, bytes):
-                    actual_subject = actual_subject.decode('utf-8', errors='ignore')
-                actual_subject = str(actual_subject).strip() if actual_subject else ""
-
-                actual_keywords = doc_info.get('/Keywords')
-                if isinstance(actual_keywords, bytes):
-                    actual_keywords = actual_keywords.decode('utf-8', errors='ignore')
-                actual_keywords = str(actual_keywords).strip() if actual_keywords else ""
-
-                # Compare
-                if actual_title == expected_title and actual_subject == expected_subject and actual_keywords == expected_keywords:
-                    logger.info(f"Metadata verification successful for {file_path}")
-                    return True
-                else:
-                    logger.warning(f"Metadata verification failed for {file_path}")
-                    logger.debug(f"Expected - Title: {expected_title}, Subject: {expected_subject}, Keywords: {expected_keywords}")
-                    logger.debug(f"Actual - Title: {actual_title}, Subject: {actual_subject}, Keywords: {actual_keywords}")
-                    return False
-
-        except Exception as e:
-            logger.error(f"Failed to verify PDF metadata for {file_path}: {e}", exc_info=True)
-            return False
--- a/backend/app/processors/updaters/video_updater.py
+++ b/backend/app/processors/updaters/video_updater.py
@ -1,185 +0,0 @@
-"""Video metadata updater."""
-
-from typing import Dict
-
-from ..base_updater import BaseUpdater
-from ..utils import get_logger, create_backup, sanitize_metadata_value
-
-logger = get_logger(__name__)
-
-
-class VideoUpdater(BaseUpdater):
-    """Updater for video file metadata (MP4, MOV, AVI)."""
-
-    SUPPORTED_FORMATS = ['mp4', 'mov', 'avi', 'mkv', 'flv', 'wmv', 'webm']
-
-    def update_metadata(self, file_path: str, metadata: Dict[str, str], backup: bool = True) -> bool:
-        """
-        Update video metadata using mutagen.
-
-        Args:
-            file_path: Path to the video file
-            metadata: Dictionary with 'title', 'subject', 'keywords' keys
-            backup: Whether to create backup before updating
-
-        Returns:
-            True if successful, False otherwise
-        """
-        try:
-            # Validate metadata
-            if not self.validate_metadata(metadata):
-                logger.error(f"Invalid metadata for {file_path}")
-                return False
-
-            # Check file format
-            file_ext = file_path.lower().split('.')[-1]
-            if file_ext not in self.SUPPORTED_FORMATS:
-                logger.error(f"Unsupported video format: {file_ext}")
-                return False
-
-            # Create backup if requested
-            if backup:
-                backup_path = create_backup(file_path)
-                if not backup_path:
-                    logger.warning(f"Failed to create backup for {file_path}, proceeding anyway")
-
-            # Update using mutagen
-            success = self._update_with_mutagen(file_path, metadata)
-
-            if success:
-                logger.info(f"Successfully updated metadata for {file_path}")
-            else:
-                logger.error(f"Failed to update metadata for {file_path}")
-
-            return success
-
-        except Exception as e:
-            logger.error(f"Failed to update video metadata for {file_path}: {e}", exc_info=True)
-            return False
-
-    def _update_with_mutagen(self, file_path: str, metadata: Dict[str, str]) -> bool:
-        """
-        Update video metadata using mutagen.
-
-        Args:
-            file_path: Path to video file
-            metadata: Metadata dictionary
-
-        Returns:
-            True if successful
-        """
-        try:
-            from mutagen import File
-        except ImportError:
-            logger.error("mutagen not installed, cannot update video metadata")
-            return False
-
-        try:
-            # Sanitize metadata
-            title = sanitize_metadata_value(metadata.get('title', ''), max_length=200)
-            subject = sanitize_metadata_value(metadata.get('subject', ''), max_length=300)
-            keywords = sanitize_metadata_value(metadata.get('keywords', ''), max_length=500)
-
-            # Open audio file
-            audio = File(file_path)
-
-            if audio is None:
-                logger.warning(f"mutagen could not identify file format: {file_path}")
-                return False
-
-            # Update tags based on file format
-            file_ext = file_path.lower().split('.')[-1]
-
-            if file_ext == 'mp4':
-                # MP4 uses specific atom names
-                audio['\xa9nam'] = title
-                audio['\xa9cmt'] = subject
-                if 'TXXX:Keywords' not in audio:
-                    audio['TXXX:Keywords'] = keywords
-            elif file_ext == 'mov':
-                # MOV is similar to MP4
-                audio['\xa9nam'] = title
-                audio['\xa9cmt'] = subject
-                if 'TXXX:Keywords' not in audio:
-                    audio['TXXX:Keywords'] = keywords
-            else:
-                # For other formats (AVI, MKV, etc.), use generic ID3/Vorbis tags
-                if hasattr(audio, 'add'):
-                    # ID3v2 style
-                    audio.add_tags()
-                    audio['TIT2'] = title
-                    audio['TXXX:Subject'] = subject
-                    audio['TXXX:Keywords'] = keywords
-                else:
-                    # Vorbis Comment style
-                    audio['title'] = title
-                    audio['subject'] = subject
-                    audio['keywords'] = keywords
-
-            # Save file
-            audio.save()
-
-            logger.debug(f"Updated video metadata - Title: {title}, Subject: {subject}, Keywords: {keywords}")
-            return True
-
-        except Exception as e:
-            logger.error(f"Failed to update video metadata with mutagen: {e}", exc_info=True)
-            return False
-
-    def verify_metadata(self, file_path: str, expected_metadata: Dict[str, str]) -> bool:
-        """
-        Verify that metadata was written correctly to video.
-
-        Args:
-            file_path: Path to the video file
-            expected_metadata: Expected metadata values
-
-        Returns:
-            True if metadata matches expected values, False otherwise
-        """
-        try:
-            from mutagen import File
-        except ImportError:
-            logger.error("mutagen not installed, cannot verify video metadata")
-            return False
-
-        try:
-            audio = File(file_path)
-
-            if audio is None:
-                logger.warning(f"Could not read file for verification: {file_path}")
-                return False
-
-            expected_title = sanitize_metadata_value(expected_metadata.get('title', ''), max_length=200)
-            expected_subject = sanitize_metadata_value(expected_metadata.get('subject', ''), max_length=300)
-            expected_keywords = sanitize_metadata_value(expected_metadata.get('keywords', ''), max_length=500)
-
-            # Get actual values
-            file_ext = file_path.lower().split('.')[-1]
-
-            if file_ext in ['mp4', 'mov']:
-                actual_title = audio.get('\xa9nam', [''])[0] if '\xa9nam' in audio else ""
-                actual_subject = audio.get('\xa9cmt', [''])[0] if '\xa9cmt' in audio else ""
-                actual_keywords = audio.get('TXXX:Keywords', [''])[0] if 'TXXX:Keywords' in audio else ""
-            else:
-                actual_title = audio.get('TIT2', [''])[0] if 'TIT2' in audio else audio.get('title', [''])[0] if 'title' in audio else ""
-                actual_subject = audio.get('TXXX:Subject', [''])[0] if 'TXXX:Subject' in audio else audio.get('subject', [''])[0] if 'subject' in audio else ""
-                actual_keywords = audio.get('TXXX:Keywords', [''])[0] if 'TXXX:Keywords' in audio else audio.get('keywords', [''])[0] if 'keywords' in audio else ""
-
-            # Normalize strings
-            actual_title = str(actual_title).strip() if actual_title else ""
-            actual_subject = str(actual_subject).strip() if actual_subject else ""
-            actual_keywords = str(actual_keywords).strip() if actual_keywords else ""
-
-            if actual_title == expected_title and actual_subject == expected_subject and actual_keywords == expected_keywords:
-                logger.info(f"Metadata verification successful for {file_path}")
-                return True
-            else:
-                logger.warning(f"Metadata verification failed for {file_path}")
-                logger.debug(f"Expected - Title: {expected_title}, Subject: {expected_subject}, Keywords: {expected_keywords}")
-                logger.debug(f"Actual - Title: {actual_title}, Subject: {actual_subject}, Keywords: {actual_keywords}")
-                return False
-
-        except Exception as e:
-            logger.error(f"Failed to verify video metadata for {file_path}: {e}", exc_info=True)
-            return False
--- a/backend/app/processors/utils.py
+++ b/backend/app/processors/utils.py
@ -1,175 +0,0 @@
-"""Utility functions for backup, logging, and file operations."""
-
-import shutil
-import logging
-from pathlib import Path
-from datetime import datetime
-from typing import Optional
-from .config import Config
-
-# Setup logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
-
-def create_backup(file_path: str) -> Optional[Path]:
-    """
-    Create a backup of the file before modification.
-
-    Args:
-        file_path: Path to the file to backup
-
-    Returns:
-        Path to the backup file, or None if backup failed
-    """
-    try:
-        source = Path(file_path)
-        if not source.exists():
-            logger.error(f"File not found for backup: {file_path}")
-            return None
-
-        # Create backup filename with timestamp
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        backup_filename = f"{source.stem}_{timestamp}{source.suffix}"
-        backup_path = Config.BACKUP_DIR / backup_filename
-
-        # Ensure backup directory exists
-        Config.BACKUP_DIR.mkdir(parents=True, exist_ok=True)
-
-        # Copy file
-        shutil.copy2(source, backup_path)
-        logger.info(f"Backup created: {backup_path}")
-
-        return backup_path
-
-    except Exception as e:
-        logger.error(f"Failed to create backup for {file_path}: {e}")
-        return None
-
-def get_logger(name: str) -> logging.Logger:
-    """
-    Get a logger instance.
-
-    Args:
-        name: Logger name
-
-    Returns:
-        Logger instance
-    """
-    return logging.getLogger(name)
-
-def format_metadata_comparison(old_metadata: dict, new_metadata: dict) -> str:
-    """
-    Format metadata comparison for display.
-
-    Args:
-        old_metadata: Old metadata dictionary
-        new_metadata: New metadata dictionary
-
-    Returns:
-        Formatted comparison string
-    """
-    lines = ["\n" + "="*60]
-    lines.append("METADATA COMPARISON")
-    lines.append("="*60)
-
-    all_keys = set(old_metadata.keys()) | set(new_metadata.keys())
-
-    for key in sorted(all_keys):
-        old_value = old_metadata.get(key, "N/A")
-        new_value = new_metadata.get(key, "N/A")
-
-        lines.append(f"\n{key.upper()}:")
-        lines.append(f"  Old: {old_value}")
-        lines.append(f"  New: {new_value}")
-
-        if old_value != new_value:
-            lines.append("  [CHANGED]")
-
-    lines.append("="*60 + "\n")
-    return "\n".join(lines)
-
-def sanitize_metadata_value(value: str, max_length: int = 500) -> str:
-    """
-    Sanitize and truncate metadata value.
-
-    Args:
-        value: Metadata value
-        max_length: Maximum length
-
-    Returns:
-        Sanitized value
-    """
-    if not value:
-        return ""
-
-    # Remove control characters and excessive whitespace
-    value = ' '.join(value.split())
-
-    # Truncate if too long
-    if len(value) > max_length:
-        value = value[:max_length-3] + "..."
-
-    return value.strip()
-
-def validate_file_path(file_path: str) -> bool:
-    """
-    Validate file path exists and is accessible.
-
-    Args:
-        file_path: Path to validate
-
-    Returns:
-        True if valid
-    """
-    try:
-        path = Path(file_path)
-        return path.exists() and path.is_file()
-    except Exception:
-        return False
-
-def get_file_size_mb(file_path: str) -> float:
-    """
-    Get file size in MB.
-
-    Args:
-        file_path: Path to file
-
-    Returns:
-        File size in MB
-    """
-    try:
-        size_bytes = Path(file_path).stat().st_size
-        return size_bytes / (1024 * 1024)
-    except Exception:
-        return 0.0
-
-def create_report_entry(file_path: str, file_type: str, old_metadata: dict,
-                       new_metadata: dict, status: str) -> dict:
-    """
-    Create a report entry for CSV export.
-
-    Args:
-        file_path: Path to file
-        file_type: Type of file
-        old_metadata: Old metadata
-        new_metadata: New metadata
-        status: Processing status (success/failed)
-
-    Returns:
-        Dictionary with report data
-    """
-    return {
-        'timestamp': datetime.now().isoformat(),
-        'file_path': file_path,
-        'file_type': file_type,
-        'old_title': old_metadata.get('title', 'N/A'),
-        'new_title': new_metadata.get('title', 'N/A'),
-        'old_subject': old_metadata.get('subject', 'N/A'),
-        'new_subject': new_metadata.get('subject', 'N/A'),
-        'old_keywords': old_metadata.get('keywords', 'N/A'),
-        'new_keywords': new_metadata.get('keywords', 'N/A'),
-        'status': status
-    }
--- a/backend/app/services/file_service.py
+++ b/backend/app/services/file_service.py
@ -1,264 +0,0 @@
-"""
-File Service
-Handles file upload, download, storage, and cleanup.
-Replaces Flask's tempfile approach with persistent storage.
-"""
-
-from pathlib import Path
-from typing import Optional, BinaryIO
-from fastapi import UploadFile
-import secrets
-import shutil
-import aiofiles
-from datetime import datetime, timedelta
-import os
-
-
-class FileService:
-    """Service for managing file uploads and storage"""
-
-    def __init__(self, upload_dir: str = "./uploads"):
-        """
-        Initialize file service.
-
-        Args:
-            upload_dir: Base directory for file uploads
-        """
-        self.upload_dir = Path(upload_dir)
-        self.upload_dir.mkdir(parents=True, exist_ok=True)
-
-    def _safe_filename(self, filename: str) -> str:
-        """
-        Sanitize filename while preserving Unicode characters.
-        Copied from web_app.py:33-44 - DO NOT use secure_filename()!
-
-        Args:
-            filename: Original filename
-
-        Returns:
-            Sanitized filename
-        """
-        import unicodedata
-
-        # Normalize unicode
-        filename = unicodedata.normalize('NFC', filename)
-        # Remove path separators and null bytes
-        filename = filename.replace('/', '_').replace('\\', '_').replace('\x00', '')
-        # Remove leading/trailing dots and spaces
-        filename = filename.strip('. ')
-        # If empty, use default
-        if not filename:
-            filename = 'unnamed_file'
-        return filename
-
-    async def save_upload(
-        self,
-        file: UploadFile,
-        user_id: int
-    ) -> dict:
-        """
-        Save uploaded file with persistent storage.
-        Organizes files by: uploads/{user_id}/{YYYYMMDD}/{file_id}_{filename}
-
-        Args:
-            file: FastAPI UploadFile object
-            user_id: User ID for organization
-
-        Returns:
-            Dict with file info (file_id, filename, filepath, size)
-        """
-        # Create user directory with date
-        date_str = datetime.now().strftime("%Y%m%d")
-        user_dir = self.upload_dir / str(user_id) / date_str
-        user_dir.mkdir(parents=True, exist_ok=True)
-
-        # Generate unique file ID
-        file_id = secrets.token_urlsafe(8)
-        safe_name = self._safe_filename(file.filename)
-        filename_with_id = f"{file_id}_{safe_name}"
-        filepath = user_dir / filename_with_id
-
-        # Save file async
-        async with aiofiles.open(filepath, 'wb') as f:
-            content = await file.read()
-            await f.write(content)
-
-        return {
-            "file_id": file_id,
-            "filename": safe_name,
-            "filepath": str(filepath),
-            "size": len(content),
-            "uploaded_at": datetime.utcnow().isoformat()
-        }
-
-    def get_file_path(self, filepath: str) -> Path:
-        """
-        Get Path object for file.
-
-        Args:
-            filepath: File path string
-
-        Returns:
-            Path object
-        """
-        return Path(filepath)
-
-    def file_exists(self, filepath: str) -> bool:
-        """
-        Check if file exists.
-
-        Args:
-            filepath: File path string
-
-        Returns:
-            True if file exists
-        """
-        return Path(filepath).exists()
-
-    def delete_file(self, filepath: str) -> bool:
-        """
-        Delete file from storage.
-
-        Args:
-            filepath: File path string
-
-        Returns:
-            True if deleted, False if not found
-        """
-        path = Path(filepath)
-        if path.exists():
-            path.unlink()
-            return True
-        return False
-
-    def cleanup_session_files(self, file_list: list[dict]) -> int:
-        """
-        Cleanup all files in a session.
-
-        Args:
-            file_list: List of file dicts with 'filepath' key
-
-        Returns:
-            Number of files deleted
-        """
-        deleted_count = 0
-        for file_info in file_list:
-            filepath = file_info.get("filepath")
-            if filepath and self.delete_file(filepath):
-                deleted_count += 1
-        return deleted_count
-
-    def cleanup_old_files(self, days: int = 7) -> int:
-        """
-        Delete files older than specified days.
-
-        Args:
-            days: Number of days (default: 7)
-
-        Returns:
-            Number of files deleted
-        """
-        cutoff_time = datetime.now().timestamp() - (days * 86400)
-        deleted_count = 0
-
-        # Iterate through all user directories
-        for user_dir in self.upload_dir.iterdir():
-            if not user_dir.is_dir():
-                continue
-
-            # Iterate through date directories
-            for date_dir in user_dir.iterdir():
-                if not date_dir.is_dir():
-                    continue
-
-                # Check all files in date directory
-                for filepath in date_dir.iterdir():
-                    if filepath.is_file():
-                        # Check file modification time
-                        if filepath.stat().st_mtime < cutoff_time:
-                            filepath.unlink()
-                            deleted_count += 1
-
-                # Remove empty date directories
-                if not any(date_dir.iterdir()):
-                    date_dir.rmdir()
-
-            # Remove empty user directories
-            if not any(user_dir.iterdir()):
-                user_dir.rmdir()
-
-        return deleted_count
-
-    async def create_zip_archive(
-        self,
-        files: list[dict],
-        output_filename: str
-    ) -> Path:
-        """
-        Create ZIP archive of multiple files.
-
-        Args:
-            files: List of file dicts with 'filepath' and 'filename'
-            output_filename: Name for ZIP file
-
-        Returns:
-            Path to created ZIP file
-        """
-        import zipfile
-
-        # Create temp zip file
-        zip_path = self.upload_dir / output_filename
-
-        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
-            for file_info in files:
-                filepath = Path(file_info["filepath"])
-                if filepath.exists():
-                    # Use original filename in archive
-                    arcname = file_info.get("filename", filepath.name)
-                    zipf.write(filepath, arcname=arcname)
-
-        return zip_path
-
-    def get_storage_stats(self) -> dict:
-        """
-        Get storage statistics.
-
-        Returns:
-            Dict with total files, total size, users
-        """
-        total_files = 0
-        total_size = 0
-        users = set()
-
-        for user_dir in self.upload_dir.iterdir():
-            if user_dir.is_dir():
-                users.add(user_dir.name)
-                for date_dir in user_dir.iterdir():
-                    if date_dir.is_dir():
-                        for filepath in date_dir.iterdir():
-                            if filepath.is_file():
-                                total_files += 1
-                                total_size += filepath.stat().st_size
-
-        return {
-            "total_files": total_files,
-            "total_size_bytes": total_size,
-            "total_size_mb": round(total_size / (1024 * 1024), 2),
-            "total_users": len(users)
-        }
-
-
-# Singleton instance
-_file_service = None
-
-
-def get_file_service() -> FileService:
-    """
-    Get or create FileService singleton.
-    Used as FastAPI dependency.
-    """
-    global _file_service
-    if _file_service is None:
-        upload_dir = os.getenv("UPLOAD_DIR", "./uploads")
-        _file_service = FileService(upload_dir)
-    return _file_service
--- a/backend/app/services/metadata_service.py
+++ b/backend/app/services/metadata_service.py
@ -1,379 +0,0 @@
-"""
-Metadata Service
-Handles metadata extraction, generation, and updates.
-Integrates with existing processors (extractors/updaters).
-"""
-
-from pathlib import Path
-from typing import Optional, Dict, Any
-from app.processors.file_detector import FileDetector, FileType
-from app.processors.base_extractor import BaseExtractor
-from app.processors.base_updater import BaseUpdater
-
-# Import all extractors
-from app.processors.extractors.pdf_extractor import PDFExtractor
-from app.processors.extractors.image_extractor import ImageExtractor
-from app.processors.extractors.office_extractor import OfficeExtractor
-from app.processors.extractors.video_extractor import VideoExtractor
-
-# Import all updaters
-from app.processors.updaters.pdf_updater import PDFUpdater
-from app.processors.updaters.image_updater import ImageUpdater
-from app.processors.updaters.office_updater import OfficeUpdater
-from app.processors.updaters.video_updater import VideoUpdater
-
-# Import metadata sources
-from app.processors.metadata_analyzer import MetadataAnalyzer
-from app.processors.excel_metadata_lookup import ExcelMetadataLookup
-from app.processors.metadata_importer import MetadataImporter
-from app.processors.template_manager import TemplateManager
-import os
-
-
-class MetadataService:
-    """Service for metadata operations"""
-
-    def __init__(self):
-        """Initialize metadata service with extractors and updaters"""
-        # Initialize extractors
-        self.extractors = {
-            FileType.PDF: PDFExtractor(),
-            FileType.IMAGE: ImageExtractor(),
-            FileType.OFFICE_DOC: OfficeExtractor(),
-            FileType.OFFICE_SHEET: OfficeExtractor(),
-            FileType.OFFICE_PRESENTATION: OfficeExtractor(),
-            FileType.VIDEO: VideoExtractor(),
-        }
-
-        # Initialize updaters
-        self.updaters = {
-            FileType.PDF: PDFUpdater(),
-            FileType.IMAGE: ImageUpdater(),
-            FileType.OFFICE_DOC: OfficeUpdater(),
-            FileType.OFFICE_SHEET: OfficeUpdater(),
-            FileType.OFFICE_PRESENTATION: OfficeUpdater(),
-            FileType.VIDEO: VideoUpdater(),
-        }
-
-        # Initialize metadata sources (lazy initialization)
-        self._ai_analyzer = None
-        self._excel_lookup = None
-        self._template_manager = None
-
-    @property
-    def ai_analyzer(self) -> Optional[MetadataAnalyzer]:
-        """Lazy initialize AI analyzer (returns None if OPENAI_API_KEY not configured)"""
-        if self._ai_analyzer is None:
-            try:
-                self._ai_analyzer = MetadataAnalyzer()
-            except ValueError as e:
-                # OPENAI_API_KEY not configured
-                print(f"AI analyzer not available: {e}")
-                return None
-        return self._ai_analyzer
-
-    @property
-    def excel_lookup(self) -> Optional[ExcelMetadataLookup]:
-        """Lazy initialize Excel lookup"""
-        if self._excel_lookup is None:
-            excel_path = Path("Celum ID to Adobe Asset Path Mapping Spreadsheet (1).xlsx")
-            if excel_path.exists():
-                self._excel_lookup = ExcelMetadataLookup(str(excel_path))
-        return self._excel_lookup
-
-    @property
-    def template_manager(self) -> TemplateManager:
-        """Lazy initialize template manager"""
-        if self._template_manager is None:
-            self._template_manager = TemplateManager()
-        return self._template_manager
-
-    def get_extractor(self, file_type: FileType) -> Optional[BaseExtractor]:
-        """Get extractor for file type"""
-        return self.extractors.get(file_type)
-
-    def get_updater(self, file_type: FileType) -> Optional[BaseUpdater]:
-        """Get updater for file type"""
-        return self.updaters.get(file_type)
-
-    async def extract_current_metadata(self, filepath: str) -> Dict[str, Optional[str]]:
-        """
-        Extract current metadata from file.
-
-        Args:
-            filepath: Path to file
-
-        Returns:
-            Dict with current metadata
-        """
-        # Detect file type
-        file_type = FileDetector.detect_file_type(filepath)
-
-        # Get extractor
-        extractor = self.get_extractor(file_type)
-        if not extractor:
-            return {}
-
-        # Extract metadata
-        try:
-            metadata = extractor.read_metadata(filepath)
-            return metadata
-        except Exception as e:
-            print(f"Error extracting metadata from {filepath}: {e}")
-            return {}
-
-    async def generate_metadata(
-        self,
-        filepath: str,
-        filename: str,
-        source: str,
-        import_metadata: Optional[Dict[str, Any]] = None,
-        template_name: Optional[str] = None,
-        custom_vars: Optional[Dict[str, str]] = None
-    ) -> Dict[str, Optional[str]]:
-        """
-        Generate suggested metadata based on source.
-
-        Args:
-            filepath: Path to file
-            filename: Original filename
-            source: Metadata source ('ai', 'excel', 'import', 'manual', 'template')
-            import_metadata: Imported metadata map (for 'import' source)
-            template_name: Template name (for 'template' source)
-            custom_vars: Custom variables (for 'template' source)
-
-        Returns:
-            Dict with suggested metadata
-        """
-        if source == "manual":
-            # Return empty metadata for manual entry
-            return {
-                "title": "",
-                "subject": "",
-                "keywords": "",
-                "author": "",
-                "copyright": "",
-                "comments": ""
-            }
-
-        elif source == "ai":
-            return await self._generate_ai_metadata(filepath, filename)
-
-        elif source == "excel":
-            return await self._lookup_excel_metadata(filename)
-
-        elif source == "import":
-            return await self._lookup_import_metadata(filename, import_metadata)
-
-        elif source == "template":
-            return await self._apply_template(filename, template_name, custom_vars)
-
-        else:
-            return {}
-
-    async def _generate_ai_metadata(
-        self,
-        filepath: str,
-        filename: str
-    ) -> Dict[str, Optional[str]]:
-        """Generate metadata using AI (OpenAI)"""
-        # Check if AI analyzer is available
-        analyzer = self.ai_analyzer
-        if not analyzer:
-            return {
-                "title": filename,
-                "subject": "AI generation requires OPENAI_API_KEY environment variable",
-                "keywords": ""
-            }
-
-        # Detect file type
-        file_type = FileDetector.detect_file_type(filepath)
-
-        # Get extractor
-        extractor = self.get_extractor(file_type)
-        if not extractor:
-            return {}
-
-        try:
-            # Extract content from file
-            content = extractor.extract_content(filepath)
-
-            # Check if content is sufficient
-            if not content or len(content.strip()) < 10:
-                return {
-                    "title": filename,
-                    "subject": "No content available for AI analysis",
-                    "keywords": ""
-                }
-
-            # Generate metadata with AI (pass FileType enum, not string)
-            metadata = analyzer.analyze_content(
-                content=content,
-                filename=filename,
-                file_type=file_type
-            )
-
-            return metadata
-
-        except Exception as e:
-            print(f"AI generation error for {filepath}: {e}")
-            return {
-                "title": filename,
-                "subject": f"AI generation failed: {str(e)}",
-                "keywords": ""
-            }
-
-    async def _lookup_excel_metadata(self, filename: str) -> Dict[str, Optional[str]]:
-        """Lookup metadata from Excel file"""
-        if not self.excel_lookup:
-            return {
-                "title": filename,
-                "subject": "Excel lookup not available",
-                "keywords": ""
-            }
-
-        try:
-            metadata = self.excel_lookup.lookup_by_filename(filename)
-            if metadata:
-                return metadata
-            else:
-                return {
-                    "title": filename,
-                    "subject": "Not found in Excel lookup",
-                    "keywords": ""
-                }
-        except Exception as e:
-            print(f"Excel lookup error for {filename}: {e}")
-            return {
-                "title": filename,
-                "subject": f"Excel lookup failed: {str(e)}",
-                "keywords": ""
-            }
-
-    async def _lookup_import_metadata(
-        self,
-        filename: str,
-        import_metadata: Optional[Dict[str, Any]]
-    ) -> Dict[str, Optional[str]]:
-        """Lookup metadata from imported file"""
-        if not import_metadata:
-            return {
-                "title": filename,
-                "subject": "No import metadata available",
-                "keywords": ""
-            }
-
-        # Get filename stem for lookup
-        filename_stem = Path(filename).stem
-
-        # Try exact match first
-        if filename_stem in import_metadata:
-            return import_metadata[filename_stem]
-
-        # Try case-insensitive match
-        for key, value in import_metadata.items():
-            if key.lower() == filename_stem.lower():
-                return value
-
-        return {
-            "title": filename,
-            "subject": "Not found in imported metadata",
-            "keywords": ""
-        }
-
-    async def _apply_template(
-        self,
-        filename: str,
-        template_name: Optional[str],
-        custom_vars: Optional[Dict[str, str]]
-    ) -> Dict[str, Optional[str]]:
-        """Apply template to generate metadata"""
-        if not template_name:
-            return {
-                "title": filename,
-                "subject": "No template specified",
-                "keywords": ""
-            }
-
-        try:
-            # Load template
-            template = self.template_manager.load_template(template_name)
-            if not template:
-                return {
-                    "title": filename,
-                    "subject": f"Template '{template_name}' not found",
-                    "keywords": ""
-                }
-
-            # Apply template
-            metadata = self.template_manager.apply_template(
-                template=template,
-                filename=filename,
-                user=os.getenv("USER", "user"),
-                custom_vars=custom_vars or {}
-            )
-
-            return metadata
-
-        except Exception as e:
-            print(f"Template application error for {filename}: {e}")
-            return {
-                "title": filename,
-                "subject": f"Template application failed: {str(e)}",
-                "keywords": ""
-            }
-
-    async def update_file_metadata(
-        self,
-        filepath: str,
-        metadata: Dict[str, Optional[str]]
-    ) -> tuple[bool, str]:
-        """
-        Update file with metadata.
-
-        Args:
-            filepath: Path to file
-            metadata: Metadata dict to write
-
-        Returns:
-            Tuple of (success, message)
-        """
-        # Detect file type
-        file_type = FileDetector.detect_file_type(filepath)
-
-        # Get updater
-        updater = self.get_updater(file_type)
-        if not updater:
-            return False, f"No updater available for file type: {file_type}"
-
-        try:
-            # Update metadata
-            success = updater.update_metadata(filepath, metadata)
-            if not success:
-                return False, "Metadata update failed"
-
-            # Verify metadata was written
-            verified = updater.verify_metadata(filepath, metadata)
-
-            if verified:
-                return True, "Metadata updated and verified"
-            else:
-                return True, "Metadata updated but verification failed"
-
-        except Exception as e:
-            return False, f"Error updating metadata: {str(e)}"
-
-
-# Singleton instance
-_metadata_service = None
-
-
-def get_metadata_service() -> MetadataService:
-    """
-    Get or create MetadataService singleton.
-    Used as FastAPI dependency.
-    """
-    global _metadata_service
-    if _metadata_service is None:
-        _metadata_service = MetadataService()
-    return _metadata_service
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@ -1,73 +0,0 @@
-# FastAPI Framework
-fastapi==0.109.0
-uvicorn[standard]==0.27.0
-python-multipart==0.0.7
-jinja2>=3.1.0  # Template engine for serving Flask HTML
-
-# Authentication & Security
-python-jose[cryptography]==3.3.0
-passlib[bcrypt]==1.7.4
-PyJWT[crypto]>=2.8.0  # JWT validation for Azure AD id_tokens
-msal>=1.20.0  # Microsoft Authentication Library for SSO (legacy, will be removed)
-
-# Database & ORM
-sqlalchemy==2.0.25
-aiosqlite==0.19.0
-alembic==1.13.1
-
-# Redis & Caching
-redis==5.0.1
-aioredis==2.0.1
-
-# Rate Limiting & Middleware
-slowapi==0.1.9
-
-# Pydantic & Settings
-pydantic==2.5.0
-pydantic-settings==2.1.0
-
-# Async File Operations
-aiofiles==23.2.1
-
-# Core Libraries
-python-magic>=0.4.27
-python-dotenv>=1.0.1
-tqdm>=4.66.0
-
-# Excel Processing
-pandas>=2.0.0
-openpyxl>=3.1.0
-
-# PDF Processing
-pypdf>=4.0.0
-pdfplumber>=0.11.0
-PyPDF2>=3.0.0
-
-# Image Processing
-Pillow>=10.2.0
-pytesseract>=0.3.0
-pdf2image>=1.16.0
-piexif>=1.1.0
-iptcinfo3>=2.1.0
-
-# Office Documents
-python-docx>=1.0.0
-python-pptx>=0.6.0
-
-# Video Processing
-mutagen>=1.45.0
-ffmpeg-python>=0.2.0
-pymediainfo>=7.0.0
-
-# AI & Metadata Generation
-openai>=1.0.0
-tiktoken>=0.5.0
-tenacity>=8.2.0
-
-# ExifTool Integration (optional but recommended)
-PyExifTool>=0.5.6
-
-# Testing
-pytest==7.4.3
-pytest-asyncio==0.21.1
-httpx==0.26.0
--- a/backend/templates/login.html
+++ b/backend/templates/login.html
@ -1,361 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Login - Oliver Metadata Tool</title>
-    <link href="https://fonts.googleapis.com/css2?family=Montserrat:wght@300;400;500;600;700&display=swap" rel="stylesheet">
-    <style>
-        :root {
-            --primary-gold: #FFC407;
-            --primary-gold-dark: #e6b007;
-            --primary-gold-light: #ffcf33;
-            --dark-primary: #2c2c2c;
-            --dark-secondary: #1a1a1a;
-            --white: #ffffff;
-            --text-primary: #1f2937;
-            --text-muted: #6b7280;
-            --overlay-light: rgba(255, 255, 255, 0.95);
-            --border-light: rgba(255, 255, 255, 0.2);
-            --shadow-lg: 0 20px 40px rgba(0, 0, 0, 0.1);
-            --radius-md: 12px;
-            --radius-xl: 20px;
-            --font-family: 'Montserrat', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
-            --transition-fast: 0.15s ease;
-        }
-
-        * { margin: 0; padding: 0; box-sizing: border-box; }
-
-        @keyframes shimmer {
-            0% { transform: translateX(-100%); }
-            100% { transform: translateX(100%); }
-        }
-
-        @keyframes pulse {
-            0%, 100% { transform: scale(1); }
-            50% { transform: scale(1.05); }
-        }
-
-        body {
-            font-family: var(--font-family);
-            background: linear-gradient(135deg, var(--dark-primary) 0%, var(--dark-secondary) 100%);
-            min-height: 100vh;
-            display: flex;
-            align-items: center;
-            justify-content: center;
-            padding: 20px;
-        }
-
-        .login-container {
-            background: var(--overlay-light);
-            backdrop-filter: blur(20px);
-            border-radius: var(--radius-xl);
-            box-shadow: var(--shadow-lg);
-            border: 1px solid var(--border-light);
-            width: 100%;
-            max-width: 450px;
-            padding: 40px;
-        }
-
-        .logo {
-            text-align: center;
-            margin-bottom: 30px;
-            position: relative;
-        }
-
-        .logo h1 {
-            color: var(--primary-gold-dark);
-            font-size: 32px;
-            margin-bottom: 10px;
-            font-weight: 700;
-            text-shadow: 0 2px 4px rgba(255, 196, 7, 0.2);
-        }
-
-        .logo p {
-            color: var(--text-muted);
-            font-size: 14px;
-            font-weight: 500;
-        }
-
-        .divider {
-            text-align: center;
-            margin: 30px 0;
-            position: relative;
-        }
-
-        .divider::before {
-            content: '';
-            position: absolute;
-            left: 0;
-            right: 0;
-            top: 50%;
-            height: 2px;
-            background: linear-gradient(90deg, transparent, var(--primary-gold-light), transparent);
-        }
-
-        .divider span {
-            background: var(--overlay-light);
-            padding: 0 15px;
-            color: var(--text-muted);
-            font-size: 13px;
-            font-weight: 600;
-            position: relative;
-            z-index: 1;
-        }
-
-        .form-group {
-            margin-bottom: 20px;
-        }
-
-        .form-group label {
-            display: block;
-            font-weight: 600;
-            color: var(--text-primary);
-            margin-bottom: 8px;
-            font-size: 14px;
-        }
-
-        .form-group input {
-            width: 100%;
-            padding: 12px;
-            border: 2px solid #dee2e6;
-            border-radius: var(--radius-md);
-            font-size: 14px;
-            font-family: var(--font-family);
-            transition: all var(--transition-fast);
-        }
-
-        .form-group input:focus {
-            outline: none;
-            border-color: var(--primary-gold);
-            box-shadow: 0 0 0 3px rgba(255, 196, 7, 0.1);
-        }
-
-        .btn {
-            width: 100%;
-            padding: 14px;
-            border: none;
-            border-radius: var(--radius-md);
-            font-size: 16px;
-            font-weight: 600;
-            font-family: var(--font-family);
-            cursor: pointer;
-            transition: all var(--transition-fast);
-        }
-
-        .btn:hover {
-            transform: translateY(-2px);
-        }
-
-        .btn-primary {
-            background: linear-gradient(135deg, var(--primary-gold), var(--primary-gold-dark));
-            color: var(--dark-secondary);
-            margin-bottom: 15px;
-            box-shadow: 0 4px 12px rgba(255, 196, 7, 0.3);
-        }
-
-        .btn-primary:hover {
-            box-shadow: 0 6px 16px rgba(255, 196, 7, 0.4);
-        }
-
-        .btn-sso {
-            background: var(--white);
-            color: var(--text-primary);
-            border: 2px solid var(--primary-gold);
-        }
-
-        .btn-sso:hover {
-            border-color: var(--primary-gold-dark);
-            background: #fffbf0;
-            color: var(--primary-gold-dark);
-        }
-
-        .alert {
-            padding: 12px;
-            border-radius: var(--radius-md);
-            margin-bottom: 20px;
-            font-size: 14px;
-            font-weight: 500;
-        }
-
-        .alert-error {
-            background: #fee;
-            color: #c33;
-            border: 2px solid #fcc;
-        }
-
-        .alert-info {
-            background: #fffbf0;
-            color: var(--primary-gold-dark);
-            border: 2px solid var(--primary-gold-light);
-        }
-
-        .test-user-info {
-            background: #fffbf0;
-            border: 2px dashed var(--primary-gold);
-            border-radius: var(--radius-md);
-            padding: 15px;
-            margin-bottom: 20px;
-            font-size: 13px;
-            color: var(--text-primary);
-            animation: pulse 3s infinite;
-        }
-
-        .test-user-info strong {
-            color: var(--primary-gold-dark);
-            font-weight: 600;
-        }
-
-        .test-user-info code {
-            background: rgba(255, 196, 7, 0.15);
-            padding: 2px 6px;
-            border-radius: 4px;
-            font-family: 'Courier New', monospace;
-            color: var(--primary-gold-dark);
-            font-weight: 600;
-        }
-
-        .footer-text {
-            text-align: center;
-            margin-top: 20px;
-            font-size: 12px;
-            color: var(--text-muted);
-            font-weight: 500;
-        }
-
-        .microsoft-icon {
-            display: inline-block;
-            margin-right: 8px;
-        }
-    </style>
-</head>
-<body>
-    <div class="login-container">
-        <div class="logo">
-            <h1>🎯 Oliver Metadata Tool</h1>
-            <p>Sign in to continue</p>
-        </div>
-
-        {% if error %}
-        <div class="alert alert-error">
-            ⚠️ {{ error }}
-        </div>
-        {% endif %}
-
-        {% if info %}
-        <div class="alert alert-info">
-            ℹ️ {{ info }}
-        </div>
-        {% endif %}
-
-        <div class="test-user-info">
-            <strong>🧪 Test Account</strong><br>
-            Username: <code>tester</code><br>
-            Password: <code>oliveradmin</code>
-        </div>
-
-        <form id="loginForm">
-            <div class="form-group">
-                <label for="username">Username</label>
-                <input type="text" id="username" name="username" required autofocus placeholder="Enter your username">
-            </div>
-
-            <div class="form-group">
-                <label for="password">Password</label>
-                <input type="password" id="password" name="password" required placeholder="Enter your password">
-            </div>
-
-            <button type="submit" class="btn btn-primary">
-                🔐 Sign In
-            </button>
-        </form>
-
-        {% if sso_enabled %}
-        <div class="divider">
-            <span>OR</span>
-        </div>
-
-        <button type="button" class="btn btn-sso" id="msalLoginBtn" disabled title="Microsoft SSO coming soon">
-            <span class="microsoft-icon">
-                <svg width="20" height="20" viewBox="0 0 23 23" style="vertical-align: middle;">
-                    <path fill="#f25022" d="M1 1h10v10H1z"/>
-                    <path fill="#00a4ef" d="M12 1h10v10H12z"/>
-                    <path fill="#7fba00" d="M1 12h10v10H1z"/>
-                    <path fill="#ffb900" d="M12 12h10v10H12z"/>
-                </svg>
-            </span>
-            Sign in with Microsoft (Coming Soon)
-        </button>
-        {% endif %}
-
-    <script>
-        // Login form handler
-        document.getElementById('loginForm').addEventListener('submit', async (e) => {
-            e.preventDefault();
-
-            const username = document.getElementById('username').value;
-            const password = document.getElementById('password').value;
-            const submitBtn = e.target.querySelector('button[type="submit"]');
-
-            // Disable button and show loading
-            submitBtn.disabled = true;
-            submitBtn.textContent = '🔄 Signing in...';
-
-            try {
-                const response = await fetch('/api/auth/login', {
-                    method: 'POST',
-                    headers: {
-                        'Content-Type': 'application/json'
-                    },
-                    body: JSON.stringify({ username, password })
-                });
-
-                const data = await response.json();
-
-                if (response.ok) {
-                    // Store JWT tokens
-                    localStorage.setItem('access_token', data.access_token);
-                    localStorage.setItem('refresh_token', data.refresh_token);
-                    localStorage.setItem('user', JSON.stringify(data.user));
-
-                    // Redirect to main page
-                    window.location.href = '/';
-                } else {
-                    // Show error message
-                    showError(data.detail || 'Login failed');
-                    submitBtn.disabled = false;
-                    submitBtn.textContent = '🔐 Sign In';
-                }
-            } catch (error) {
-                console.error('Login error:', error);
-                showError('Network error. Please try again.');
-                submitBtn.disabled = false;
-                submitBtn.textContent = '🔐 Sign In';
-            }
-        });
-
-        function showError(message) {
-            // Remove existing alerts
-            const existingAlert = document.querySelector('.alert-error');
-            if (existingAlert) existingAlert.remove();
-
-            // Create new alert
-            const alert = document.createElement('div');
-            alert.className = 'alert alert-error';
-            alert.textContent = '⚠️ ' + message;
-
-            // Insert before form
-            const form = document.getElementById('loginForm');
-            form.parentNode.insertBefore(alert, form);
-        }
-
-        // MSAL SSO - disabled for now
-        // TODO: Implement client-side MSAL flow
-    </script>
-
-        <div class="footer-text">
-            Oliver Metadata Tool v3.1 | Enterprise Edition
-        </div>
-    </div>
-</body>
-</html>
--- a/backend/test_ai_integration.py
+++ b/backend/test_ai_integration.py
@ -1,146 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script to verify AI metadata generation integration
-Run this after installing dependencies: pip install -r requirements.txt
-"""
-
-import sys
-import os
-from pathlib import Path
-
-# Add backend to path
-sys.path.insert(0, str(Path(__file__).parent))
-
-def test_imports():
-    """Test that all imports work"""
-    print("Testing imports...")
-
-    try:
-        from app.services.metadata_service import MetadataService, get_metadata_service
-        print("✅ MetadataService imported successfully")
-
-        from app.processors.metadata_analyzer import MetadataAnalyzer
-        print("✅ MetadataAnalyzer imported successfully")
-
-        from app.processors.file_detector import FileDetector, FileType
-        print("✅ FileDetector imported successfully")
-
-        return True
-    except Exception as e:
-        print(f"❌ Import failed: {e}")
-        return False
-
-
-def test_service_initialization():
-    """Test MetadataService initialization"""
-    print("\nTesting MetadataService initialization...")
-
-    try:
-        from app.services.metadata_service import get_metadata_service
-
-        service = get_metadata_service()
-        print("✅ MetadataService initialized successfully")
-
-        # Check extractors
-        print(f"   - Extractors: {len(service.extractors)} types")
-
-        # Check updaters
-        print(f"   - Updaters: {len(service.updaters)} types")
-
-        # Check AI analyzer (may be None if no OPENAI_API_KEY)
-        analyzer = service.ai_analyzer
-        if analyzer:
-            print(f"✅ AI Analyzer initialized with model: {analyzer.model}")
-        else:
-            print("⚠️  AI Analyzer not available (OPENAI_API_KEY not configured)")
-
-        return True
-    except Exception as e:
-        print(f"❌ Initialization failed: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-def test_ai_metadata_generation():
-    """Test AI metadata generation (if OPENAI_API_KEY is configured)"""
-    print("\nTesting AI metadata generation...")
-
-    try:
-        from app.services.metadata_service import get_metadata_service
-        from app.processors.file_detector import FileType
-
-        service = get_metadata_service()
-
-        # Check if AI is available
-        if not service.ai_analyzer:
-            print("⚠️  Skipping AI test (OPENAI_API_KEY not configured)")
-            return True
-
-        # Test with sample content
-        test_content = """
-        This is a technical document about the 3M Filtek Universal Restorative.
-        It provides comprehensive shade selection guidelines for dental professionals.
-        The document covers proper color matching techniques and application procedures.
-        """
-
-        test_filename = "3M_Filtek_Shade_Guide.pdf"
-
-        metadata = service.ai_analyzer.analyze_content(
-            content=test_content,
-            filename=test_filename,
-            file_type=FileType.PDF
-        )
-
-        print(f"✅ AI metadata generated:")
-        print(f"   - Title: {metadata.get('title', 'N/A')[:80]}...")
-        print(f"   - Subject: {metadata.get('subject', 'N/A')[:80]}...")
-        print(f"   - Keywords: {metadata.get('keywords', 'N/A')[:80]}...")
-        print(f"   - Tokens used: {metadata.get('_tokens_used', 0)}")
-
-        return True
-    except Exception as e:
-        print(f"❌ AI generation test failed: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-def main():
-    """Run all tests"""
-    print("=" * 60)
-    print("AI Metadata Generation Integration Test")
-    print("=" * 60)
-
-    results = []
-
-    # Test imports
-    results.append(("Imports", test_imports()))
-
-    # Test service initialization
-    results.append(("Service Init", test_service_initialization()))
-
-    # Test AI generation (if available)
-    results.append(("AI Generation", test_ai_metadata_generation()))
-
-    # Print summary
-    print("\n" + "=" * 60)
-    print("Test Summary:")
-    print("=" * 60)
-
-    for test_name, result in results:
-        status = "✅ PASS" if result else "❌ FAIL"
-        print(f"{status}: {test_name}")
-
-    all_passed = all(result for _, result in results)
-
-    if all_passed:
-        print("\n🎉 All tests passed!")
-        return 0
-    else:
-        print("\n⚠️  Some tests failed. Check details above.")
-        return 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/deploy.sh
+++ b/deploy.sh
@ -1,509 +1,92 @@
 #!/bin/bash
+# Solventum Image Metadata — Idempotent Deployment Script
+# Usage: ./deploy.sh
 #
-# Oliver Metadata Tool v4.0 - Production Deployment Script
-# Idempotent deployment for Ubuntu server at /opt/solventum-image-metadata/
+# First run:
+#   cd /opt/oliver-metadata-tool
+#   cp .env.example .env   # edit with your secrets
+#   chmod +x deploy.sh
+#   ./deploy.sh
 #
-# Usage: sudo ./deploy.sh
-#
-# Prerequisites:
-# - Configure Apache/Nginx reverse proxy separately
-# - Ensure .env file is configured
-# - Git repository must be clean (no uncommitted changes)
+# Subsequent updates:
+#   cd /opt/oliver-metadata-tool && ./deploy.sh

-set -e
+set -euo pipefail

-# Colors for output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-CYAN='\033[0;36m'
-NC='\033[0m' # No Color
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+COMPOSE_PROJECT="solventum-image-metadata"

-# Logging functions
-log_info() {
-    echo -e "[$(date '+%Y-%m-%d %H:%M:%S')] ${BLUE}[INFO]${NC} $1"
-}
+# Use sudo for docker if current user can't access docker socket
+DOCKER_CMD="docker"
+if ! docker info > /dev/null 2>&1; then
+    DOCKER_CMD="sudo docker"
+fi

-log_success() {
-    echo -e "[$(date '+%Y-%m-%d %H:%M:%S')] ${GREEN}[SUCCESS]${NC} $1"
-}
+cd "$SCRIPT_DIR"

-log_warn() {
-    echo -e "[$(date '+%Y-%m-%d %H:%M:%S')] ${YELLOW}[WARN]${NC} $1"
-}
+echo "=== Solventum Image Metadata — Deploy ==="
+echo "Directory: $SCRIPT_DIR"
+echo ""

-log_error() {
-    echo -e "[$(date '+%Y-%m-%d %H:%M:%S')] ${RED}[ERROR]${NC} $1"
-}
+# 1. Pull latest code from Bitbucket (runs as current user — needs SSH key)
+echo ">>> Pulling latest code..."
+git pull

-log_step() {
+# 2. Check .env exists (first-run guard)
+if [ ! -f .env ]; then
+    echo ""
+    echo "ERROR: .env file not found!"
+    echo ""
+    echo "  cp .env.example .env"
+    echo "  Then edit .env with your secrets (AZURE_CLIENT_SECRET, SECRET_KEY, etc.)"
    echo ""
-    echo -e "${CYAN}▶ $1${NC}"
-    echo "=============================================="
-}
-
-# Error handler
-error_exit() {
-    log_error "$1"
-    log_error "Deployment failed! Check logs above for details."
    exit 1
-}
-
-# Configuration
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-FRONTEND_DEPLOY_PATH="/var/www/html/solventum-image-metadata"
-
-# Load environment variables to get BACKEND_PORT
-if [[ -f "$SCRIPT_DIR/.env" ]]; then
-    source "$SCRIPT_DIR/.env"
 fi

-BACKEND_PORT="${BACKEND_PORT:-5001}"
-REDIS_PORT=6379
-HEALTH_CHECK_RETRIES=30
-HEALTH_CHECK_INTERVAL=2
-COMPOSE_FILE="docker-compose.fastapi.yml"
-
-# Banner
-echo ""
-echo -e "${CYAN}╔════════════════════════════════════════════════╗${NC}"
-echo -e "${CYAN}║   Oliver Metadata Tool v4.0 Deployment        ║${NC}"
-echo -e "${CYAN}║   FastAPI + React + Redis                     ║${NC}"
-echo -e "${CYAN}╚════════════════════════════════════════════════╝${NC}"
-echo ""
-
-log_info "Starting deployment..."
-log_info "Working directory: $SCRIPT_DIR"
-log_info "Frontend deploy path: $FRONTEND_DEPLOY_PATH"
-
-# -----------------------------------------------------------------------------
-# Pre-flight checks
-# -----------------------------------------------------------------------------
-log_step "Pre-flight Checks"
-
-# Check if running as root
-if [[ $EUID -ne 0 ]]; then
-    error_exit "This script must be run as root (use sudo)"
-fi
-log_info "✓ Running as root"
-
-# Check Docker
-if ! command -v docker &> /dev/null; then
-    error_exit "Docker is not installed"
-fi
-log_info "✓ Docker: $(docker --version)"
-
-# Check docker-compose (try both v1 and v2 syntax)
-if command -v docker-compose &> /dev/null; then
-    DOCKER_COMPOSE="docker-compose"
-elif docker compose version &> /dev/null; then
-    DOCKER_COMPOSE="docker compose"
-else
-    error_exit "docker-compose is not installed"
-fi
-log_info "✓ Docker Compose: $($DOCKER_COMPOSE version --short 2>/dev/null || $DOCKER_COMPOSE version)"
-
-# Check Node.js
-if ! command -v node &> /dev/null; then
-    error_exit "Node.js is not installed"
-fi
-NODE_VERSION=$(node --version)
-log_info "✓ Node.js: $NODE_VERSION"
-
-# Verify Node.js version (need 18+)
-NODE_MAJOR_VERSION=$(echo "$NODE_VERSION" | sed 's/v\([0-9]*\).*/\1/')
-if [[ "$NODE_MAJOR_VERSION" -lt 18 ]]; then
-    log_warn "Node.js version $NODE_VERSION detected. Version 18+ recommended."
-fi
-
-# Check npm
-if ! command -v npm &> /dev/null; then
-    error_exit "npm is not installed"
-fi
-log_info "✓ npm: $(npm --version)"
-
-# Check git
-if ! command -v git &> /dev/null; then
-    log_warn "git is not installed - manual code updates required"
-else
-    log_info "✓ git: $(git --version)"
-fi
-
-# Check .env file
-if [[ ! -f "$SCRIPT_DIR/.env" ]]; then
-    error_exit "Environment file not found at $SCRIPT_DIR/.env"
-fi
-log_info "✓ .env file found"
-
-# Validate required environment variables
-log_info "Validating environment variables..."
-source "$SCRIPT_DIR/.env"
-
-if [[ -z "$SECRET_KEY" ]] || [[ "$SECRET_KEY" == *"change"* ]]; then
-    log_warn "SECRET_KEY not properly set - using default (NOT SECURE FOR PRODUCTION)"
-fi
-
-if [[ -z "$OPENAI_API_KEY" ]]; then
-    log_warn "OPENAI_API_KEY not set - AI features will not work"
-fi
-
-if [[ -n "$AZURE_CLIENT_ID" ]]; then
-    log_info "✓ Azure AD SSO configured"
-fi
-
-# Verify compose file exists
-if [[ ! -f "$SCRIPT_DIR/$COMPOSE_FILE" ]]; then
-    error_exit "$COMPOSE_FILE not found"
-fi
-log_info "✓ Docker Compose file: $COMPOSE_FILE"
-
-# Check frontend directory
-if [[ ! -d "$SCRIPT_DIR/frontend" ]]; then
-    error_exit "Frontend directory not found"
-fi
-log_info "✓ Frontend directory exists"
-
-# Check backend directory
-if [[ ! -d "$SCRIPT_DIR/backend" ]]; then
-    error_exit "Backend directory not found"
-fi
-log_info "✓ Backend directory exists"
-
-log_success "All pre-flight checks passed"
-
-# -----------------------------------------------------------------------------
-# Pull latest code from Git
-# -----------------------------------------------------------------------------
-log_step "Pulling Latest Code"
-
-if command -v git &> /dev/null && [[ -d "$SCRIPT_DIR/.git" ]]; then
-    cd "$SCRIPT_DIR"
-
-    # Get current commit before pull
-    COMMIT_BEFORE=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")
-
-    # Check for uncommitted changes
-    if [[ -n $(git status --porcelain 2>/dev/null) ]]; then
-        log_warn "Uncommitted changes detected:"
-        git status --short
-        read -p "Continue with deployment? [y/N] " -n 1 -r
-        echo
-        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
-            error_exit "Deployment cancelled by user"
-        fi
-    fi
-
-    # Stash any local changes (just in case)
-    log_info "Stashing local changes (if any)..."
-    git stash push -m "Auto-stash before deployment $(date +%Y%m%d-%H%M%S)" || true
-
-    # Pull latest code
-    log_info "Pulling from origin/main..."
-    if git pull origin main; then
-        log_success "Git pull successful"
-    else
-        log_warn "Git pull failed - continuing with existing code"
-        log_warn "This is OK for first deployment or if SSH keys not configured"
-        log_warn "For updates, ensure git credentials are set up"
-    fi
-
-    # Get new commit info
-    COMMIT_HASH=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown")
-    COMMIT_MSG=$(git log -1 --pretty=format:"%s" 2>/dev/null || echo "unknown")
-    COMMIT_DATE=$(git log -1 --pretty=format:"%ci" 2>/dev/null || echo "unknown")
-
-    if [[ "$COMMIT_BEFORE" != "$COMMIT_HASH" ]]; then
-        log_success "Code updated: $COMMIT_BEFORE → $COMMIT_HASH"
-    else
-        log_info "Already up to date at commit: $COMMIT_HASH"
-    fi
-
-    log_info "Commit message: $COMMIT_MSG"
-    log_info "Commit date: $COMMIT_DATE"
-else
-    log_warn "Git not available or not a git repository"
-    COMMIT_HASH="unknown"
-    COMMIT_MSG="unknown"
-    COMMIT_DATE="unknown"
-fi
-
-log_success "Code ready for deployment"
-
-# -----------------------------------------------------------------------------
-# Clean old Docker resources
-# -----------------------------------------------------------------------------
-log_step "Cleaning Old Docker Resources"
-
-cd "$SCRIPT_DIR"
-
-# Stop old containers
-log_info "Stopping old containers..."
-$DOCKER_COMPOSE -f "$COMPOSE_FILE" down --remove-orphans || log_warn "No containers to stop"
-
-# Remove old images for this project (keep base images)
-log_info "Removing old project images..."
-OLD_IMAGES=$(docker images --filter "reference=solventum-image-metadata*" --filter "reference=*oliver*" -q 2>/dev/null || true)
-if [[ -n "$OLD_IMAGES" ]]; then
-    docker rmi -f $OLD_IMAGES 2>/dev/null || log_warn "Some images could not be removed (may be in use)"
-    log_success "Old images removed"
-else
-    log_info "No old images to remove"
-fi
-
-# Clean build cache (keep last 24 hours)
-log_info "Cleaning Docker build cache..."
-docker builder prune -f --filter "until=24h" > /dev/null 2>&1 || true
-
-# Remove unused networks
-log_info "Removing unused networks..."
-docker network prune -f > /dev/null 2>&1 || true
-
-# Show disk space saved
-log_info "Docker cleanup complete"
-
-log_success "Old resources cleaned"
-
-# -----------------------------------------------------------------------------
-# Build Docker containers
-# -----------------------------------------------------------------------------
-log_step "Building Docker Containers"
-
-cd "$SCRIPT_DIR"
-
-# Pull latest base images and build (use cache for efficiency)
-log_info "Building containers with latest base images..."
-$DOCKER_COMPOSE -f "$COMPOSE_FILE" build --pull || error_exit "Docker build failed"
-
-log_success "Docker containers built successfully"
-
-# -----------------------------------------------------------------------------
-# Start Docker services
-# -----------------------------------------------------------------------------
-log_step "Starting Docker Services"
-
-log_info "Starting backend and Redis..."
-$DOCKER_COMPOSE -f "$COMPOSE_FILE" up -d || error_exit "Failed to start Docker services"
-
-# Wait for Redis to be ready (inside Docker network)
-log_info "Waiting for Redis to be ready..."
-sleep 5  # Give Redis time to start
-log_success "Redis container started"
-
-# Wait for backend to start
-log_info "Waiting for backend to start..."
-sleep 5
-
-log_success "Docker services started"
-
-# -----------------------------------------------------------------------------
-# Database initialization (if needed)
-# -----------------------------------------------------------------------------
-log_step "Database Setup"
-
-# Check if database exists
-if [[ -f "$SCRIPT_DIR/backend/data/oliver_metadata.db" ]]; then
-    log_info "Database file exists - skipping initialization"
-else
-    log_info "First run detected - database will be initialized automatically"
-fi
-
-# Note: Alembic migrations would go here if we add them
-# For now, FastAPI initializes DB on first run via init_db()
-
-log_success "Database setup complete"
-
-# -----------------------------------------------------------------------------
-# Build frontend
-# -----------------------------------------------------------------------------
-log_step "Building Frontend"
-
-cd "$SCRIPT_DIR/frontend"
-
-# Check if node_modules exists and package.json changed
-if [[ ! -d "node_modules" ]] || [[ "package.json" -nt "node_modules" ]]; then
-    log_info "Installing frontend dependencies..."
-    npm ci || error_exit "npm ci failed"
-    log_success "Dependencies installed"
-else
-    log_info "Dependencies up to date (skipping install)"
-fi
-
-# Build production bundle
-log_info "Creating production build with Vite..."
-npm run build || error_exit "Frontend build failed"
-
-# Verify dist directory was created
-if [[ ! -d "$SCRIPT_DIR/frontend/dist" ]]; then
-    error_exit "Frontend dist directory not found (build failed)"
-fi
-
-# Verify index.html exists
-if [[ ! -f "$SCRIPT_DIR/frontend/dist/index.html" ]]; then
-    error_exit "Frontend index.html not found in dist/"
-fi
-
-# Get build size
-BUILD_SIZE=$(du -sh "$SCRIPT_DIR/frontend/dist" | cut -f1)
-log_info "Build size: $BUILD_SIZE"
-
-log_success "Frontend built successfully"
-
-# -----------------------------------------------------------------------------
-# Deploy frontend to Apache/Nginx
-# -----------------------------------------------------------------------------
-log_step "Deploying Frontend"
-
-# Create deployment directory if it doesn't exist
-log_info "Creating deployment directory..."
-mkdir -p "$FRONTEND_DEPLOY_PATH"
-
-# Backup existing files (optional)
-if [[ -d "$FRONTEND_DEPLOY_PATH" ]] && [[ "$(ls -A $FRONTEND_DEPLOY_PATH)" ]]; then
-    BACKUP_DIR="/tmp/oliver-metadata-backup-$(date +%Y%m%d-%H%M%S)"
-    log_info "Backing up existing files to $BACKUP_DIR"
-    mkdir -p "$BACKUP_DIR"
-    cp -r "$FRONTEND_DEPLOY_PATH"/* "$BACKUP_DIR/" || log_warn "Backup failed (non-critical)"
-fi
-
-# Clear existing files
-log_info "Removing old frontend files..."
-rm -rf "${FRONTEND_DEPLOY_PATH:?}"/*
-
-# Copy new build
-log_info "Copying new build to web directory..."
-cp -r "$SCRIPT_DIR/frontend/dist/"* "$FRONTEND_DEPLOY_PATH/"
-
-# Set proper ownership for web server
-log_info "Setting permissions..."
-chown -R www-data:www-data "$FRONTEND_DEPLOY_PATH"
-chmod -R 755 "$FRONTEND_DEPLOY_PATH"
-
-# Verify deployment
-if [[ ! -f "$FRONTEND_DEPLOY_PATH/index.html" ]]; then
-    error_exit "Frontend deployment verification failed - index.html not found"
-fi
-
-log_success "Frontend deployed to $FRONTEND_DEPLOY_PATH"
-
-# -----------------------------------------------------------------------------
-# Verification & Health Checks
-# -----------------------------------------------------------------------------
-log_step "Running Health Checks"
-
-# Wait for backend API to be ready
-log_info "Checking backend API health..."
-BACKEND_READY=false
-for i in $(seq 1 $HEALTH_CHECK_RETRIES); do
-    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$BACKEND_PORT/health" 2>/dev/null || echo "000")
-    if [[ "$HTTP_STATUS" == "200" ]]; then
-        BACKEND_READY=true
+# 3. Build Docker image (uses layer cache, picks up code changes via COPY . .)
+echo ">>> Building Docker image..."
+$DOCKER_CMD compose -p "$COMPOSE_PROJECT" build
+
+# 4. Start or restart containers (idempotent — creates if missing, restarts if running)
+echo ">>> Starting containers..."
+$DOCKER_CMD compose -p "$COMPOSE_PROJECT" up -d
+
+# 5. Wait for health check
+#    Database auto-initializes on first container startup:
+#    - Tables created via CREATE TABLE IF NOT EXISTS
+#    - Migrations run in-code (check-before-act pattern)
+#    - Superadmin created if SUPERADMIN_EMAIL is set
+echo ">>> Waiting for app to be healthy..."
+HEALTHY=false
+for i in $(seq 1 20); do
+    if curl -sf http://127.0.0.1:5001/login > /dev/null 2>&1; then
+        echo ">>> App is healthy!"
+        HEALTHY=true
        break
    fi
-    log_info "Waiting for backend... (attempt $i/$HEALTH_CHECK_RETRIES, status: $HTTP_STATUS)"
-    sleep $HEALTH_CHECK_INTERVAL
+    echo "  Waiting... ($i/20)"
+    sleep 3
 done

-if [[ "$BACKEND_READY" != "true" ]]; then
-    log_warn "Backend health check failed - service may still be starting"
-    log_info "Backend logs:"
-    cd "$SCRIPT_DIR"
-    $DOCKER_COMPOSE -f "$COMPOSE_FILE" logs --tail=50 backend
-else
-    log_success "Backend health check passed (HTTP 200)"
+if [ "$HEALTHY" = false ]; then
+    echo ""
+    echo "WARNING: App may not be healthy after 60 seconds."
+    echo "Check logs:"
+    echo "  $DOCKER_CMD compose -p $COMPOSE_PROJECT logs --tail 50"
+    echo ""
+    exit 1
 fi

-# Check API documentation endpoint
-API_DOCS_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$BACKEND_PORT/docs" 2>/dev/null || echo "000")
-if [[ "$API_DOCS_STATUS" == "200" ]]; then
-    log_success "API docs accessible at http://localhost:$BACKEND_PORT/docs"
-else
-    log_warn "API docs check failed (status: $API_DOCS_STATUS)"
-fi
-
-# Verify Redis (check if container is running)
-log_info "Verifying Redis..."
-if docker ps | grep -q oliver-redis; then
-    log_success "Redis container is running"
-else
-    log_warn "Redis container not found"
-fi
-
-# Check Docker container status
-log_info "Docker container status:"
-cd "$SCRIPT_DIR"
-$DOCKER_COMPOSE -f "$COMPOSE_FILE" ps
-
-# -----------------------------------------------------------------------------
-# Cleanup
-# -----------------------------------------------------------------------------
-log_step "Cleanup"
-
-# Remove old Docker images
-log_info "Removing unused Docker images..."
-docker image prune -f > /dev/null 2>&1 || log_warn "Image cleanup failed (non-critical)"
-
-# Remove old backups (keep last 7 days)
-if [[ -d "/tmp" ]]; then
-    log_info "Removing old backup files (>7 days)..."
-    find /tmp -name "oliver-metadata-backup-*" -type d -mtime +7 -exec rm -rf {} + 2>/dev/null || true
-fi
-
-log_success "Cleanup complete"
-
-# -----------------------------------------------------------------------------
-# Summary
-# -----------------------------------------------------------------------------
-echo ""
-echo -e "${GREEN}╔════════════════════════════════════════════════╗${NC}"
-echo -e "${GREEN}║        🎉 Deployment Successful!              ║${NC}"
-echo -e "${GREEN}╚════════════════════════════════════════════════╝${NC}"
-echo ""
-
-if [[ -n "$COMMIT_HASH" ]]; then
-    log_info "Deployed commit: $COMMIT_HASH - $COMMIT_MSG"
-fi
+# 6. Deploy static files for Apache to serve directly
+WEB_DIR="/var/www/html/solventum-image-metadata"
+echo ">>> Deploying static files to $WEB_DIR..."
+sudo rm -rf "$WEB_DIR/static"
+sudo mkdir -p "$WEB_DIR"
+sudo cp -r "$SCRIPT_DIR/static" "$WEB_DIR/static"
+sudo chown -R www-data:www-data "$WEB_DIR"

 echo ""
-log_info "📍 Access Points:"
-echo "   Frontend:     https://ai-sandbox.oliver.solutions/solventum-image-metadata/"
-echo "   Backend API:  https://ai-sandbox.oliver.solutions/solventum-image-metadata/api/"
-echo "   API Docs:     http://localhost:$BACKEND_PORT/docs"
+echo "=== Deploy complete ==="
+echo "URL: https://ai-sandbox.oliver.solutions/solventum-image-metadata/"
 echo ""
-
-log_info "🐳 Docker Services:"
-echo "   Backend:      http://localhost:$BACKEND_PORT"
-echo "   Redis:        localhost:$REDIS_PORT"
-echo ""
-
-log_info "📂 File Locations:"
-echo "   Frontend:     $FRONTEND_DEPLOY_PATH"
-echo "   Backend:      $SCRIPT_DIR/backend"
-echo "   Database:     $SCRIPT_DIR/backend/data/oliver_metadata.db"
-echo "   Uploads:      $SCRIPT_DIR/backend/uploads"
-echo ""
-
-log_info "🔧 Useful Commands:"
-echo "   View logs:       $DOCKER_COMPOSE -f $COMPOSE_FILE logs -f"
-echo "   Stop services:   $DOCKER_COMPOSE -f $COMPOSE_FILE down"
-echo "   Restart backend: $DOCKER_COMPOSE -f $COMPOSE_FILE restart backend"
-echo "   Redis CLI:       docker exec -it oliver-redis redis-cli"
-echo ""
-
-if [[ "$BACKEND_READY" != "true" ]]; then
-    log_warn "⚠️  Backend health check did not pass - verify services manually"
-    echo "   Check logs: $DOCKER_COMPOSE -f $COMPOSE_FILE logs backend"
-else
-    log_success "✓ All health checks passed"
-fi
-
-echo ""
-log_info "🔐 Next Steps:"
-echo "   1. Configure Apache reverse proxy (see apache-config.conf)"
-echo "   2. Test frontend: https://ai-sandbox.oliver.solutions/solventum-image-metadata/"
-echo "   3. Verify SSO redirect (Azure AD)"
-echo "   4. Upload test files and verify metadata updates"
-echo ""
-
-log_success "Deployment complete! 🚀"
-echo "=============================================="
+$DOCKER_CMD compose -p "$COMPOSE_PROJECT" ps
--- a/deploy/apache-solventum-metadata.conf
+++ b/deploy/apache-solventum-metadata.conf
@ -0,0 +1,30 @@
+# Solventum Image Metadata Tool — Apache Config
+# Add these directives inside your existing <VirtualHost *:443> for ai-sandbox.oliver.solutions
+#
+# IMPORTANT: The static files Alias and "ProxyPass ... !" exclusion
+# MUST come BEFORE the main ProxyPass rule.
+
+# Serve static files directly from disk (fast, bypasses Docker)
+Alias /solventum-image-metadata/static /var/www/html/solventum-image-metadata/static
+<Directory /var/www/html/solventum-image-metadata/static>
+    Require all granted
+    Options -Indexes
+</Directory>
+
+# Exclude static from proxy (Apache serves them directly)
+ProxyPass /solventum-image-metadata/static !
+
+# Proxy everything else to Docker container
+ProxyPass /solventum-image-metadata/ http://localhost:5001/
+ProxyPassReverse /solventum-image-metadata/ http://localhost:5001/
+
+# SSE support (disable buffering for realtime AI progress events)
+<LocationMatch "^/solventum-image-metadata/events/">
+    SetEnv proxy-sendchunked 1
+    SetEnv proxy-interim-response RFC
+</LocationMatch>
+
+# Upload size limit (500MB)
+<Location /solventum-image-metadata/>
+    LimitRequestBody 524288000
+</Location>
--- a/deploy/deploy.sh
+++ b/deploy/deploy.sh
@ -0,0 +1,94 @@
+#!/bin/bash
+# Oliver Metadata Tool — Deployment Script
+# Usage: ./deploy.sh [--first-run]
+set -euo pipefail
+
+APP_DIR="/var/www/oliver"
+SERVICE_NAME="oliver-metadata"
+VENV_DIR="$APP_DIR/venv"
+REPO_BRANCH="${DEPLOY_BRANCH:-main}"
+
+echo "=== Oliver Metadata Tool Deployment ==="
+echo "Directory: $APP_DIR"
+echo "Service:   $SERVICE_NAME"
+echo ""
+
+# Check we're running as root or with sudo
+if [ "$EUID" -ne 0 ]; then
+    echo "Please run with sudo"
+    exit 1
+fi
+
+cd "$APP_DIR"
+
+# First run setup
+if [ "${1:-}" = "--first-run" ]; then
+    echo ">>> First-run setup..."
+
+    # System dependencies
+    apt-get update
+    apt-get install -y python3.11 python3.11-venv python3.11-dev \
+        libimage-exiftool-perl tesseract-ocr tesseract-ocr-eng \
+        tesseract-ocr-chi-sim tesseract-ocr-chi-tra tesseract-ocr-jpn tesseract-ocr-kor \
+        poppler-utils ffmpeg gcc
+
+    # Create venv
+    python3.11 -m venv "$VENV_DIR"
+
+    # Create directories
+    mkdir -p "$APP_DIR/uploads" "$APP_DIR/data" "$APP_DIR/templates_saved"
+
+    # Set permissions
+    chown -R www-data:www-data "$APP_DIR"
+
+    # Install systemd service
+    cp "$APP_DIR/deploy/oliver-metadata.service" /etc/systemd/system/
+    systemctl daemon-reload
+    systemctl enable "$SERVICE_NAME"
+
+    # Install Apache config (if Apache is installed)
+    if command -v apache2 &> /dev/null; then
+        cp "$APP_DIR/deploy/oliver-metadata.conf" /etc/apache2/sites-available/
+        a2enmod proxy proxy_http headers rewrite ssl expires
+        a2ensite oliver-metadata
+        echo ">>> Apache config installed. Update SSL paths and restart Apache."
+    fi
+
+    echo ">>> First-run setup complete."
+    echo ">>> Edit $APP_DIR/.env before starting the service."
+    echo ""
+fi
+
+# Pull latest code
+echo ">>> Pulling latest code..."
+sudo -u www-data git pull origin "$REPO_BRANCH"
+
+# Install/update Python deps
+echo ">>> Installing Python dependencies..."
+"$VENV_DIR/bin/pip" install --upgrade pip
+"$VENV_DIR/bin/pip" install -r requirements.txt
+
+# Restart service
+echo ">>> Restarting service..."
+systemctl restart "$SERVICE_NAME"
+
+# Wait for health
+echo ">>> Waiting for service to start..."
+sleep 3
+
+# Health check
+for i in {1..10}; do
+    if curl -sf http://127.0.0.1:5001/login > /dev/null 2>&1; then
+        echo ">>> Service is healthy!"
+        systemctl status "$SERVICE_NAME" --no-pager -l
+        echo ""
+        echo "=== Deployment complete ==="
+        exit 0
+    fi
+    echo "  Waiting... ($i/10)"
+    sleep 2
+done
+
+echo ">>> WARNING: Service may not be healthy. Check logs:"
+echo "  journalctl -u $SERVICE_NAME -n 50 --no-pager"
+exit 1
--- a/deploy/oliver-metadata.conf
+++ b/deploy/oliver-metadata.conf
@ -0,0 +1,57 @@
+<VirtualHost *:443>
+    ServerName metadata.oliver.agency
+
+    # SSL — provide your own certificates
+    SSLEngine on
+    SSLCertificateFile /etc/ssl/certs/oliver-metadata.crt
+    SSLCertificateKeyFile /etc/ssl/private/oliver-metadata.key
+    # SSLCertificateChainFile /etc/ssl/certs/ca-bundle.crt
+
+    # Serve static files directly via Apache (bypass gunicorn)
+    Alias /static /var/www/oliver/static
+    <Directory /var/www/oliver/static>
+        Require all granted
+        Options -Indexes
+        ExpiresActive On
+        ExpiresDefault "access plus 1 week"
+        Header set Cache-Control "public, max-age=604800"
+    </Directory>
+
+    # Proxy to gunicorn/uvicorn
+    ProxyPreserveHost On
+    ProxyPass /static !
+    ProxyPass / http://127.0.0.1:5001/
+    ProxyPassReverse / http://127.0.0.1:5001/
+
+    # SSE support — disable buffering for event streams
+    <LocationMatch "/events/">
+        ProxyPass http://127.0.0.1:5001
+        ProxyPassReverse http://127.0.0.1:5001
+        SetEnv proxy-sendchunked 1
+        SetEnv proxy-interim-response RFC
+    </LocationMatch>
+
+    # Timeouts (AI generation can take 30+ seconds per file)
+    ProxyTimeout 120
+    Timeout 120
+
+    # Upload size limit (500MB)
+    LimitRequestBody 524288000
+
+    # Security headers
+    Header always set X-Content-Type-Options "nosniff"
+    Header always set X-Frame-Options "DENY"
+    Header always set X-XSS-Protection "1; mode=block"
+    Header always set Referrer-Policy "strict-origin-when-cross-origin"
+
+    # Logging
+    ErrorLog ${APACHE_LOG_DIR}/oliver-metadata-error.log
+    CustomLog ${APACHE_LOG_DIR}/oliver-metadata-access.log combined
+</VirtualHost>
+
+# Redirect HTTP to HTTPS
+<VirtualHost *:80>
+    ServerName metadata.oliver.agency
+    RewriteEngine On
+    RewriteRule ^(.*)$ https://%{HTTP_HOST}$1 [R=301,L]
+</VirtualHost>
--- a/deploy/oliver-metadata.service
+++ b/deploy/oliver-metadata.service
@ -0,0 +1,37 @@
+[Unit]
+Description=Oliver Metadata Tool (FastAPI)
+After=network.target
+Wants=network-online.target
+
+[Service]
+Type=notify
+User=www-data
+Group=www-data
+WorkingDirectory=/var/www/oliver
+Environment="PATH=/var/www/oliver/venv/bin:/usr/local/bin:/usr/bin:/bin"
+EnvironmentFile=/var/www/oliver/.env
+
+ExecStart=/var/www/oliver/venv/bin/gunicorn app.main:app \
+    --worker-class uvicorn.workers.UvicornWorker \
+    --workers 2 \
+    --bind 127.0.0.1:5001 \
+    --timeout 120 \
+    --graceful-timeout 30 \
+    --access-logfile - \
+    --error-logfile -
+
+ExecReload=/bin/kill -s HUP $MAINPID
+KillMode=mixed
+TimeoutStopSec=10
+Restart=on-failure
+RestartSec=5
+
+# Security hardening
+NoNewPrivileges=yes
+ProtectSystem=strict
+ProtectHome=yes
+ReadWritePaths=/var/www/oliver/uploads /var/www/oliver/data /var/www/oliver/oliver_metadata.db /var/www/oliver/oliver_sessions.db /tmp
+PrivateTmp=yes
+
+[Install]
+WantedBy=multi-user.target
--- a/docker-compose.fastapi.yml
+++ b/docker-compose.fastapi.yml
@ -1,98 +0,0 @@
-version: '3.9'
-
-services:
-  # Redis for session storage (internal only, no external port)
-  redis:
-    image: redis:7-alpine
-    container_name: oliver-redis
-    restart: unless-stopped
-    volumes:
-      - redis-data:/data
-    command: redis-server --appendonly yes
-    healthcheck:
-      test: ["CMD", "redis-cli", "ping"]
-      interval: 10s
-      timeout: 3s
-      retries: 3
-    networks:
-      - oliver-network
-
-  # FastAPI Backend
-  backend:
-    build:
-      context: ./backend
-      dockerfile: Dockerfile
-    container_name: oliver-backend
-    restart: unless-stopped
-    environment:
-      # Database - use SQLite by default (simpler for migration)
-      DATABASE_URL: sqlite+aiosqlite:///./data/oliver_metadata.db
-      # Or use PostgreSQL:
-      # DATABASE_URL: postgresql+asyncpg://oliver:${DB_PASSWORD:-changeme}@postgres:5432/oliver_metadata
-
-      # Redis (internal Docker network)
-      REDIS_URL: redis://redis:6379/0
-
-      # Security
-      SECRET_KEY: ${SECRET_KEY:-please-change-this-secret-key-in-production}
-
-      # OpenAI (for AI metadata generation)
-      OPENAI_API_KEY: ${OPENAI_API_KEY}
-      AI_MODEL: ${AI_MODEL:-gpt-4o-mini}
-      MAX_TOKENS: ${MAX_TOKENS:-500}
-      TEMPERATURE: ${TEMPERATURE:-0.5}
-
-      # Microsoft SSO (optional)
-      AZURE_CLIENT_ID: ${AZURE_CLIENT_ID}
-      AZURE_CLIENT_SECRET: ${AZURE_CLIENT_SECRET}
-      AZURE_TENANT_ID: ${AZURE_TENANT_ID}
-      REDIRECT_URI: ${REDIRECT_URI:-http://localhost:8000/auth/microsoft/callback}
-
-      # Debugging
-      DEBUG: ${DEBUG:-false}
-
-      # Upload directory
-      UPLOAD_DIR: /app/uploads
-
-      # Frontend directory (for serving static files)
-      FRONTEND_DIR: /app/frontend/dist
-
-    volumes:
-      # Persistent storage for uploads
-      - ./backend/uploads:/app/uploads
-      # Persistent database (SQLite)
-      - ./backend/data:/app/data
-      # Persistent templates
-      - ./backend/output:/app/output
-      # Frontend static files (local dev only - on production, frontend is served by Apache/Nginx)
-      # Comment out the next line for production deployment:
-      - ./frontend/dist:/app/frontend/dist:ro
-      # Excel lookup file (optional - comment out if file doesn't exist)
-      # - ./Celum ID to Adobe Asset Path Mapping Spreadsheet (1).xlsx:/app/Celum ID to Adobe Asset Path Mapping Spreadsheet (1).xlsx:ro
-
-    ports:
-      - "${BACKEND_PORT:-5001}:8000"
-
-    depends_on:
-      redis:
-        condition: service_healthy
-
-    networks:
-      - oliver-network
-
-    command: uvicorn app.main:app --host 0.0.0.0 --port 8000
-
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 40s
-
-volumes:
-  redis-data:
-    driver: local
-
-networks:
-  oliver-network:
-    driver: bridge
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -5,7 +5,7 @@ services:
      dockerfile: Dockerfile
    container_name: oliver-metadata-tool
    ports:
-      - "5001:5001"
+      - "127.0.0.1:5001:5001"
    volumes:
      # Persistent storage for uploads
      - uploads:/app/uploads
@ -25,7 +25,7 @@ services:
    restart: unless-stopped

    healthcheck:
-      test: ["CMD", "python", "-c", "import requests; requests.get('http://localhost:5001/login', timeout=5)"]
+      test: ["CMD", "curl", "-sf", "http://localhost:5001/login"]
      interval: 30s
      timeout: 10s
      retries: 3
--- a/docker-run.sh
+++ b/docker-run.sh
@ -0,0 +1,165 @@
+#!/bin/bash
+# Oliver Metadata Tool - Docker Management Script
+
+set -e
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Functions
+print_header() {
+    echo -e "${BLUE}============================================${NC}"
+    echo -e "${BLUE}  Oliver Metadata Tool - Docker Manager${NC}"
+    echo -e "${BLUE}============================================${NC}"
+}
+
+print_success() {
+    echo -e "${GREEN}✓ $1${NC}"
+}
+
+print_error() {
+    echo -e "${RED}✗ $1${NC}"
+}
+
+print_info() {
+    echo -e "${YELLOW}ℹ $1${NC}"
+}
+
+# Check if Docker is installed
+check_docker() {
+    if ! command -v docker &> /dev/null; then
+        print_error "Docker is not installed. Please install Docker first."
+        exit 1
+    fi
+
+    if ! command -v docker-compose &> /dev/null && ! docker compose version &> /dev/null; then
+        print_error "Docker Compose is not installed. Please install Docker Compose first."
+        exit 1
+    fi
+}
+
+# Build Docker image
+build() {
+    print_header
+    print_info "Building Docker image..."
+    docker-compose build
+    print_success "Docker image built successfully"
+}
+
+# Start containers
+start() {
+    print_header
+    print_info "Starting Oliver Metadata Tool..."
+    docker-compose up -d
+    print_success "Application started successfully"
+    print_info "Access the application at: http://localhost:5001"
+    print_info "Default credentials: tester / oliveradmin"
+}
+
+# Stop containers
+stop() {
+    print_header
+    print_info "Stopping Oliver Metadata Tool..."
+    docker-compose down
+    print_success "Application stopped successfully"
+}
+
+# View logs
+logs() {
+    print_header
+    print_info "Showing application logs (Ctrl+C to exit)..."
+    docker-compose logs -f
+}
+
+# Restart containers
+restart() {
+    print_header
+    print_info "Restarting Oliver Metadata Tool..."
+    docker-compose restart
+    print_success "Application restarted successfully"
+}
+
+# Show status
+status() {
+    print_header
+    docker-compose ps
+}
+
+# Clean up (remove containers and volumes)
+clean() {
+    print_header
+    print_error "WARNING: This will remove all containers, volumes, and data!"
+    read -p "Are you sure? (yes/no): " confirm
+    if [ "$confirm" == "yes" ]; then
+        print_info "Cleaning up..."
+        docker-compose down -v
+        print_success "Cleanup completed"
+    else
+        print_info "Cleanup cancelled"
+    fi
+}
+
+# Show help
+show_help() {
+    print_header
+    echo ""
+    echo "Usage: ./docker-run.sh [command]"
+    echo ""
+    echo "Commands:"
+    echo "  build    - Build Docker image"
+    echo "  start    - Start the application"
+    echo "  stop     - Stop the application"
+    echo "  restart  - Restart the application"
+    echo "  logs     - View application logs"
+    echo "  status   - Show container status"
+    echo "  clean    - Remove containers and volumes (WARNING: deletes data)"
+    echo "  help     - Show this help message"
+    echo ""
+    echo "Examples:"
+    echo "  ./docker-run.sh build    # Build image"
+    echo "  ./docker-run.sh start    # Start application"
+    echo "  ./docker-run.sh logs     # View logs"
+    echo ""
+}
+
+# Main script
+check_docker
+
+case "$1" in
+    build)
+        build
+        ;;
+    start)
+        start
+        ;;
+    stop)
+        stop
+        ;;
+    restart)
+        restart
+        ;;
+    logs)
+        logs
+        ;;
+    status)
+        status
+        ;;
+    clean)
+        clean
+        ;;
+    help|--help|-h)
+        show_help
+        ;;
+    "")
+        show_help
+        ;;
+    *)
+        print_error "Unknown command: $1"
+        show_help
+        exit 1
+        ;;
+esac
--- a/docs/apache/APACHE-MIGRATION.md
+++ b/docs/apache/APACHE-MIGRATION.md
@ -1,155 +0,0 @@
-# Apache Configuration Migration Guide
-
-## ⚠️ Important Changes for FastAPI
-
-Your current Apache config uses **Flask on port 5001**. For FastAPI, you need to change:
-
-**Note:** Using **port 5001** (same as Flask) for Azure AD compatibility
-
-### Current (Flask):
-```apache
-ProxyPass /solventum-image-metadata/ http://localhost:5001/
-ProxyPassReverse /solventum-image-metadata/ http://localhost:5001/
-```
-
-### New (FastAPI):
-```apache
-# Frontend - static files (React build)
-Alias /solventum-image-metadata /var/www/html/solventum-image-metadata
-
-<Directory /var/www/html/solventum-image-metadata>
-    Options -Indexes +FollowSymLinks
-    AllowOverride All
-    Require all granted
-
-    # React Router (SPA) - rewrite to index.html
-    RewriteEngine On
-    RewriteBase /solventum-image-metadata
-    RewriteCond %{REQUEST_FILENAME} !-f
-    RewriteCond %{REQUEST_FILENAME} !-d
-    RewriteCond %{REQUEST_URI} !^/solventum-image-metadata/api/
-    RewriteRule ^ /solventum-image-metadata/index.html [L]
-</Directory>
-
-# Backend API - proxy to FastAPI
-ProxyPreserveHost On
-ProxyTimeout 600
-
-<Location /solventum-image-metadata/api>
-    ProxyPass http://localhost:5001
-    ProxyPassReverse http://localhost:5001
-
-    RequestHeader set X-Forwarded-Proto "https"
-    RequestHeader set X-Forwarded-For "%{REMOTE_ADDR}s"
-</Location>
-```
-
-## Key Changes:
-
-1. **Port unchanged**: 5001 (same port as Flask for Azure AD compatibility)
-2. **Frontend**: Separate static files (not proxied)
-3. **API prefix**: `/solventum-image-metadata/api/` → Backend
-4. **SPA routing**: RewriteRule for React Router
-
-## Update on Server:
-
-```bash
-# 1. Edit Apache config
-sudo nano /etc/apache2/sites-available/solventum-image-metadata.conf
-
-# 2. Replace the ProxyPass lines with the new config above
-
-# 3. Enable required modules
-sudo a2enmod rewrite headers alias
-
-# 4. Test config
-sudo apache2ctl configtest
-
-# 5. Reload Apache
-sudo systemctl reload apache2
-```
-
-## Update .env on Server:
-
-```bash
-# Edit /opt/solventum-image-metadata/.env
-sudo nano /opt/solventum-image-metadata/.env
-
-# Change REDIRECT_URI:
-REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/api/auth/microsoft/callback
-#                                                                         ^^^^ ADD /api/
-```
-
-## Verify:
-
-```bash
-# Backend health (direct)
-curl http://localhost:5001/health
-
-# Frontend (through Apache)
-curl https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-
-# API (through Apache)
-curl https://ai-sandbox.oliver.solutions/solventum-image-metadata/api/health
-```
-
-## Complete Apache VirtualHost Example:
-
-```apache
-<VirtualHost *:443>
-    ServerName ai-sandbox.oliver.solutions
-
-    SSLEngine on
-    SSLCertificateFile /etc/letsencrypt/live/ai-sandbox.oliver.solutions/fullchain.pem
-    SSLCertificateKeyFile /etc/letsencrypt/live/ai-sandbox.oliver.solutions/privkey.pem
-
-    # Security headers
-    Header always set X-Frame-Options "SAMEORIGIN"
-    Header always set X-Content-Type-Options "nosniff"
-
-    # Frontend - React SPA static files
-    Alias /solventum-image-metadata /var/www/html/solventum-image-metadata
-
-    <Directory /var/www/html/solventum-image-metadata>
-        Options -Indexes +FollowSymLinks
-        AllowOverride All
-        Require all granted
-
-        # React Router support
-        RewriteEngine On
-        RewriteBase /solventum-image-metadata
-        RewriteCond %{REQUEST_FILENAME} !-f
-        RewriteCond %{REQUEST_FILENAME} !-d
-        RewriteCond %{REQUEST_URI} !^/solventum-image-metadata/api/
-        RewriteRule ^ /solventum-image-metadata/index.html [L]
-    </Directory>
-
-    # Cache static assets
-    <FilesMatch "\.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2)$">
-        Header set Cache-Control "public, max-age=31536000"
-    </FilesMatch>
-
-    # Don't cache HTML
-    <FilesMatch "\.(html)$">
-        Header set Cache-Control "no-cache, no-store, must-revalidate"
-    </FilesMatch>
-
-    # Backend API - FastAPI reverse proxy
-    ProxyPreserveHost On
-    ProxyTimeout 600
-
-    <Location /solventum-image-metadata/api>
-        ProxyPass http://localhost:5001
-        ProxyPassReverse http://localhost:5001
-
-        RequestHeader set X-Forwarded-Proto "https"
-        RequestHeader set X-Forwarded-For "%{REMOTE_ADDR}s"
-    </Location>
-
-    # Allow large file uploads (500MB)
-    LimitRequestBody 524288000
-
-    ErrorLog ${APACHE_LOG_DIR}/solventum-image-metadata-error.log
-    CustomLog ${APACHE_LOG_DIR}/solventum-image-metadata-access.log combined
-</VirtualHost>
-```
--- a/docs/apache/APACHE-SIMPLE.md
+++ b/docs/apache/APACHE-SIMPLE.md
@ -1,88 +0,0 @@
-# Apache Configuration - Simple Version
-
-## Для ai-sandbox.oliver.solutions
-
-### Вариант 1: Только Backend Proxy (проще, но медленнее)
-
-Backend FastAPI будет serve и static files и API:
-
-```apache
-# Oliver Metadata Tool - Backend only
-ProxyPass /solventum-image-metadata/ http://localhost:5001/
-ProxyPassReverse /solventum-image-metadata/ http://localhost:5001/
-ProxyTimeout 600
-```
-
-**Требует:** Backend должен serve статические файлы React (добавить StaticFiles в FastAPI)
-
---
-
-### Вариант 2: Разделение Frontend/Backend (быстрее, рекомендую)
-
-Frontend - static files, Backend - только API:
-
-```apache
-# Oliver Metadata Tool - Frontend static files
-Alias /solventum-image-metadata /var/www/html/solventum-image-metadata
-
-<Directory /var/www/html/solventum-image-metadata>
-    Options -Indexes +FollowSymLinks
-    AllowOverride All
-    Require all granted
-
-    # React Router support
-    RewriteEngine On
-    RewriteBase /solventum-image-metadata
-    RewriteCond %{REQUEST_FILENAME} !-f
-    RewriteCond %{REQUEST_FILENAME} !-d
-    RewriteCond %{REQUEST_URI} !^/solventum-image-metadata/api/
-    RewriteRule ^ /solventum-image-metadata/index.html [L]
-</Directory>
-
-# Backend API - FastAPI
-ProxyPass /solventum-image-metadata/api/ http://localhost:5001/
-ProxyPassReverse /solventum-image-metadata/api/ http://localhost:5001/
-ProxyTimeout 600
-```
-
-**Преимущества:**
- Apache serve статику быстрее чем FastAPI
- Backend занимается только API логикой
- Лучше кеширование static assets
-
---
-
-## Что использовать?
-
-**Рекомендую Вариант 2** - разделение Frontend/Backend.
-
-Просто добавьте эти строки в существующую конфигурацию Apache.
-
-## После изменения Apache:
-
-```bash
-# Проверить конфиг
-sudo apache2ctl configtest
-
-# Reload Apache
-sudo systemctl reload apache2
-```
-
-## Также обновите .env на сервере:
-
-```bash
-sudo nano /opt/solventum-image-metadata/.env
-
-# Добавьте /api/ в REDIRECT_URI:
-REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/api/auth/microsoft/callback
-```
-
-## Проверка:
-
-```bash
-# Frontend (static files через Apache)
-curl https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-
-# Backend API (proxy через Apache)
-curl https://ai-sandbox.oliver.solutions/solventum-image-metadata/api/health
-```
--- a/docs/apache/apache-config.conf
+++ b/docs/apache/apache-config.conf
@ -1,101 +0,0 @@
-# Oliver Metadata Tool v4.0 - Apache Configuration
-# Location: /etc/apache2/sites-available/solventum-image-metadata.conf
-#
-# Enable with:
-#   sudo a2ensite solventum-image-metadata
-#   sudo a2enmod proxy proxy_http headers rewrite ssl
-#   sudo systemctl reload apache2
-
-<VirtualHost *:80>
-    ServerName ai-sandbox.oliver.solutions
-
-    # Redirect HTTP to HTTPS
-    Redirect permanent / https://ai-sandbox.oliver.solutions/
-</VirtualHost>
-
-<VirtualHost *:443>
-    ServerName ai-sandbox.oliver.solutions
-
-    # SSL Configuration
-    SSLEngine on
-    SSLCertificateFile /etc/letsencrypt/live/ai-sandbox.oliver.solutions/fullchain.pem
-    SSLCertificateKeyFile /etc/letsencrypt/live/ai-sandbox.oliver.solutions/privkey.pem
-
-    # Security headers
-    Header always set X-Frame-Options "SAMEORIGIN"
-    Header always set X-Content-Type-Options "nosniff"
-    Header always set X-XSS-Protection "1; mode=block"
-    Header always set Referrer-Policy "strict-origin-when-cross-origin"
-
-    # =========================================================================
-    # Frontend - React SPA (Static Files)
-    # =========================================================================
-
-    # Serve static files from /var/www/html/solventum-image-metadata
-    DocumentRoot /var/www/html/solventum-image-metadata
-
-    <Directory /var/www/html/solventum-image-metadata>
-        Options -Indexes +FollowSymLinks
-        AllowOverride All
-        Require all granted
-
-        # Enable React Router (SPA routing)
-        RewriteEngine On
-        RewriteBase /solventum-image-metadata
-
-        # Don't rewrite files or directories that exist
-        RewriteCond %{REQUEST_FILENAME} !-f
-        RewriteCond %{REQUEST_FILENAME} !-d
-
-        # Don't rewrite API calls
-        RewriteCond %{REQUEST_URI} !^/solventum-image-metadata/api/
-
-        # Rewrite everything else to index.html
-        RewriteRule ^ /solventum-image-metadata/index.html [L]
-    </Directory>
-
-    # Cache static assets
-    <FilesMatch "\.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$">
-        Header set Cache-Control "public, max-age=31536000"
-    </FilesMatch>
-
-    # Don't cache HTML
-    <FilesMatch "\.(html)$">
-        Header set Cache-Control "no-cache, no-store, must-revalidate"
-        Header set Pragma "no-cache"
-        Header set Expires "0"
-    </FilesMatch>
-
-    # =========================================================================
-    # Backend API - FastAPI (Reverse Proxy)
-    # =========================================================================
-
-    # Proxy API requests to FastAPI backend
-    ProxyPreserveHost On
-    ProxyTimeout 600
-
-    # API endpoints
-    <Location /solventum-image-metadata/api>
-        ProxyPass http://localhost:8000
-        ProxyPassReverse http://localhost:8000
-
-        # Headers for backend
-        RequestHeader set X-Forwarded-Proto "https"
-        RequestHeader set X-Forwarded-For "%{REMOTE_ADDR}s"
-        RequestHeader set X-Real-IP "%{REMOTE_ADDR}s"
-    </Location>
-
-    # Allow large file uploads (500MB)
-    LimitRequestBody 524288000
-
-    # =========================================================================
-    # Logs
-    # =========================================================================
-    ErrorLog ${APACHE_LOG_DIR}/solventum-image-metadata-error.log
-    CustomLog ${APACHE_LOG_DIR}/solventum-image-metadata-access.log combined
-
-    # Log level (debug for troubleshooting, warn for production)
-    LogLevel warn
-</VirtualHost>
-
-# vim: syntax=apache ts=4 sw=4 sts=4 sr noet
--- a/docs/apache/setup-apache.sh
+++ b/docs/apache/setup-apache.sh
@ -1,117 +0,0 @@
-#!/bin/bash
-#
-# Apache Setup Script for Oliver Metadata Tool
-# Run once to configure Apache for the application
-#
-# Usage: sudo ./setup-apache.sh
-
-set -e
-
-# Colors
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m'
-
-log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
-log_success() { echo -e "${GREEN}[OK]${NC} $1"; }
-log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
-
-echo ""
-echo "Oliver Metadata Tool - Apache Setup"
-echo "===================================="
-echo ""
-
-# Check if running as root
-if [[ $EUID -ne 0 ]]; then
-    echo "This script must be run as root (use sudo)"
-    exit 1
-fi
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-APACHE_CONFIG="/etc/apache2/sites-available/solventum-image-metadata.conf"
-
-# -----------------------------------------------------------------------------
-# Enable required Apache modules
-# -----------------------------------------------------------------------------
-log_info "Enabling Apache modules..."
-
-sudo a2enmod proxy 2>/dev/null || log_warn "proxy already enabled"
-sudo a2enmod proxy_http 2>/dev/null || log_warn "proxy_http already enabled"
-sudo a2enmod headers 2>/dev/null || log_warn "headers already enabled"
-sudo a2enmod rewrite 2>/dev/null || log_warn "rewrite already enabled"
-sudo a2enmod ssl 2>/dev/null || log_warn "ssl already enabled"
-
-log_success "Apache modules enabled"
-
-# -----------------------------------------------------------------------------
-# Copy Apache configuration
-# -----------------------------------------------------------------------------
-log_info "Installing Apache configuration..."
-
-if [[ -f "$APACHE_CONFIG" ]]; then
-    log_warn "Configuration already exists, creating backup..."
-    sudo cp "$APACHE_CONFIG" "${APACHE_CONFIG}.backup.$(date +%Y%m%d-%H%M%S)"
-fi
-
-sudo cp "$SCRIPT_DIR/apache-config.conf" "$APACHE_CONFIG"
-
-log_success "Configuration installed"
-
-# -----------------------------------------------------------------------------
-# Test Apache configuration
-# -----------------------------------------------------------------------------
-log_info "Testing Apache configuration..."
-
-if sudo apache2ctl configtest; then
-    log_success "Apache configuration is valid"
-else
-    echo "Apache configuration test failed!"
-    echo "Fix errors and run: sudo apache2ctl configtest"
-    exit 1
-fi
-
-# -----------------------------------------------------------------------------
-# Enable site
-# -----------------------------------------------------------------------------
-log_info "Enabling site..."
-
-sudo a2ensite solventum-image-metadata 2>/dev/null || log_warn "Site already enabled"
-
-log_success "Site enabled"
-
-# -----------------------------------------------------------------------------
-# Reload Apache
-# -----------------------------------------------------------------------------
-log_info "Reloading Apache..."
-
-sudo systemctl reload apache2 || {
-    echo "Apache reload failed, trying restart..."
-    sudo systemctl restart apache2
-}
-
-log_success "Apache reloaded"
-
-# -----------------------------------------------------------------------------
-# Summary
-# -----------------------------------------------------------------------------
-echo ""
-echo "=============================================="
-log_success "Apache setup complete!"
-echo "=============================================="
-echo ""
-
-log_info "Configuration file: $APACHE_CONFIG"
-log_info "Frontend path: /var/www/html/solventum-image-metadata"
-echo ""
-
-log_info "Next steps:"
-echo "  1. Run: sudo ./deploy.sh"
-echo "  2. Access: https://ai-sandbox.oliver.solutions/solventum-image-metadata/"
-echo ""
-
-log_info "Useful commands:"
-echo "  Check config:    sudo apache2ctl configtest"
-echo "  Reload Apache:   sudo systemctl reload apache2"
-echo "  View logs:       sudo tail -f /var/log/apache2/solventum-image-metadata-error.log"
-echo ""
--- a/frontend/.env
+++ b/frontend/.env
@ -1,20 +0,0 @@
-# Frontend Environment Configuration
-# Oliver Metadata Tool v4.0 - React/Vite
-
-# API Configuration
-# IMPORTANT: Use relative URLs for production (avoids mixed content errors with HTTPS)
-VITE_API_URL=/solventum-image-metadata/api
-# For local development without proxy:
-# VITE_API_URL=http://localhost:5001
-
-# Azure AD / MSAL Configuration
-VITE_AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
-VITE_AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
-# For production, use your actual HTTPS URL:
-VITE_AZURE_REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-# For local development:
-# VITE_AZURE_REDIRECT_URI=http://localhost:8888/solventum-image-metadata/
-
-# Application Configuration
-VITE_APP_NAME=Oliver Metadata Tool
-VITE_APP_VERSION=4.0.0
--- a/frontend/.env.example
+++ b/frontend/.env.example
@ -1,32 +0,0 @@
-# Frontend Environment Variables (Vite)
-# Copy to .env for local development, or .env.production for build
-
-# ======================
-# API Configuration
-# ======================
-# IMPORTANT: Use full path for production (Apache proxy)
-# Production:
-VITE_API_URL=/solventum-image-metadata/api
-
-# For local development:
-# VITE_API_URL=http://localhost:5001
-
-# ======================
-# Azure AD / MSAL Configuration
-# ======================
-# Production values for ai-sandbox.oliver.solutions
-VITE_AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
-VITE_AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
-
-# Redirect URI (must match Azure AD app registration)
-# Production:
-VITE_AZURE_REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-
-# Local development:
-# VITE_AZURE_REDIRECT_URI=http://localhost:8888/solventum-image-metadata/
-
-# ======================
-# Application Configuration
-# ======================
-VITE_APP_NAME=Oliver Metadata Tool
-VITE_APP_VERSION=4.0.0
--- a/frontend/.env.production
+++ b/frontend/.env.production
@ -1,13 +0,0 @@
-# Frontend Production Environment
-# API requests go through Apache proxy
-# Must include full path with /solventum-image-metadata prefix
-VITE_API_URL=/solventum-image-metadata/api
-
-# Azure AD Configuration for Production
-VITE_AZURE_CLIENT_ID=9079054c-9620-4757-a256-23413042f1ef
-VITE_AZURE_TENANT_ID=e519c2e6-bc6d-4fdf-8d9c-923c2f002385
-VITE_AZURE_REDIRECT_URI=https://ai-sandbox.oliver.solutions/solventum-image-metadata/
-
-# App Info
-VITE_APP_NAME=Oliver Metadata Tool
-VITE_APP_VERSION=4.0.0
--- a/frontend/index.html
+++ b/frontend/index.html
@ -1,13 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-  <head>
-    <meta charset="UTF-8" />
-    <link rel="icon" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='0.9em' font-size='90'>🎯</text></svg>" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>Oliver Metadata Tool v4.0</title>
-  </head>
-  <body>
-    <div id="root"></div>
-    <script type="module" src="/src/main.tsx"></script>
-  </body>
-</html>
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
--- a/frontend/package.json
+++ b/frontend/package.json
@ -1,31 +0,0 @@
-{
-  "name": "oliver-metadata-frontend",
-  "version": "4.0.0",
-  "type": "module",
-  "scripts": {
-    "dev": "vite",
-    "build": "vite build",
-    "preview": "vite preview"
-  },
-  "dependencies": {
-    "@azure/msal-browser": "^3.30.0",
-    "@azure/msal-react": "^2.2.0",
-    "axios": "^1.6.5",
-    "react": "^18.2.0",
-    "react-dom": "^18.2.0",
-    "react-dropzone": "^14.2.3",
-    "react-hot-toast": "^2.4.1",
-    "react-router-dom": "^6.21.0",
-    "zustand": "^4.4.7"
-  },
-  "devDependencies": {
-    "@types/react": "^18.2.48",
-    "@types/react-dom": "^18.2.18",
-    "@vitejs/plugin-react": "^4.2.1",
-    "autoprefixer": "^10.4.17",
-    "postcss": "^8.4.33",
-    "tailwindcss": "^3.4.1",
-    "typescript": "^5.3.3",
-    "vite": "^5.0.11"
-  }
-}
--- a/frontend/postcss.config.js
+++ b/frontend/postcss.config.js
@ -1,6 +0,0 @@
-export default {
-  plugins: {
-    tailwindcss: {},
-    autoprefixer: {},
-  },
-}
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@ -1,41 +0,0 @@
-import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom';
-import { Toaster } from 'react-hot-toast';
-import { MsalProvider } from '@azure/msal-react';
-import { PublicClientApplication } from '@azure/msal-browser';
-import { useAuthStore } from './store/authStore';
-import LoginPage from './pages/LoginPage';
-import RegisterPage from './pages/RegisterPage';
-import DashboardPage from './pages/DashboardPage';
-import { msalConfig } from './config/msalConfig';
-
-// Initialize MSAL instance
-const msalInstance = new PublicClientApplication(msalConfig);
-
-function ProtectedRoute({ children }: { children: React.ReactNode }) {
-  const isAuthenticated = useAuthStore((state) => state.isAuthenticated);
-  return isAuthenticated ? <>{children}</> : <Navigate to="/login" replace />;
-}
-
-// MSAL handles OAuth redirect automatically - no custom handler needed
-
-function App() {
-  return (
-    <MsalProvider instance={msalInstance}>
-      <BrowserRouter basename="/solventum-image-metadata">
-        <Routes>
-          <Route path="/login" element={<LoginPage />} />
-          <Route path="/register" element={<RegisterPage />} />
-          <Route path="/" element={
-            <ProtectedRoute>
-              <DashboardPage />
-            </ProtectedRoute>
-          } />
-          <Route path="*" element={<Navigate to="/login" replace />} />
-        </Routes>
-      </BrowserRouter>
-      <Toaster position="top-right" />
-    </MsalProvider>
-  );
-}
-
-export default App;
--- a/frontend/src/components/common/Button.tsx
+++ b/frontend/src/components/common/Button.tsx
@ -1,39 +0,0 @@
-import { ReactNode } from 'react';
-
-interface ButtonProps {
-  children: ReactNode;
-  onClick?: () => void;
-  type?: 'button' | 'submit' | 'reset';
-  variant?: 'primary' | 'secondary' | 'success' | 'danger';
-  disabled?: boolean;
-  className?: string;
-}
-
-export default function Button({
-  children,
-  onClick,
-  type = 'button',
-  variant = 'primary',
-  disabled = false,
-  className = '',
-}: ButtonProps) {
-  const baseClasses = 'px-4 py-2 rounded-lg font-medium transition-colors disabled:opacity-50 disabled:cursor-not-allowed';
-
-  const variantClasses = {
-    primary: 'bg-yellow-500 text-white hover:bg-yellow-600',
-    secondary: 'bg-gray-200 text-gray-800 hover:bg-gray-300',
-    success: 'bg-green-500 text-white hover:bg-green-600',
-    danger: 'bg-red-500 text-white hover:bg-red-600',
-  };
-
-  return (
-    <button
-      type={type}
-      onClick={onClick}
-      disabled={disabled}
-      className={`${baseClasses} ${variantClasses[variant]} ${className}`}
-    >
-      {children}
-    </button>
-  );
-}
--- a/Show more
+++ b/Show more
				`@ -1 +0,0 @@`
				`"""Content extractors for different file types."""`
				`@ -1 +0,0 @@`
				`"""Metadata updaters for different file types."""`