ai_qc/backend/migrate_output_files.py
nickviljoen 477780df09 Add client-specific output folders and 14-day auto-cleanup
## New Features

### Client-Specific Output Folders
- Files are now saved in client-specific subdirectories (loreal/, diageo/, unilever/, general/)
- Automatic client detection from profile ID
- Better organization for multi-client environment
- Each client only sees their own QC reports

### Automatic File Cleanup
- Auto-delete reports older than 14 days on every file listing request
- Keeps output folder clean and manageable
- Configurable cleanup age (default: 14 days)

### File Filtering by Client
- API endpoint `/api/output_files` now accepts `?client=<client_id>` parameter
- Frontend automatically filters files by selected client
- No more cluttered file lists for clients

### Migration Script
- `migrate_output_files.py` - Move existing files to client folders
- Dry-run mode by default (use --execute to run)
- Deletes files older than 14 days during migration
- Targets the development folder (`output-dev/`) by default; pass `--production` to target the production folder (`output/`)

## API Changes

### Modified Endpoints
- `GET /api/output_files?client=<client_id>` - List files filtered by client
- `GET /output/<client>/<filename>` - Serve files from client folders
- `GET /output/<filename>` - Legacy route for backward compatibility

### New Functions
- `get_client_from_profile(profile_id)` - Detect client from profile
- `ensure_client_output_folder(client)` - Create client folders
- `cleanup_old_files(max_age_days)` - Delete old files

## File Structure
```
output-dev/
├── loreal/
│   └── 20260202_102514_Missing_text_report.html
├── diageo/
│   └── 20260202_103423_Product_shot_report.html
├── unilever/
│   └── 20260202_104512_Key_visual_report.html
└── general/
    └── 20260202_105634_Other_report.html
```

## Frontend Changes
- `loadSavedFiles()` now includes client parameter in API calls
- Automatically filters saved files by selected client
- Clean UI showing only relevant reports

## Usage

### Migration (Development)
```bash
# Dry-run (no changes)
python3 migrate_output_files.py

# Execute migration
python3 migrate_output_files.py --execute
```

### Migration (Production)
```bash
# Dry-run for production folder
python3 migrate_output_files.py --production

# Execute migration
python3 migrate_output_files.py --production --execute
```

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-02 11:18:05 +02:00

172 lines
5.9 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Migration script to move existing output files to client-specific folders
based on their profile information.
"""
import os
import json
import re
import shutil
from datetime import datetime
import time
def get_client_from_profile(profile_id):
    """Map a profile identifier onto the name of a client output folder.

    The identifier is converted to a lower-case string and scanned for known
    client keywords, in priority order. Note that any profile containing
    ``static`` is attributed to ``loreal``.

    Args:
        profile_id: Profile identifier; any falsy value means "unknown".

    Returns:
        One of ``'loreal'``, ``'diageo'``, ``'unilever'`` or ``'general'``
        (the catch-all for unknown or missing profiles).
    """
    if not profile_id:
        return 'general'

    # (keyword, client) pairs; the first keyword found in the ID wins.
    keyword_to_client = (
        ('loreal', 'loreal'),
        ('static', 'loreal'),
        ('diageo', 'diageo'),
        ('unilever', 'unilever'),
    )

    normalized = str(profile_id).lower()
    for keyword, client in keyword_to_client:
        if keyword in normalized:
            return client
    return 'general'
def extract_profile_from_html(html_path):
    """Extract the profile identifier embedded in an HTML QC report.

    Two strategies are tried in order:

    1. Parse the ``var reportData = {...};`` JavaScript assignment as JSON
       and use its ``profile_id`` value, falling back to the first entry of
       its ``profiles`` list.
    2. Search the markup for a ``Profile: <name>`` label and normalise the
       name (stripped, lower-cased, spaces replaced by underscores).

    Strategy 2 is also used when the embedded JSON is malformed or carries no
    usable profile, so a bad data blob does not hide a readable label.

    Args:
        html_path: Path of the HTML report to inspect.

    Returns:
        The profile identifier, or ``None`` when the file cannot be read or
        neither strategy finds a profile.
    """
    try:
        with open(html_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error extracting profile from {html_path}: {e}")
        return None

    # Look for the profile_id in the JSON data at the end of the file
    json_match = re.search(r'var reportData = ({.*?});', content, re.DOTALL)
    if json_match:
        try:
            data = json.loads(json_match.group(1))
        except ValueError:
            # The non-greedy pattern stops at the first "};", which can cut a
            # nested object short; treat that as "no JSON" and fall through.
            data = None

        if isinstance(data, dict):
            profile = data.get('profile_id')
            if not profile:
                profiles = data.get('profiles')
                # Guard against a missing, null or empty list before indexing.
                if isinstance(profiles, (list, tuple)) and profiles:
                    profile = profiles[0]
            if profile:
                return profile

    # Alternative: look for profile name in the HTML
    profile_match = re.search(r'Profile: ([^<]+)', content)
    if profile_match:
        return profile_match.group(1).strip().lower().replace(' ', '_')

    return None
def _read_json_profile(json_path):
    """Read the profile identifier stored in a JSON report.

    Returns a ``(readable, profile_id)`` pair. ``readable`` is ``False`` when
    the file cannot be opened, is not valid JSON, or its top level is not an
    object; ``profile_id`` may be ``None`` even for a readable file.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return False, None

    if not isinstance(data, dict):
        return False, None

    profile_id = data.get('profile_id')
    if not profile_id:
        profiles = data.get('profiles')
        if isinstance(profiles, (list, tuple)) and profiles:
            profile_id = profiles[0]
    return True, profile_id


def _move_to_client_folder(output_folder, filename, client):
    """Move ``filename`` from the root of ``output_folder`` into ``client``'s subfolder."""
    client_folder = os.path.join(output_folder, client)
    os.makedirs(client_folder, exist_ok=True)
    shutil.move(
        os.path.join(output_folder, filename),
        os.path.join(client_folder, filename),
    )


def migrate_files(output_folder='output-dev', max_age_days=14, dry_run=False):
    """Migrate existing report files to client-specific folders.

    Only regular ``.html`` and ``.json`` files directly inside
    ``output_folder`` are considered; sub-directories and other files are
    left untouched. Each candidate is either deleted (when older than
    ``max_age_days``) or moved into ``<output_folder>/<client>/``, where the
    client is derived from the profile found in the file via
    ``extract_profile_from_html`` / ``get_client_from_profile``. JSON files
    that cannot be parsed go to ``general/``.

    Progress and a final summary are printed to stdout. A failure on one file
    is reported and counted, and processing continues with the next file.

    Args:
        output_folder: Folder whose root-level report files are migrated.
        max_age_days: Files older than this many days are deleted.
        dry_run: When true, only report what would happen; nothing is moved
            or deleted. Counters still reflect the planned actions.

    Returns:
        None.
    """
    if not os.path.exists(output_folder):
        print(f"Output folder {output_folder} does not exist")
        return

    cutoff_time = time.time() - (max_age_days * 24 * 60 * 60)
    migrated_count = 0
    deleted_count = 0
    error_count = 0

    print(f"{'DRY RUN - ' if dry_run else ''}Migrating files from {output_folder}")
    print(f"Files older than {max_age_days} days will be deleted")
    print("=" * 60)

    # Get all files in root output folder
    for filename in os.listdir(output_folder):
        file_path = os.path.join(output_folder, filename)

        # Skip directories (including client folders created by earlier runs)
        if os.path.isdir(file_path):
            continue

        # Skip non-report files
        if not filename.endswith(('.html', '.json')):
            continue

        try:
            # NOTE(review): getctime is the creation time on Windows but the
            # last metadata-change time on Unix, so a chmod/rename resets the
            # measured age there. Kept as-is to avoid changing which files
            # get deleted; confirm whether getmtime is the intended clock.
            file_age = os.path.getctime(file_path)
            age_days = (time.time() - file_age) / (24 * 60 * 60)

            # Delete files older than max_age_days
            if file_age < cutoff_time:
                print(f"❌ DELETE: {filename} (age: {age_days:.1f} days)")
                if not dry_run:
                    os.remove(file_path)
                deleted_count += 1
                continue

            # Migrate files newer than max_age_days
            if filename.endswith('.html'):
                profile_id = extract_profile_from_html(file_path)
                readable = True
            else:
                # Only the JSON read is guarded; a failing move must surface
                # as an error rather than trigger a second move attempt.
                readable, profile_id = _read_json_profile(file_path)

            if readable:
                client = get_client_from_profile(profile_id)
                print(f"📁 MIGRATE: {filename}")
                print(f" Profile: {profile_id} → Client: {client}")
            else:
                # If can't read JSON, move to general
                client = 'general'
                print(f"📁 MIGRATE: {filename} → {client} (default)")

            if not dry_run:
                _move_to_client_folder(output_folder, filename, client)
                if readable:
                    print(f" Moved to: {client}/{filename}")
            migrated_count += 1

        except Exception as e:
            # Per-file boundary: report, count and carry on with the rest.
            print(f"⚠️ ERROR processing {filename}: {e}")
            error_count += 1

    print("=" * 60)
    print(f"{'DRY RUN ' if dry_run else ''}Summary:")
    print(f" Migrated: {migrated_count} files")
    print(f" Deleted: {deleted_count} files")
    print(f" Errors: {error_count} files")

    if dry_run:
        print("\nThis was a DRY RUN. No files were actually moved or deleted.")
        print("Run with --execute to perform the migration.")
def parse_cli_args(argv=None):
    """Parse the migration script's command-line flags.

    Parsing is strict: unknown or abbreviated flags abort with a usage error
    instead of being ignored, so a mistyped ``--production`` combined with
    ``--execute`` cannot silently run a real migration on the dev folder.

    Args:
        argv: Argument list without the program name; ``None`` means the
            process arguments (``sys.argv[1:]``).

    Returns:
        A ``(output_folder, dry_run)`` tuple. ``output_folder`` is
        ``'output'`` with ``--production`` and ``'output-dev'`` otherwise;
        ``dry_run`` is ``True`` unless ``--execute`` is given.

    Raises:
        SystemExit: On unknown flags, or when ``--dev`` and ``--production``
            are combined.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Move existing output files into client-specific folders.',
        allow_abbrev=False,
    )
    target = parser.add_mutually_exclusive_group()
    target.add_argument(
        '--dev',
        action='store_true',
        help="migrate the 'output-dev' folder (default)",
    )
    target.add_argument(
        '--production',
        action='store_true',
        help="migrate the 'output' folder",
    )
    parser.add_argument(
        '--execute',
        action='store_true',
        help='actually move and delete files (default is a dry run)',
    )
    args = parser.parse_args(argv)

    output_folder = 'output' if args.production else 'output-dev'
    return output_folder, not args.execute


if __name__ == '__main__':
    # Dry-run against the development folder unless told otherwise.
    output_folder, dry_run = parse_cli_args()
    migrate_files(output_folder=output_folder, max_age_days=14, dry_run=dry_run)