#!/usr/bin/env python3
"""
Migration script to move existing output files to client-specific folders
based on their profile information.

Report files (``.html`` / ``.json``) found directly inside the output folder
are either deleted (when older than the retention window) or moved into a
sub-folder named after the client inferred from the report's profile.
"""

import json
import os
import re
import shutil
import sys
import time
from datetime import datetime

SECONDS_PER_DAY = 24 * 60 * 60

# File extensions that are treated as report files.
REPORT_EXTENSIONS = ('.html', '.json')

# Ordered (keywords, client) rules: the first rule with a matching keyword wins.
_CLIENT_RULES = (
    (('loreal', 'static'), 'loreal'),
    (('diageo',), 'diageo'),
    (('unilever',), 'unilever'),
)

# Embedded report payload, e.g. ``var reportData = {...};``.
_REPORT_DATA_RE = re.compile(r'var reportData = ({.*?});', re.DOTALL)
# Human-readable fallback, e.g. ``Profile: Some Name<``.
_PROFILE_LABEL_RE = re.compile(r'Profile: ([^<]+)')


def get_client_from_profile(profile_id):
    """Determine client from profile ID.

    Args:
        profile_id: Profile identifier (any object; it is stringified and
            lower-cased before matching). Falsy values map to ``'general'``.

    Returns:
        One of ``'loreal'``, ``'diageo'``, ``'unilever'`` or ``'general'``.
    """
    if not profile_id:
        return 'general'

    profile_lower = str(profile_id).lower()
    for keywords, client in _CLIENT_RULES:
        if any(keyword in profile_lower for keyword in keywords):
            return client
    return 'general'


def _profile_from_report_data(data):
    """Return the profile id stored in parsed report data, or ``None``.

    Looks at ``data['profile_id']`` first and then at the first entry of
    ``data['profiles']``. Non-dict data and missing/empty ``profiles`` yield
    ``None`` instead of raising.
    """
    if not isinstance(data, dict):
        return None

    profile_id = data.get('profile_id')
    if profile_id:
        return profile_id

    profiles = data.get('profiles')
    if isinstance(profiles, (list, tuple)) and profiles:
        return profiles[0]
    return None


def extract_profile_from_html(html_path):
    """Extract profile information from an HTML report.

    The embedded ``var reportData = {...};`` JSON is tried first. If it is
    absent, malformed, or carries no profile, the visible ``Profile: ...``
    label is used as a fallback.

    Args:
        html_path: Path to the HTML report.

    Returns:
        The profile identifier, or ``None`` when the file cannot be read or
        no profile can be found. Read/parse errors are printed, not raised.
    """
    try:
        with open(html_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError) as e:  # ValueError covers UnicodeDecodeError
        print(f"Error extracting profile from {html_path}: {e}")
        return None

    json_match = _REPORT_DATA_RE.search(content)
    if json_match:
        try:
            data = json.loads(json_match.group(1))
        except ValueError as e:
            # The non-greedy pattern can truncate the payload; report it and
            # fall through to the label-based lookup instead of giving up.
            print(f"Error extracting profile from {html_path}: {e}")
        else:
            profile_id = _profile_from_report_data(data)
            if profile_id:
                return profile_id

    profile_match = _PROFILE_LABEL_RE.search(content)
    if profile_match:
        return profile_match.group(1).strip().lower().replace(' ', '_')

    return None


def _move_to_client(output_folder, filename, client, dry_run, report_move=True):
    """Move ``filename`` from ``output_folder`` into its ``client`` sub-folder.

    Nothing is touched on disk when ``dry_run`` is true. ``report_move``
    controls whether the "Moved to" confirmation line is printed.
    """
    if dry_run:
        return

    client_folder = os.path.join(output_folder, client)
    os.makedirs(client_folder, exist_ok=True)
    shutil.move(
        os.path.join(output_folder, filename),
        os.path.join(client_folder, filename),
    )
    if report_move:
        print(f" Moved to: {client}/{filename}")


def migrate_files(output_folder='output-dev', max_age_days=14, dry_run=False):
    """Migrate existing files to client-specific folders.

    Only ``.html`` and ``.json`` files located directly in ``output_folder``
    are considered; sub-directories are skipped.

    Args:
        output_folder: Folder containing the report files.
        max_age_days: Files older than this many days are deleted instead of
            migrated.
        dry_run: When true, only print what would happen; no file is moved
            or deleted.

    Returns:
        None. Progress and a summary are printed to stdout. Per-file failures
        are counted and reported rather than raised.
    """
    if not os.path.exists(output_folder):
        print(f"Output folder {output_folder} does not exist")
        return

    now = time.time()
    cutoff_time = now - (max_age_days * SECONDS_PER_DAY)
    migrated_count = 0
    deleted_count = 0
    error_count = 0

    print(f"{'DRY RUN - ' if dry_run else ''}Migrating files from {output_folder}")
    print(f"Files older than {max_age_days} days will be deleted")
    print("=" * 60)

    # Get all files in root output folder
    for filename in os.listdir(output_folder):
        file_path = os.path.join(output_folder, filename)

        # Skip directories
        if os.path.isdir(file_path):
            continue

        # Skip non-report files
        if not filename.endswith(REPORT_EXTENSIONS):
            continue

        try:
            # NOTE(review): on Unix, ctime is the last metadata change, not
            # the creation time - confirm this is the intended notion of age.
            file_age = os.path.getctime(file_path)
            age_days = (now - file_age) / SECONDS_PER_DAY

            # Delete files older than max_age_days
            if file_age < cutoff_time:
                print(f"❌ DELETE: {filename} (age: {age_days:.1f} days)")
                if not dry_run:
                    os.remove(file_path)
                deleted_count += 1
                continue

            # Migrate files newer than max_age_days
            if filename.endswith('.html'):
                profile_id = extract_profile_from_html(file_path)
                readable = True
            else:
                # Only the read/parse step is guarded, so a failing move is
                # reported as an error instead of being retried blindly.
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                except (OSError, ValueError):
                    profile_id = None
                    readable = False
                else:
                    profile_id = _profile_from_report_data(data)
                    readable = True

            if readable:
                client = get_client_from_profile(profile_id)
                print(f"📁 MIGRATE: {filename}")
                print(f" Profile: {profile_id} → Client: {client}")
                _move_to_client(output_folder, filename, client, dry_run)
            else:
                # If can't read JSON, move to general
                client = 'general'
                print(f"📁 MIGRATE: {filename} → {client} (default)")
                _move_to_client(
                    output_folder, filename, client, dry_run, report_move=False
                )
            migrated_count += 1

        except Exception as e:
            # Top-level per-file boundary: keep going with the other files.
            print(f"⚠️ ERROR processing {filename}: {e}")
            error_count += 1

    print("=" * 60)
    print(f"{'DRY RUN ' if dry_run else ''}Summary:")
    print(f" Migrated: {migrated_count} files")
    print(f" Deleted: {deleted_count} files")
    print(f" Errors: {error_count} files")

    if dry_run:
        print("\nThis was a DRY RUN. No files were actually moved or deleted.")
        print("Run with --execute to perform the migration.")


def main(argv=None):
    """Command-line entry point.

    Flags:
        --execute: actually move/delete files (default is a dry run).
        --production: operate on ``output`` instead of ``output-dev``.
    """
    args = sys.argv if argv is None else argv

    # Parse arguments
    dry_run = '--execute' not in args
    output_folder = 'output-dev'
    if '--production' in args:
        output_folder = 'output'

    migrate_files(output_folder=output_folder, max_age_days=14, dry_run=dry_run)


if __name__ == '__main__':
    main()