- Copy utils.py from src/ (required by extractors) - Fixes ModuleNotFoundError: No module named 'app.processors.utils' Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
175 lines
4.4 KiB
Python
175 lines
4.4 KiB
Python
"""Utility functions for backup, logging, and file operations."""
|
|
|
|
import shutil
|
|
import logging
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
from .config import Config
|
|
|
|
# Configure logging when this module is imported: INFO threshold and a
# "time - logger name - level - message" record layout.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger used by the helpers defined in this file.
logger = logging.getLogger(__name__)
|
|
|
|
def create_backup(file_path: str) -> Optional[Path]:
    """
    Create a timestamped copy of a file in ``Config.BACKUP_DIR``.

    The backup is named ``<stem>_<YYYYmmdd_HHMMSS><suffix>``. If a backup with
    that name already exists (for example two files with the same name from
    different directories backed up within the same second), a numeric
    suffix (``_1``, ``_2``, ...) is added so an earlier backup is never
    overwritten.

    Args:
        file_path: Path to the file to backup

    Returns:
        Path to the backup file, or None if the source does not exist or the
        backup failed (the failure is logged, not raised)
    """
    try:
        source = Path(file_path)
        if not source.exists():
            logger.error(f"File not found for backup: {file_path}")
            return None

        # Ensure backup directory exists before probing it for collisions
        Config.BACKUP_DIR.mkdir(parents=True, exist_ok=True)

        # Create backup filename with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        base_name = f"{source.stem}_{timestamp}"
        backup_path = Config.BACKUP_DIR / f"{base_name}{source.suffix}"

        # shutil.copy2 overwrites an existing destination, so pick a name
        # that is not taken yet instead of clobbering a previous backup.
        attempt = 1
        while backup_path.exists():
            backup_path = Config.BACKUP_DIR / f"{base_name}_{attempt}{source.suffix}"
            attempt += 1

        # Copy file (copy2 also attempts to preserve file metadata)
        shutil.copy2(source, backup_path)
        logger.info(f"Backup created: {backup_path}")

        return backup_path

    except Exception as e:
        # Best-effort by design: callers receive None rather than an exception.
        logger.error(f"Failed to create backup for {file_path}: {e}")
        return None
|
|
|
|
def get_logger(name: str) -> logging.Logger:
    """
    Look up a logger by name via the standard ``logging`` module.

    Args:
        name: Name of the logger to fetch

    Returns:
        The ``logging.Logger`` returned by ``logging.getLogger(name)``
    """
    named_logger = logging.getLogger(name)
    return named_logger
|
|
|
|
def format_metadata_comparison(old_metadata: dict, new_metadata: dict) -> str:
    """
    Build a human-readable, field-by-field comparison of two metadata dicts.

    Every key present in either dictionary is listed in sorted order with its
    old and new value; a key missing from one side is shown as "N/A". Fields
    whose two values differ are flagged with a "[CHANGED]" line.

    Args:
        old_metadata: Metadata before the change
        new_metadata: Metadata after the change

    Returns:
        Multi-line comparison text framed by 60-character "=" rules
    """
    rule = "=" * 60
    report = ["\n" + rule, "METADATA COMPARISON", rule]

    # Union of both key sets so additions and removals are both reported.
    for field in sorted(set(old_metadata) | set(new_metadata)):
        before = old_metadata.get(field, "N/A")
        after = new_metadata.get(field, "N/A")

        report.extend((
            f"\n{field.upper()}:",
            f" Old: {before}",
            f" New: {after}",
        ))
        if before != after:
            report.append(" [CHANGED]")

    report.append(rule + "\n")
    return "\n".join(report)
|
|
|
|
# Code points in Unicode category Cc (C0 controls, DEL, C1 controls) that
# str.split() does not already treat as whitespace. Mapped to None so that
# str.translate() deletes them.
_NON_SPACE_CONTROL_CHARS = {
    code_point: None
    for code_point in (*range(0x00, 0x20), *range(0x7F, 0xA0))
    if not chr(code_point).isspace()
}


def sanitize_metadata_value(value: str, max_length: int = 500) -> str:
    """
    Sanitize and truncate metadata value.

    Non-whitespace control characters (e.g. NUL, BEL, DEL) are deleted, every
    run of whitespace (including newlines and tabs) is collapsed to a single
    space, and the result is limited to ``max_length`` characters.

    Args:
        value: Metadata value; any falsy value (None, "") yields ""
        max_length: Maximum length of the returned string. When the text is
            too long it is cut and "..." is appended; if ``max_length`` is
            smaller than 3 there is no room for the ellipsis and the text is
            simply cut to ``max_length`` characters (empty for values <= 0).

    Returns:
        Sanitized value, never longer than ``max_length`` (for max_length >= 0)
    """
    if not value:
        return ""

    # Delete control characters first so they cannot leave double spaces
    # behind, then collapse all whitespace runs to a single space.
    value = value.translate(_NON_SPACE_CONTROL_CHARS)
    value = ' '.join(value.split())

    # Truncate if too long
    if len(value) > max_length:
        if max_length >= 3:
            value = value[:max_length - 3] + "..."
        else:
            # A negative slice index here would keep almost the whole string;
            # clamp so the length limit is actually honoured.
            value = value[:max(max_length, 0)]

    return value.strip()
|
|
|
|
def validate_file_path(file_path: str) -> bool:
    """
    Check that a path points at an existing regular file.

    Args:
        file_path: Path to check

    Returns:
        True when the path exists and is a file; False otherwise, including
        when building or inspecting the path raises an exception
    """
    try:
        candidate = Path(file_path)
        if not candidate.exists():
            return False
        return candidate.is_file()
    except Exception:
        # Any failure while inspecting the path counts as "not valid".
        return False
|
|
|
|
def get_file_size_mb(file_path: str) -> float:
    """
    Report the size of a file in mebibytes (bytes / 1024 / 1024).

    Args:
        file_path: Path to the file to measure

    Returns:
        Size in MB as a float, or 0.0 when the file cannot be stat'ed
    """
    bytes_per_mb = 1024 * 1024
    try:
        return Path(file_path).stat().st_size / bytes_per_mb
    except Exception:
        # Missing or unreadable files are reported as size zero.
        return 0.0
|
|
|
|
def create_report_entry(file_path: str, file_type: str, old_metadata: dict,
                        new_metadata: dict, status: str) -> dict:
    """
    Assemble one flat report row describing a processed file.

    Args:
        file_path: Path to the processed file
        file_type: Type of the file
        old_metadata: Metadata before processing
        new_metadata: Metadata after processing
        status: Processing status (success/failed)

    Returns:
        Dictionary with the current ISO timestamp, the file path and type,
        old/new values for title, subject and keywords ("N/A" when a key is
        absent), and the status
    """
    entry = {
        'timestamp': datetime.now().isoformat(),
        'file_path': file_path,
        'file_type': file_type,
    }

    # Old/new pairs are inserted field by field to keep the column order.
    for field in ('title', 'subject', 'keywords'):
        entry[f'old_{field}'] = old_metadata.get(field, 'N/A')
        entry[f'new_{field}'] = new_metadata.get(field, 'N/A')

    entry['status'] = status
    return entry
|