ferrero-opentext/Python-Version/scripts/daily_report.py

341 lines
14 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Daily Report Generator
Analyzes the log files of all workflows and sends a comprehensive daily summary email.
Intended to be run daily at 7 pm via cron.
Compatible with Python 3.6+
"""
import sys
import os
import re
from datetime import datetime, timedelta
import logging
# Add shared library to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from shared.config_loader import load_config
from shared.notifier import Notifier
# Configure root logging once for the script: INFO level, with timestamp,
# logger name and severity in front of every message.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-wide logger used by every function in this script.
logger = logging.getLogger('DailyReport')
# Timestamp prefix expected at the start of a log record,
# e.g. "2024-01-31 19:00:00".
_TIMESTAMP_RE = re.compile(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})')
_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'


def _set_campaign_status(campaign, stats, status, counter_key):
    """Put *campaign* into *status*, bumping stats[counter_key] only on a real change."""
    # Counting only on a transition keeps a campaign from being counted twice
    # when two different log lines report the same outcome.
    if campaign['status'] != status:
        campaign['status'] = status
        stats[counter_key] += 1


def _apply_campaign_line(line, campaign, stats):
    """Fold one log line into the campaign that is currently being processed."""
    # Total assets found
    match = re.search(r'Found (\d+) master assets', line)
    if match:
        count = int(match.group(1))
        campaign['total_assets'] = count
        stats['total_assets'] += count

    # NOT APPROVED count (A5→A6 only)
    match = re.search(r'NOT APPROVED \(rejected\) assets: (\d+)', line)
    if match:
        count = int(match.group(1))
        stats['not_approved_count'] += count
        campaign['not_approved'] = count

    # Skipped count (A5→A6 only)
    match = re.search(r'Approved/other status \(skipped\): (\d+)', line)
    if match:
        count = int(match.group(1))
        stats['assets_skipped'] += count
        campaign['skipped'] = count

    # Success count
    # NOTE(review): a "Successful: N" line that appears inside a summary block
    # while a campaign is active is also counted by _apply_summary_line --
    # confirm against the real log format whether that can happen.
    match = re.search(r'(Successfully processed|Successful): (\d+)', line)
    if match:
        count = int(match.group(2))
        campaign['successful'] = count
        stats['assets_successful'] += count

    # Failed count (only lines that also mention "assets")
    if 'assets' in line:
        match = re.search(r'Failed: (\d+)', line)
        if match:
            count = int(match.group(1))
            campaign['failed'] = count
            stats['assets_failed'] += count

    # Status updated
    if 'Status updated successfully' in line or 'Campaign completed successfully' in line:
        _set_campaign_status(campaign, stats, 'completed', 'campaigns_completed')

    # Partial completion never downgrades a campaign already marked completed
    if 'Campaign incomplete' in line or 'Status NOT updated' in line:
        if campaign['status'] != 'completed':
            _set_campaign_status(campaign, stats, 'partial', 'campaigns_partial')

    # No assets found
    if 'No master assets found' in line or 'No NOT APPROVED assets found' in line:
        _set_campaign_status(campaign, stats, 'no_assets', 'campaigns_no_assets')


def _apply_summary_line(line, batch, stats):
    """Fold one line from inside a "Processing Summary" block into the totals."""
    # \s* tolerates the padding used to align values in summary blocks
    match = re.search(r'Total files processed:\s*(\d+)', line)
    if match:
        count = int(match.group(1))
        stats['total_assets'] += count
        if batch is not None:
            batch['total_assets'] = count

    match = re.search(r'Total campaigns processed:\s*(\d+)', line)
    if match:
        stats['campaigns_processed'] += int(match.group(1))

    if 'Successfully processed' not in line:
        match = re.search(r'Successful:\s*(\d+)', line)
        if match:
            count = int(match.group(1))
            stats['assets_successful'] += count
            if batch is not None:
                batch['successful'] = count

    if 'Failed to' not in line:
        match = re.search(r'Failed:\s*(\d+)', line)
        if match:
            count = int(match.group(1))
            stats['assets_failed'] += count
            if batch is not None:
                batch['failed'] = count


def parse_log_file(log_path, since_hours=24):
    """
    Parse a log file and extract statistics for the last N hours.

    Lines that start with a timestamp older than the cutoff are ignored,
    except while a "Processing Summary" block is open, so that a block which
    has started is always read to its end. Lines without a leading timestamp
    are always processed.

    Args:
        log_path: Path to log file
        since_hours: How many hours back to analyze (default 24)

    Returns:
        dict with the counters 'campaigns_found', 'campaigns_processed',
        'campaigns_completed', 'campaigns_partial', 'campaigns_no_assets',
        'total_assets', 'assets_successful', 'assets_failed',
        'assets_skipped' and 'not_approved_count', plus 'errors' (unique
        ERROR/CRITICAL messages in order of first appearance) and
        'campaign_details' (one dict per campaign or upload batch).
        If the file is missing the counters are all zero; if parsing fails
        part-way, whatever was collected so far is returned.
    """
    stats = {
        'campaigns_found': 0,
        'campaigns_processed': 0,
        'campaigns_completed': 0,
        'campaigns_partial': 0,
        'campaigns_no_assets': 0,
        'total_assets': 0,
        'assets_successful': 0,
        'assets_failed': 0,
        'assets_skipped': 0,
        'not_approved_count': 0,
        'errors': [],
        'campaign_details': []
    }

    if not os.path.exists(log_path):
        logger.warning("Log file not found: {}".format(log_path))
        return stats

    # Calculate cutoff time
    cutoff_time = datetime.now() - timedelta(hours=since_hours)

    current_campaign = None
    current_batch = None
    in_summary_block = False       # True while inside a summary block
    awaiting_opening_rule = False  # True until the "====" under the header is seen
    seen_errors = set()            # O(1) duplicate check for stats['errors']

    try:
        # errors='replace' keeps a single undecodable byte from aborting the
        # whole parse; every pattern matched below is plain ASCII.
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                ts_match = _TIMESTAMP_RE.match(line)

                # Skip old lines UNLESS we're in a summary block
                # (the complete summary has to be parsed).
                if ts_match and not in_summary_block:
                    try:
                        log_time = datetime.strptime(ts_match.group(1), _TIMESTAMP_FORMAT)
                    except ValueError:
                        # Looks like a timestamp but is not a valid date:
                        # treat the line as untimestamped.
                        log_time = None
                    if log_time is not None and log_time < cutoff_time:
                        continue

                # Campaign found
                match = re.search(r'Found (\d+) campaigns with status', line)
                if match:
                    stats['campaigns_found'] += int(match.group(1))

                # Processing campaign
                match = re.search(r'Processing campaign: ([^(]+)\(([^)]+)\)', line)
                if match:
                    # Save the previous campaign before starting a new one;
                    # otherwise only the last campaign would reach the report.
                    if current_campaign is not None:
                        stats['campaign_details'].append(current_campaign)
                    current_campaign = {
                        'name': match.group(1).strip(),
                        'number': match.group(2).strip(),
                        'total_assets': 0,
                        'successful': 0,
                        'failed': 0,
                        'skipped': 0,
                        'status': 'unknown'
                    }
                    stats['campaigns_processed'] += 1

                if current_campaign is not None:
                    _apply_campaign_line(line, current_campaign, stats)

                # Detect start and end of summary blocks
                if 'Processing Summary' in line:
                    in_summary_block = True
                    awaiting_opening_rule = True
                    # The header's timestamp identifies the batch
                    current_batch = {
                        'name': 'Upload Batch',
                        'number': ts_match.group(1) if ts_match else 'Unknown Time',
                        'total_assets': 0,
                        'successful': 0,
                        'failed': 0,
                        'status': 'completed'
                    }
                elif in_summary_block and '====' in line:
                    if awaiting_opening_rule:
                        # First separator after the header - skip it
                        awaiting_opening_rule = False
                    else:
                        # Second separator - end of block
                        in_summary_block = False
                        if current_batch and current_batch['total_assets'] > 0:
                            stats['campaign_details'].append(current_batch)
                            # Count this batch as a processed campaign
                            stats['campaigns_processed'] += 1
                        current_batch = None

                # Parse summary block contents
                if in_summary_block:
                    _apply_summary_line(line, current_batch, stats)

                # Errors: keep the text after the last " - " separator, once each
                if ' - ERROR - ' in line or ' - CRITICAL - ' in line:
                    error_msg = line.split(' - ')[-1].strip()
                    if error_msg not in seen_errors:
                        seen_errors.add(error_msg)
                        stats['errors'].append(error_msg)

            # Add the campaign that was still open at end of file
            if current_campaign is not None:
                stats['campaign_details'].append(current_campaign)
    except Exception as e:
        # Best effort: report the problem and return what was collected so far
        logger.error("Error parsing log {}: {}".format(log_path, str(e)))

    return stats
def generate_daily_report():
    """
    Build the daily report across all workflow logs and email it.

    Parses each workflow log for the last 24 hours with parse_log_file,
    sums the per-workflow counters into overall totals, computes the asset
    success rate, sends the 'daily_report' email through the Notifier to the
    configured 'success' recipients, and finally logs a summary of the totals.
    """
    banner = "=" * 60
    logger.info(banner)
    logger.info("Generating Daily Report")
    logger.info(banner)

    # Configuration drives both the notifier and the recipient list
    config = load_config('config/config.yaml')
    notifier = Notifier(config)

    # (display name, log path) for every workflow, in report order
    workflow_logs = (
        ('A1→A2 (Master Assets)', 'logs/a1_to_a2.log'),
        ('A1→A2 (Box Upload)', 'logs/a1_to_a2_box.log'),
        ('A2→A3 (Upload from Box)', 'logs/a2_to_a3.log'),
        ('A4 (Box Upload)', 'logs/a4_box.log'),
        ('A4 (Webhook Monitor)', 'logs/a4_webhook.log'),
        ('A5→A6 (Rework/Rejections)', 'logs/a5_to_a6.log'),
        ('B1→B2 (Global Masters)', 'logs/b1_to_b2.log'),
    )

    # Counters that are summed one-to-one from each workflow's stats
    summed_keys = (
        'campaigns_found',
        'campaigns_processed',
        'campaigns_completed',
        'campaigns_partial',
        'campaigns_no_assets',
        'total_assets',
        'assets_successful',
        'assets_failed',
        'assets_skipped',
        'not_approved_count',
    )
    total_stats = dict.fromkeys(summed_keys, 0)
    total_stats['total_errors'] = 0

    workflow_stats = {}
    for workflow_name, log_path in workflow_logs:
        logger.info("Analyzing: {}".format(workflow_name))
        per_workflow = parse_log_file(log_path, since_hours=24)
        workflow_stats[workflow_name] = per_workflow

        for key in summed_keys:
            total_stats[key] += per_workflow[key]
        # Errors are reported as a count of distinct messages per workflow
        total_stats['total_errors'] += len(per_workflow['errors'])

    # Success rate as a percentage of all assets; 0 when there were none
    asset_total = total_stats['total_assets']
    if asset_total > 0:
        success_rate = (total_stats['assets_successful'] / asset_total) * 100
    else:
        success_rate = 0
    total_stats['success_rate'] = success_rate

    # Timestamp shown in the report
    report_date = datetime.now().strftime('%Y-%m-%d')
    report_time = datetime.now().strftime('%I:%M %p')

    logger.info("")
    logger.info("Sending daily report email...")
    email_payload = {
        'report_date': report_date,
        'report_time': report_time,
        'total_stats': total_stats,
        'workflow_stats': workflow_stats
    }
    notifier.send_email(
        template_name='daily_report',
        recipients=config['notifications']['recipients']['success'],
        data=email_payload
    )
    logger.info("✓ Daily report sent successfully")

    # Mirror the totals into the script's own log
    logger.info("")
    logger.info(banner)
    logger.info("DAILY REPORT SUMMARY")
    logger.info(banner)

    summary_rows = [
        ("Campaigns Found: {}", total_stats['campaigns_found']),
        ("Campaigns Processed: {}", total_stats['campaigns_processed']),
        (" - Completed: {}", total_stats['campaigns_completed']),
        (" - Partial: {}", total_stats['campaigns_partial']),
        (" - No Assets: {}", total_stats['campaigns_no_assets']),
        ("Total Assets: {}", total_stats['total_assets']),
        (" - Successful: {}", total_stats['assets_successful']),
        (" - Failed: {}", total_stats['assets_failed']),
        (" - Skipped: {}", total_stats['assets_skipped']),
    ]
    # The rejection line is only shown when there is something to report
    if total_stats['not_approved_count'] > 0:
        summary_rows.append((" - NOT APPROVED: {}", total_stats['not_approved_count']))
    summary_rows.append(("Success Rate: {:.1f}%", success_rate))
    summary_rows.append(("Total Errors: {}", total_stats['total_errors']))

    for template, value in summary_rows:
        logger.info(template.format(value))
    logger.info(banner)
if __name__ == '__main__':
    # Script entry point: exit status 0 on success, 1 if the report failed.
    exit_code = 0
    try:
        generate_daily_report()
    except Exception as exc:
        logger.error("Daily report failed: {}".format(str(exc)))
        # Print the full traceback so the failure can be diagnosed
        import traceback
        traceback.print_exc()
        exit_code = 1
    sys.exit(exit_code)