baic_dashboard/email_analysis.py

138 lines
No EOL
4.5 KiB
Python

#!/usr/bin/env python3
"""
Email Analysis Script for BAIC Dashboard
Pulls data from Make.com endpoint and analyzes unique email addresses for user filtering logic
"""
import requests
import json
import re
import os
from collections import Counter
# Webhook endpoint used for all data requests in this script. The value is
# read from the MAKE_WEBHOOK_URL environment variable; the literal below is
# only the fallback when that variable is unset.
# NOTE(review): the fallback embeds a concrete hook URL in source control --
# confirm it is not sensitive, or drop the default and require the env var.
MAKE_WEBHOOK_URL = os.environ.get('MAKE_WEBHOOK_URL', 'https://hook.eu1.make.celonis.com/h8gjldwnp4u5cvc0io474zq4u7vwb9zw')
def fetch_data(data_type):
    """Fetch records of one type from the Make.com webhook.

    Sends a GET request to ``MAKE_WEBHOOK_URL`` with ``type=<data_type>`` as
    the query string and unwraps the ``data`` field of every item in the JSON
    response.

    Args:
        data_type: Value of the ``type`` query parameter (this script uses
            ``'conversations'`` and ``'messages'``).

    Returns:
        A list of dicts, one per dict item in the response. An item whose
        ``data`` field is missing, ``null`` or not a dict contributes an
        empty dict so callers can always use ``.get`` on the records. Items
        that are not dicts are skipped. Returns ``[]`` when the request
        fails, the body is not valid JSON, or the payload is not a list; the
        failure is printed rather than raised (best-effort behaviour).
    """
    # Only the network call and JSON decoding are expected to fail, so only
    # they live inside the try block.
    try:
        response = requests.get(
            MAKE_WEBHOOK_URL, params={'type': data_type}, timeout=30
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f'Error fetching {data_type}: {e}')
        return []

    if not isinstance(payload, list):
        print(
            f'Error fetching {data_type}: '
            f'expected a JSON list, got {type(payload).__name__}'
        )
        return []

    records = []
    for item in payload:
        if not isinstance(item, dict):
            # Malformed entry: skip it instead of discarding the whole batch.
            continue
        record = item.get('data')
        # ``"data": null`` would otherwise leak None to callers that call
        # ``.get`` on every record.
        records.append(record if isinstance(record, dict) else {})
    return records
def extract_display_name(email):
    """Derive a human-readable display name from a known email address.

    Two address families are recognised (domain comparison is
    case-insensitive):

    * ``...@oliver.agency`` -- the local part is split into runs of letters
      (camelCase boundaries and separators such as ``.`` both split) and the
      first two runs are title-cased. A local part that cannot be split,
      e.g. ``michaelclervi``, is title-cased as a whole.
    * ``...@olivermarketing.onmicrosoft.com`` -- external accounts whose
      local part looks like ``first.last_domain.tld#ext#``. The trailing
      ``_domain.tld#ext#`` suffix is removed (any TLD, any letter case of
      the ``#ext#`` marker) and the first two dot-separated pieces are
      title-cased, with underscores shown as spaces.

    Args:
        email: Address to convert. Falsy values and strings without ``@``
            are returned unchanged.

    Returns:
        The derived display name, or ``email`` itself when the address does
        not belong to one of the recognised domains.
    """
    if not email or '@' not in email:
        return email

    username = email.split('@')[0]
    # Compare domains on a lowercased copy, but keep the original casing of
    # the local part: the camelCase split below depends on it.
    normalized = email.lower()

    if normalized.endswith('@oliver.agency'):
        parts = re.findall(r'[A-Z][a-z]*|[a-z]+', username)
        if len(parts) >= 2:
            return f"{parts[0].title()} {parts[1].title()}"
        return username.title()

    if normalized.endswith('@olivermarketing.onmicrosoft.com'):
        # Strip the "_<original domain>#ext#" suffix. The last underscore
        # before the marker separates the name from the domain, and the
        # domain must contain at least one dot.
        clean_username = re.sub(
            r'_[^_]+\.[a-z0-9-]+#ext#$', '', username, flags=re.IGNORECASE
        )
        parts = clean_username.split('.')
        if len(parts) >= 2:
            first_name = parts[0].replace('_', ' ').title()
            last_name = parts[1].replace('_', ' ').title()
            return f"{first_name} {last_name}"
        return clean_username.replace('_', ' ').title()

    return email
def _collect_emails(*record_lists):
    """Return the lowercased ``User_ID`` values that contain ``@``."""
    emails = set()
    for records in record_lists:
        for record in records:
            # Skip entries that are not dicts (e.g. a null record).
            if not isinstance(record, dict):
                continue
            user_id = record.get('User_ID', '')
            # Non-string IDs cannot be emails and would break ``'@' in``.
            if isinstance(user_id, str) and '@' in user_id:
                emails.add(user_id.lower())
    return emails


def analyze_emails():
    """Fetch conversations and messages and print a report of user emails.

    The report, written to stdout, contains:

    * the number of conversations and messages fetched,
    * every unique email (case-insensitive) with its derived display name,
      grouped into Oliver Agency and Barclays external users,
    * a JavaScript object literal mapping email to display name,
    * a per-domain user count and a short summary.

    Returns:
        None. All results are printed.
    """
    print("Fetching data from Make.com endpoint...")
    print("=" * 50)

    conversations = fetch_data('conversations')
    messages = fetch_data('messages')
    print(f"Found {len(conversations)} conversations")
    print(f"Found {len(messages)} messages")
    print()

    emails = _collect_emails(conversations, messages)
    ordered_emails = sorted(emails)
    print(f"UNIQUE EMAIL ADDRESSES ({len(emails)} total)")
    print("=" * 50)

    # Build the email -> display-name mapping and bucket the known domains.
    email_to_display = {}
    domain_groups = {'oliver.agency': [], 'barclays': []}
    for email in ordered_emails:
        display_name = extract_display_name(email)
        email_to_display[email] = display_name
        if email.endswith('@oliver.agency'):
            domain_groups['oliver.agency'].append((email, display_name))
        elif email.endswith('@olivermarketing.onmicrosoft.com'):
            domain_groups['barclays'].append((email, display_name))

    print("OLIVER AGENCY USERS:")
    print("-" * 30)
    for email, name in domain_groups['oliver.agency']:
        print(f"{name:<25} | {email}")

    print("\nBARCLAYS EXTERNAL USERS:")
    print("-" * 30)
    for email, name in domain_groups['barclays']:
        print(f"{name:<25} | {email}")

    print("\nJAVASCRIPT USER MAPPING:")
    print("=" * 50)
    print("// Add this to your frontend user filtering logic")
    print("const userEmailToDisplayName = {")
    # The dict was filled in sorted order, so plain iteration is sorted.
    for email, name in email_to_display.items():
        # json.dumps escapes quotes, backslashes and control characters so
        # externally supplied values cannot break the emitted literal.
        key = json.dumps(email, ensure_ascii=False)
        value = json.dumps(name, ensure_ascii=False)
        print(f' {key}: {value},')
    print("};")

    # Take the text after the last '@' so the domain agrees with the
    # endswith() checks above; iterate in sorted order so ties in
    # most_common() come out in a stable order.
    domains = Counter(email.rsplit('@', 1)[1] for email in ordered_emails)
    print("\nDOMAIN BREAKDOWN:")
    print("=" * 50)
    for domain, count in domains.most_common():
        print(f"{domain:<40} | {count:>3} users")

    print("\nSUMMARY:")
    print("=" * 50)
    print(f"Total unique users: {len(emails)}")
    print(f"Oliver Agency staff: {len(domain_groups['oliver.agency'])}")
    print(f"Barclays external users: {len(domain_groups['barclays'])}")
# Script entry point: run the report only when executed directly, not when
# this module is imported.
if __name__ == "__main__":
    analyze_emails()