Major Features: - Complete Ferrero ↔ CreativeX mapping system with 93 brands - Automated Box.com folder monitoring service - Email notifications with score breakdowns - Database integration for result storage Mapping System (v2.0.0): - mappings.json: 93 brand mappings, 44+ channel mappings - core/mapping_resolver.py: Translates Ferrero codes to CreativeX format - scripts/validate_mappings.py: Validation tool for brand/channel support - scripts/generate_brand_mappings.py: Auto-mapping tool - scripts/download_reports.py: Scorecard PDF download tool - Updated scripts/upload.py: Integrated mapping validation - Updated scripts/check_status.py: Added detailed score display with guidelines Documentation: - Updated README.md: Complete user guide with mapping system - Updated STATUS.md: Production-ready status with test results - MAPPINGS_GUIDE.md: Complete mapping documentation - MAPPING_IMPLEMENTATION.md: Implementation summary - BRAND_MAPPINGS_REVIEW.md: Brand mapping validation guide - PRODUCTION_BRANDS_SUMMARY.md: Production brand catalog - PRODUCTION_MAPPING_COMPLETE.md: Mapping completion summary Automation Service (New): - creativex-automation/: Complete automated Box monitoring service - Monitors Box Ferrero-In folder (363284027140) for new files - Automatically uploads to CreativeX - Polls for completion (30 min intervals) - Extracts scores and stores in PostgreSQL creativex_scores table - Sends formatted emails to file uploader + daveporter@oliver.agency - Moves processed files to Processed subfolder Service Components: - automation/box_monitor.py: Box folder monitoring with uploader detection - automation/upload_processor.py: CreativeX upload integration - automation/status_poller.py: CreativeX status polling - automation/result_handler.py: Score extraction and email sending - automation/orchestrator.py: Service coordination - automation/processing_queue.py: JSON-based processing queue - service.py: Main service entry point - config.py: Service configuration 
loader - requirements.txt: All dependencies - deployment/systemd/: Systemd service unit file - Updated shared/notifier.py: Added creativex_upload_complete and creativex_upload_failed templates Testing: - Supports --dry-run mode for configuration testing - Supports --scan-once mode for Box folder testing - Manual run mode for development/testing - Comprehensive logging with rotation (10MB, 28 backups) Database Integration: - Uses existing creativex_scores table (no migrations needed) - Compatible with existing Ferrero-Opentext workflows - Stores full CreativeX API responses in JSONB Email Templates: - Matches Ferrero-Opentext styling (#9c27b0 purple for CreativeX) - Includes score, tier, guidelines breakdown, scorecard URL - Recipients: Box uploader + CC to daveporter@oliver.agency Deployment: - Runs locally for dev/testing - Systemd service for production - Auto-restart on failure - Complete documentation in creativex-automation/README.md Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
261 lines
8.3 KiB
Python
Executable file
261 lines
8.3 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
Generate brand mappings by matching Ferrero codes to Creative X brands
|
|
|
|
Usage:
|
|
python generate_brand_mappings.py
|
|
python generate_brand_mappings.py --output mappings_generated.json
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
import json
|
|
from pathlib import Path
|
|
from difflib import SequenceMatcher
|
|
|
|
# Add parent directory to path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from config import load_config
|
|
from core.data_loader import DataLoader
|
|
from core.api_client import CreativeXAPIClient
|
|
|
|
|
|
def similarity(a: str, b: str) -> float:
    """Return the case-insensitive difflib similarity ratio of two strings"""
    left = a.lower()
    right = b.lower()
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()
|
|
|
|
|
|
def normalize_name(name: str) -> str:
    """
    Normalize brand name for comparison

    Lower-cases the name, spells '&' as 'and', treats '-' and '_' as word
    separators, and collapses every run of whitespace to a single space.

    Args:
        name: Raw brand name

    Returns:
        str: Normalized name with no leading, trailing or repeated whitespace
    """
    normalized = name.lower().replace('&', 'and')
    for separator in ('-', '_'):
        normalized = normalized.replace(separator, ' ')

    # Replacing separators can create runs of spaces ("a - b" -> "a   b");
    # split/join collapses runs of any length and trims both ends in one pass.
    return ' '.join(normalized.split())
|
|
|
|
|
|
def find_best_match(ferrero_name: str, creativex_brands: list) -> tuple:
    """
    Find best matching Creative X brand for Ferrero brand

    Every Creative X brand is scored against the Ferrero name with a weighted
    blend of character-level similarity (60%) and the share of Ferrero words
    that also occur in the Creative X name (40%).

    Args:
        ferrero_name: Ferrero brand name
        creativex_brands: List of Creative X brands (dicts with a 'name' key)

    Returns:
        tuple: (best_match, alternatives)
            best_match is a dict with keys 'brand', 'score', 'similarity'
            and 'word_overlap', or None when creativex_brands is empty.
            alternatives is a list of up to three dicts of the same shape,
            holding the next-best candidates in descending score order.
    """
    ferrero_normalized = normalize_name(ferrero_name)

    # The Ferrero side does not change per candidate, so compute it once.
    ferrero_words = set(ferrero_normalized.split())
    ferrero_word_count = max(len(ferrero_words), 1)  # avoid division by zero

    matches = []
    for brand in creativex_brands:
        creativex_normalized = normalize_name(brand['name'])

        # Character-level similarity
        score = similarity(ferrero_normalized, creativex_normalized)

        # Share of Ferrero words that appear verbatim in the Creative X name
        creativex_words = set(creativex_normalized.split())
        word_overlap = len(ferrero_words & creativex_words) / ferrero_word_count

        # Combine scores
        combined_score = (score * 0.6) + (word_overlap * 0.4)

        matches.append({
            'brand': brand,
            'score': combined_score,
            'similarity': score,
            'word_overlap': word_overlap
        })

    # Sort by combined score, best first (stable, so ties keep input order)
    matches.sort(key=lambda x: x['score'], reverse=True)

    best_match = matches[0] if matches else None
    alternatives = matches[1:4]  # slicing is already safe on short lists

    return best_match, alternatives
|
|
|
|
|
|
def generate_mappings(data_loader: DataLoader, api_client: CreativeXAPIClient):
    """
    Match each Ferrero brand to its closest Creative X brand and report results

    Prints a banner, the per-tier match listings and a summary to stdout.

    Args:
        data_loader: Ferrero data loader; get_all_brands() is iterated as
            (code, name) pairs
        api_client: Creative X API client used to request '/dimensions'

    Returns:
        dict: {'brand_mappings': {code: mapping}, 'summary': {tier: count}}
    """
    heavy_rule = "=" * 70
    light_rule = "-" * 70

    print(heavy_rule)
    print("BRAND MAPPING GENERATOR")
    print(heavy_rule)

    # Ferrero side
    ferrero_brands = data_loader.get_all_brands()
    print(f"\nFerrero brands: {len(ferrero_brands)}")

    # Creative X side
    print("\nFetching Creative X brands...")
    dimensions = api_client._make_request('GET', '/dimensions')
    creativex_brands = dimensions.get('brands', [])
    print(f"Creative X brands: {len(creativex_brands)}")

    print("\n" + heavy_rule)
    print("MATCHING BRANDS")
    print(heavy_rule)

    generated_mappings = {}
    tiers = {
        'high_confidence': [],
        'medium_confidence': [],
        'low_confidence': [],
    }
    no_match = []

    for code, ferrero_name in ferrero_brands.items():
        winner, runners_up = find_best_match(ferrero_name, creativex_brands)

        # No candidate at all (empty Creative X brand list)
        if not winner:
            no_match.append((code, ferrero_name))
            continue

        confidence = winner['score']
        matched_brand = winner['brand']

        entry = {
            'creativex_name': matched_brand['name'],
            'creativex_id': matched_brand['id'],
            'ferrero_name': ferrero_name,
            'confidence': round(confidence, 3),
            'match_type': '',
        }

        # Bucket on the unrounded score: >= 0.9 high, >= 0.6 medium, else low
        if confidence >= 0.9:
            tier = 'high_confidence'
        elif confidence >= 0.6:
            tier = 'medium_confidence'
        else:
            tier = 'low_confidence'

        entry['match_type'] = tier
        tiers[tier].append((code, entry, runners_up))
        generated_mappings[code] = entry

    high = tiers['high_confidence']
    medium = tiers['medium_confidence']
    low = tiers['low_confidence']

    # High confidence: first ten only, no alternatives
    print(f"\n✅ High Confidence Matches ({len(high)}): >= 90% match")
    print(light_rule)
    for code, entry, _ in high[:10]:
        print(f" {code:10s} → {entry['creativex_name']:40s} ({entry['confidence']:.1%})")

    hidden_high = len(high) - 10
    if hidden_high > 0:
        print(f" ... and {hidden_high} more")

    # Medium confidence: first ten, with up to two alternatives each
    print(f"\n⚠️ Medium Confidence Matches ({len(medium)}): 60-90% match")
    print(light_rule)
    for code, entry, others in medium[:10]:
        print(f" {code:10s} → {entry['creativex_name']:40s} ({entry['confidence']:.1%})")
        if others:
            names = ', '.join(other['brand']['name'] for other in others[:2])
            print(f" Alternatives: {names}")

    hidden_medium = len(medium) - 10
    if hidden_medium > 0:
        print(f" ... and {hidden_medium} more")

    # Low confidence: every entry, with the Ferrero name and up to three alternatives
    print(f"\n❌ Low Confidence Matches ({len(low)}): < 60% match")
    print(light_rule)
    for code, entry, others in low:
        print(f" {code:10s} → {entry['creativex_name']:40s} ({entry['confidence']:.1%})")
        print(f" Ferrero: {entry['ferrero_name']}")
        if others:
            names = ', '.join(other['brand']['name'] for other in others[:3])
            print(f" Alternatives: {names}")

    print(f"\n🚫 No Match Found ({len(no_match)})")
    print(light_rule)
    for code, name in no_match:
        print(f" {code:10s} → {name}")

    # Summary
    print("\n" + heavy_rule)
    print("SUMMARY")
    print(heavy_rule)
    print(f"Total Ferrero brands: {len(ferrero_brands)}")
    print(f"Total Creative X brands: {len(creativex_brands)}")
    print("")
    print(f"✅ High confidence: {len(high)} (review and approve)")
    print(f"⚠️ Medium confidence: {len(medium)} (needs validation)")
    print(f"❌ Low confidence: {len(low)} (needs manual matching)")
    print(f"🚫 No match: {len(no_match)} (may not exist in Creative X)")
    print(heavy_rule)

    summary = {
        'high_confidence': len(high),
        'medium_confidence': len(medium),
        'low_confidence': len(low),
        'no_match': len(no_match),
    }
    return {
        'brand_mappings': generated_mappings,
        'summary': summary,
    }
|
|
|
|
|
|
def main():
    """
    CLI entry point

    Parses the command line, loads the configuration, builds the data loader
    and API client, generates the brand mappings and writes them as JSON to
    the --output path. Exits with status 1 when the configuration cannot be
    loaded, the components cannot be initialised, or the output file cannot
    be written.
    """
    parser = argparse.ArgumentParser(
        description='Generate brand mappings by matching Ferrero to Creative X'
    )

    parser.add_argument('--output', default='mappings_generated.json',
                        help='Output file for generated mappings')
    parser.add_argument('--auto-approve', action='store_true',
                        help='Auto-approve high confidence matches only')

    args = parser.parse_args()

    # NOTE(review): nothing in this script consumes --auto-approve; say so
    # rather than silently ignoring it until the behaviour is implemented.
    if args.auto_approve:
        print("Warning: --auto-approve currently has no effect; "
              "all matches are written for manual review",
              file=sys.stderr)

    # Load configuration
    try:
        config = load_config()
    except Exception as e:
        print(f"Error loading configuration: {e}", file=sys.stderr)
        sys.exit(1)

    # Initialize components
    try:
        data_loader = DataLoader(str(config.data_json_path))
        api_client = CreativeXAPIClient(
            config.api_base_url,
            config.access_token,
            config.api_max_retries,
            config.api_timeout
        )
    except Exception as e:
        print(f"Error initializing components: {e}", file=sys.stderr)
        sys.exit(1)

    # Generate mappings
    result = generate_mappings(data_loader, api_client)

    # Save to file; report an unwritable path the same way as other failures
    output_path = Path(args.output)
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2, ensure_ascii=False)
    except OSError as e:
        print(f"Error writing {output_path}: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"\n✓ Generated mappings saved to: {output_path}")
    print("\nNext steps:")
    print(f" 1. Review {output_path}")
    print(" 2. Validate medium/low confidence matches")
    print(" 3. Manually add any missing brands")
    print(" 4. Merge into mappings.json")
    print(" 5. Test with: python scripts/validate_mappings.py --show-supported")
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|