""" Data loader for Ferrero naming convention mappings """ import json from pathlib import Path from typing import Optional, List import difflib class DataLoader: """Load and query mapping data from data.json""" def __init__(self, data_json_path: str): """ Initialize data loader Args: data_json_path: Path to data.json file Raises: FileNotFoundError: If data.json doesn't exist ValueError: If data.json has invalid structure """ self.data_path = Path(data_json_path) if not self.data_path.exists(): raise FileNotFoundError(f"data.json not found at: {data_json_path}") self.data = self._load_json() self._validate_data_structure() def _load_json(self) -> dict: """Load JSON data from file""" try: with open(self.data_path, 'r', encoding='utf-8') as f: return json.load(f) except json.JSONDecodeError as e: raise ValueError(f"Invalid JSON in data.json: {e}") def _validate_data_structure(self): """Validate that data.json has required structure""" required_keys = ['brands', 'countries', 'languages', 'asset_types', 'social_media_versions'] missing_keys = [key for key in required_keys if key not in self.data] if missing_keys: raise ValueError( f"data.json missing required keys: {', '.join(missing_keys)}" ) def get_brand_name(self, brand_code: str) -> Optional[str]: """ Resolve brand code to brand name Args: brand_code: Brand code (e.g., 'RAF', 'NUT') Returns: str: Brand name (e.g., 'RAFFAELLO', 'NUTELLA') None: If brand code not found """ return self.data['brands'].get(brand_code.upper()) def get_country_name(self, country_code: str) -> Optional[str]: """ Resolve country code to country name Args: country_code: 2-letter country code (e.g., 'GL', 'IT') Returns: str: Country name (e.g., 'GLOBAL', 'Italy') None: If country code not found """ return self.data['countries'].get(country_code.upper()) def get_language_name(self, language_code: str) -> Optional[str]: """ Resolve language code to language name Args: language_code: 2-3 letter language code (e.g., 'it', 'en') Returns: str: Language name (e.g., 'Italian', 'English') None: If language code not found """ return self.data['languages'].get(language_code.lower()) def get_social_media_name(self, social_code: str) -> Optional[str]: """ Resolve social media code to platform name Args: social_code: 3-letter social code (e.g., 'IGF', 'FBP') Returns: str: Platform name (e.g., 'IG - Feed', 'FB - Feed') None: If social code not found """ return self.data['social_media_versions'].get(social_code.upper()) def get_asset_type_name(self, asset_code: str) -> Optional[str]: """ Resolve asset type code to asset type name Args: asset_code: 3-letter asset type code (e.g., 'OLV', 'TVC') Returns: str: Asset type name (e.g., 'On Line Video', 'TVC') None: If asset type not found """ return self.data['asset_types'].get(asset_code.upper()) def validate_brand_code(self, code: str) -> bool: """ Check if brand code exists Args: code: Brand code to validate Returns: bool: True if valid """ return code.upper() in self.data['brands'] def validate_country_code(self, code: str) -> bool: """ Check if country code exists Args: code: Country code to validate Returns: bool: True if valid """ return code.upper() in self.data['countries'] def validate_language_code(self, code: str) -> bool: """ Check if language code exists Args: code: Language code to validate Returns: bool: True if valid """ return code.lower() in self.data['languages'] def validate_social_code(self, code: str) -> bool: """ Check if social media code exists Args: code: Social media code to validate Returns: bool: True if valid """ return code.upper() in self.data['social_media_versions'] def validate_asset_type(self, code: str) -> bool: """ Check if asset type exists Args: code: Asset type code to validate Returns: bool: True if valid """ return code.upper() in self.data['asset_types'] def get_similar_codes(self, code: str, code_type: str, n: int = 3) -> List[str]: """ Get similar codes for typo suggestions Args: code: Code to find matches for code_type: Type of code ('brand', 'country', 'language', 'social', 'asset') n: Number of suggestions to return Returns: list: List of similar codes """ code_map = { 'brand': self.data['brands'], 'country': self.data['countries'], 'language': self.data['languages'], 'social': self.data['social_media_versions'], 'asset': self.data['asset_types'], } if code_type not in code_map: return [] all_codes = list(code_map[code_type].keys()) matches = difflib.get_close_matches(code.upper(), all_codes, n=n, cutoff=0.6) return matches def get_all_brands(self) -> dict: """Get all brands as dict""" return self.data['brands'] def get_all_countries(self) -> dict: """Get all countries as dict""" return self.data['countries'] def get_all_languages(self) -> dict: """Get all languages as dict""" return self.data['languages'] def get_all_social_media(self) -> dict: """Get all social media platforms as dict""" return self.data['social_media_versions'] def get_all_asset_types(self) -> dict: """Get all asset types as dict""" return self.data['asset_types']