210 lines
8.3 KiB
Python
210 lines
8.3 KiB
Python
import os
|
|
import json
|
|
import re
|
|
from checks.analyze_with_gpt import analyze_with_gpt
|
|
|
|
# Leading "MARKET_language_" pair used by SOME STATIC names, e.g. "AT_de_".
_SOME_STATIC_PREFIX = re.compile(r'^([A-Z]{2})_([a-z]{2})_')
# Trailing "_xx-YY" (language-MARKET), e.g. "_en-GB".
_LANG_MARKET_SUFFIX = re.compile(r'_([a-z]{2})-([A-Z]{2})(?:\.[^.]+)?$')
# Trailing "_YY-xx" (MARKET-language), e.g. "_NO-no" as used by DOOH names.
_MARKET_LANG_SUFFIX = re.compile(r'_([A-Z]{2})-([a-z]{2})(?:\.[^.]+)?$')
# Trailing generic marker used by POS GEN names.
_GEN_SUFFIX = re.compile(r'_(GEN|CEN)(?:\.[^.]+)?$', re.IGNORECASE)
# Sizes such as 1080x1920, 21x29.7cm, 1400x1050.
_DIMENSION = re.compile(
    r'(\d+(?:\.\d+)?x\d+(?:\.\d+)?(?:px|cm|mm)?)', re.IGNORECASE
)
# Campaign number as a stand-alone token (not part of a size or longer number).
_CAMPAIGN_TOKEN = re.compile(r'(?<![A-Za-z0-9])(\d{4}[A-Z]?)(?![A-Za-z0-9])')
# Original, unanchored campaign-number search; kept as a fallback.
_CAMPAIGN_LOOSE = re.compile(r'(\d{4}[A-Z]?)')
_CAMPAIGN_ONLY = re.compile(r'\d{4}[A-Z]?')
# Heuristic: a real file extension is short and purely alphanumeric.
_PLAUSIBLE_EXTENSION = re.compile(r'\.[A-Za-z0-9]{1,5}')


def _strip_extension(filename: str) -> str:
    """Return the basename of *filename* without its file extension.

    ``os.path.splitext`` splits at the last dot, which for an extension-less
    name such as ``21x29.7cm_A4_4068A_10065-01_GEN`` is the decimal point of
    the size. The suffix is therefore only dropped when it looks like a real
    extension (heuristic: 1-5 alphanumeric characters).
    """
    name = os.path.basename(filename)
    stem, ext = os.path.splitext(name)
    if ext and not _PLAUSIBLE_EXTENSION.fullmatch(ext):
        return name
    return stem


def _classify_localized_format(base_name: str):
    """Guess the format of a name that ends in a language/market pair.

    DOOH, OOH and Display names carry an explicit type token. Otherwise the
    leading token decides, following the naming conventions: POS names start
    with a size, DS names start with the campaign number. Returns ``None``
    when nothing fits.
    """
    if "_DOOH_" in base_name:
        return "DOOH"
    if "_OOH_" in base_name:
        return "OOH"
    if "_Display_" in base_name:
        return "DISPLAY_BANNER"

    first_token = base_name.split("_", 1)[0]
    if _DIMENSION.fullmatch(first_token):
        return "POS_COUNTRY"
    if _CAMPAIGN_ONLY.fullmatch(first_token):
        return "DS"
    return None


def _extract_campaign_number(base_name: str) -> str:
    """Return the first campaign-number-like token in *base_name*, or ""."""
    # Prefer a delimiter-bounded token so "1080" in "1080x1920" or "1006" in
    # "10065" is not mistaken for a campaign number.
    found = _CAMPAIGN_TOKEN.search(base_name) or _CAMPAIGN_LOOSE.search(base_name)
    return found.group(1) if found else ""


def run_check(config: dict, context: dict, check_id: str) -> dict:
    """
    QC check that parses static image filenames using pattern matching and GPT.

    Static image naming conventions (from cheat sheet):
    - SOME STATIC: Market_Language_campaignnumber_campaignname_format_creativetype_ratio_ImageNumber
    - DOOH Static: CampaignNumber_DOOH_Static_CreativeOption_MediaOwner_FormatName_FormatSize_Market-Language
    - OOH: CampaignNumber_OOH_Static_CreativeOption_MediaOwner_FormatName_FormatSize_Language-Market
    - Display Banners: CampaignNumber_Display_Static_CreativeOption_MediaOwner_FormatName_FormatSize_Language-Market
    - POS GEN: Size_Format_CampaignNumber_POPNumber_GenCode
    - POS Country Specific: Size_Format_CampaignNumber_POPNumber_Language-Market
    - DS: CampaignNumber_CampaignName_Index_BU_Resolution_language-COUNTRY

    Key extraction: language/country code, always reported as
    ``language-MARKET`` (e.g. ``no-NO``) regardless of the order in the name,
    or ``GEN`` / ``CEN`` for generic assets.

    Args:
        config: Check configuration (not read by this check).
        context: Shared check context. The filename is read from
            ``context["HM_image_parse"]["filename"]`` and the parse result is
            written to ``context[check_id]``.
        check_id: Key under which the result is stored in ``context``.

    Returns:
        ``{"status": "passed", "details": {...}}`` with the parsed components,
        or ``{"status": "error", "error_message": ...}`` when the filename is
        missing or the GPT fallback fails.
    """
    # Get parsed filename from HM_image_parse context
    hm_parse_data = context.get("HM_image_parse") or {}
    filename = hm_parse_data.get("filename")

    if not filename:
        return {
            "status": "error",
            "error_message": "Filename not found in HM_image_parse context. Ensure HM_image_parse check runs first."
        }

    base_name = _strip_extension(filename)

    detected_format = None
    language_code = None
    campaign_number = ""

    # Dimensions do not depend on the detected format, so extract them up
    # front; this also makes them available on the error path.
    dim_match = _DIMENSION.search(base_name)
    dimensions = dim_match.group(1) if dim_match else ""

    # Pattern 1: SOME STATIC - Market_Language at the beginning
    # Example: AT_de_4116A_Halloween_Stories_fb_9x16_1
    match = _SOME_STATIC_PREFIX.match(base_name)
    if match:
        market, language = match.groups()
        language_code = f"{language}-{market}"
        detected_format = "SOME_STATIC"

    # Pattern 2: trailing language-MARKET (OOH, Display, POS country, DS)
    # Example: 50x70cm_Poster_4068A_10107-01_en-GB
    if not language_code:
        match = _LANG_MARKET_SUFFIX.search(base_name)
        if match:
            language, market = match.groups()
            language_code = f"{language}-{market}"
            detected_format = _classify_localized_format(base_name)

    # Pattern 3: trailing MARKET-language (DOOH convention), normalised to
    # language-MARKET.
    # Example: 4045_DOOH_Static_PRIO1_EyeMediaGiant_Noreport_1080x1920_NO-no
    if not language_code:
        match = _MARKET_LANG_SUFFIX.search(base_name)
        if match:
            market, language = match.groups()
            language_code = f"{language}-{market}"
            detected_format = _classify_localized_format(base_name)

    # Pattern 4: POS GEN format - ends with _GEN (or _CEN)
    # Example: 21x29.7cm_A4_4068A_10065-01_GEN
    if not language_code:
        match = _GEN_SUFFIX.search(base_name)
        if match:
            language_code = match.group(1).upper()
            detected_format = "POS_GEN"

    # Pattern 5: DS generic assets - GEN_LOGO / _GEN anywhere in the name
    if not language_code:
        upper_name = base_name.upper()
        if "GEN_LOGO" in upper_name or "_GEN" in upper_name:
            language_code = "GEN"
            detected_format = "DS_GEN"

    # If pattern matching failed, use GPT as fallback
    if not language_code:
        prompt = f"""
        Parse this H&M static image filename: {base_name}

        H&M static image filenames follow various formats:

        1. SOME STATIC: Market_Language_campaignnumber_campaignname_format_creativetype_ratio_ImageNumber
           Example: AT_de_4116A_Halloween_Stories_fb_9x16_1 (language = de-AT)

        2. DOOH/OOH/Display: CampaignNumber_Type_Static_..._FormatSize_Language-Market
           Example: 4045_DOOH_Static_PRIO1_EyeMediaGiant_Noreport_1080x1920_NO-no (language = no-NO)

        3. POS GEN: Size_Format_CampaignNumber_POPNumber_GEN
           Example: 21x29.7cm_A4_4068A_10065-01_GEN (language = GEN)

        4. POS Country: Size_Format_CampaignNumber_POPNumber_Language-Market
           Example: 50x70cm_Poster_4068A_10107-01_en-GB (language = en-GB)

        5. DS: CampaignNumber_CampaignName_Index_BU_Resolution_language-COUNTRY
           Example: 1019_SPRINGBRAND_00_W_1400x1050_fr-CA (language = fr-CA)

        Return only a JSON object with these exact keys:
        format_type, campaign_number, language

        For language: Use format 'xx-YY' (e.g., 'en-GB') or 'GEN' or 'CEN'
        For any component that can't be identified, use an empty string.
        """

        try:
            gpt_response = analyze_with_gpt(
                prompt=prompt,
                content="",
                images=None,
                expect_json=True
            )

            # NOTE(review): the helper is assumed to return a JSON string;
            # an already-decoded dict is accepted too - confirm against
            # analyze_with_gpt.
            if isinstance(gpt_response, dict):
                parsed = gpt_response
            else:
                parsed = json.loads(gpt_response)
            if not isinstance(parsed, dict):
                raise ValueError("GPT response is not a JSON object")

            language_code = parsed.get("language", "")
            detected_format = parsed.get("format_type", "UNKNOWN")
            campaign_number = parsed.get("campaign_number", "") or ""

        except Exception as e:
            # Store partial results and return error
            context[check_id] = {
                "filename": filename,
                "base_name": base_name,
                "detected_format": "PARSE_FAILED",
                "parsed": {
                    "language": "",
                    "format_type": "UNKNOWN",
                    "campaign_number": "",
                    "dimensions": dimensions
                }
            }
            return {
                "status": "error",
                "error_message": f"Failed to parse filename: {str(e)}"
            }
    else:
        # Extract campaign number from filename if possible
        campaign_number = _extract_campaign_number(base_name)

    parsed_fields = {
        "language": language_code if language_code else "",
        "format_type": detected_format if detected_format else "UNKNOWN",
        "campaign_number": campaign_number,
        "dimensions": dimensions
    }

    # Store results in context (separate dict so callers mutating the return
    # value do not alter the stored context entry).
    context[check_id] = {
        "filename": filename,
        "base_name": base_name,
        "detected_format": detected_format,
        "parsed": dict(parsed_fields)
    }

    return {
        "status": "passed",
        "details": {
            "message": "Image filename parsed successfully",
            "filename_source": "HM_image_parse context",
            "detected_format": detected_format,
            "parsed": dict(parsed_fields)
        }
    }
|