121 lines
4.5 KiB
Python
Executable file
121 lines
4.5 KiB
Python
Executable file
import os
|
|
import json
|
|
from checks.analyze_with_gpt import analyze_with_gpt # Imports the FUNCTION
|
|
import re
|
|
|
|
# Matches the first five underscore-terminated segments of a base name.
# Names with fewer than five such segments do not match and are left unchanged.
_LEADING_SEGMENTS = re.compile(r'^(?:[^_]+_){5}')

# Keys copied out of the GPT reply; a key missing from the reply becomes "".
_PARSED_KEYS = ("dimensions", "format", "year", "reference", "language")

# Prompt sent to GPT. `{base_name}` and `{short_name}` are filled in with
# str.format(); the text contains no other braces.
_FILENAME_PROMPT = """
Parse this H&M artwork filename.

Full filename: {base_name}
Shortened filename (prefixes removed): {short_name}

H&M filenames can follow these formats:

Format 1: dimensions_format_year_reference-number_language-country.pdf
Example: 21.6x27.9cm_letter_2028_10062-01_en-us.pdf
- year = 2028
- reference = 10062-01
- language = en-us

Format 2: dimensions_format_prefix_reference-number_language-country.pdf
Example: 50x70cm_Poster_9000_10107-06_el-CY.pdf
- reference = 9000_10107-06 (INCLUDE the prefix number before the dash code!)
- language = el-CY

Format 3: dimensions_format_reference-number_(GEN|CEN).pdf
Example: 04_10.8x14cm_quarter_letter_1001D_10004-02_GEN.pdf
- reference = 10004-02
- language = GEN

Format 4 (OOH files): campaigncode_OOH_identifiers_dimensions_codes_reference_language.pdf
Example: 1022A_OOH_Static_AMS_BL18_600x300cm_PL-pl_prio1_pl-PL.pdf
- reference = 1022A (campaign code at start, before OOH)
- language = pl-PL (last segment after final underscore)

CRITICAL INSTRUCTIONS:
- The "reference" field should include ALL numeric/alphanumeric codes that identify the document
- If there are multiple numeric segments before the language code (e.g., 9000_10107-06),
combine them with underscore as the reference (e.g., reference = "9000_10107-06")
- The language is ALWAYS at the END of the filename (e.g., en-us, pl-PL, GEN, CEN)
- Do NOT use "prio1" or similar priority indicators as part of the reference
- Only use "year" field if it's clearly a 4-digit year (2024, 2025, 2028, etc.)
- If unsure whether a number is year or reference prefix, include it in the reference

Return only a JSON object with these exact keys:
dimensions, format, year, reference, language

For any component that can't be identified, use an empty string.
"""


def _pick_components(parsed: dict) -> dict:
    """Return a new dict holding the five expected keys, defaulting to ""."""
    return {key: parsed.get(key, "") for key in _PARSED_KEYS}


def run_check(config: dict, context: dict, check_id: str) -> dict:
    """
    QC check that parses a filename using GPT and shares results via context.

    The filename is read from ``context["HM_parse"]["filename"]`` rather than
    from the file system. On success the parsed components are stored under
    ``context[check_id]`` for use by other checks.

    Args:
        config: Check configuration; not read by this check.
        context: Shared dict. Read at key ``"HM_parse"``; written at
            ``check_id`` on success only.
        check_id: Key under which the results are stored in ``context``.

    Returns:
        A dict with ``"status"`` set to ``"passed"`` (plus ``"details"``) or
        ``"error"`` (plus ``"error_message"`` and, when GPT returned invalid
        JSON, ``"raw_response"``). Failures while calling GPT or handling its
        reply are reported through the return value, not raised.
    """
    # A missing, None, or non-dict HM_parse entry is treated the same as a
    # missing filename instead of crashing on `.get`.
    hm_parse_data = context.get("HM_parse")
    filename = (
        hm_parse_data.get("filename")
        if isinstance(hm_parse_data, dict)
        else None
    )

    if not filename:
        return {
            "status": "error",
            "error_message": "Filename not found in HM_parse context. Ensure HM_parse check runs first."
        }

    # Drop directory and extension, then strip the five leading segments.
    base_name = os.path.splitext(os.path.basename(filename))[0]
    short_name = _LEADING_SEGMENTS.sub('', base_name)

    prompt = _FILENAME_PROMPT.format(base_name=base_name, short_name=short_name)

    # Bound before the try so the JSONDecodeError handler can always read it,
    # even when the exception comes from inside analyze_with_gpt itself.
    gpt_response = None
    try:
        gpt_response = analyze_with_gpt(
            prompt=prompt,
            content="",
            images=None,
            expect_json=True
        )

        # NOTE(review): the helper is called with expect_json=True; whether it
        # returns JSON text or an already-decoded object is not visible here.
        # A dict is used as-is; anything else goes through json.loads.
        if isinstance(gpt_response, dict):
            parsed = gpt_response
        else:
            parsed = json.loads(gpt_response)

        # Valid JSON that is not an object (list, string, number) cannot
        # carry the expected keys; report that explicitly.
        if not isinstance(parsed, dict):
            return {
                "status": "error",
                "error_message": (
                    "Filename parsing failed: expected a JSON object, "
                    f"got {type(parsed).__name__}"
                )
            }

        components = _pick_components(parsed)

        # Separate copies so that a later mutation of the context entry does
        # not alter the returned details (and vice versa).
        context[check_id] = {
            "filename": filename,
            "short_name": short_name,
            "parsed": dict(components)
        }

        return {
            "status": "passed",
            "details": {
                "message": "Filename parsed and stored in context",
                "filename_source": "HM_parse context",
                "gpt_response_summary": f"Parsed {len(parsed)} components",
                "parsed": dict(components)
            }
        }

    except json.JSONDecodeError as e:
        # str() keeps the preview safe when the reply is not a str.
        raw_preview = str(gpt_response)[:200] + "..." if gpt_response else None
        return {
            "status": "error",
            "error_message": f"GPT returned invalid JSON: {str(e)}",
            "raw_response": raw_preview
        }
    except Exception as e:
        # Boundary handler: a check reports failure via its result dict.
        return {
            "status": "error",
            "error_message": f"Filename parsing failed: {str(e)}"
        }
|