import logging
import os
import shutil

from PIL import Image

# Pillow's JPEG writer rejects other modes (e.g. RGBA, LA, P) with an OSError.
_JPEG_WRITABLE_MODES = frozenset({"1", "L", "RGB", "CMYK", "YCbCr"})


def _is_inside(path, directory) -> bool:
    """Return True when *path* resolves to a location within *directory*."""
    try:
        path_real = os.path.normcase(os.path.realpath(path))
        dir_real = os.path.normcase(os.path.realpath(directory))
        return os.path.commonpath([path_real, dir_real]) == dir_real
    except (TypeError, ValueError):
        # Non-path values or paths on different drives: cannot be nested.
        return False


def _reset_working_dir(working_dir) -> None:
    """Empty *working_dir* if it exists, otherwise create it."""
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)
        return
    for item in os.listdir(working_dir):
        item_path = os.path.join(working_dir, item)
        if os.path.isfile(item_path) or os.path.islink(item_path):
            os.remove(item_path)
        else:
            shutil.rmtree(item_path)


def _jpeg_compatible(image):
    """Return *image* itself, or an RGB conversion if JPEG cannot store its mode."""
    if image.mode in _JPEG_WRITABLE_MODES:
        return image
    # NOTE: converting to RGB discards any alpha channel; this only affects
    # the debug copy on disk, never the image stored in the context.
    return image.convert('RGB')


def run_check(config: dict, context: dict, check_id: str):
    """
    QC check that handles static image file parsing (JPG, PNG, PSD).

    Loads the image as a PIL object and stores it in ``context[check_id]``
    for downstream checks. Unlike PDF parsing, this does not extract text
    content, so ``extracted_text`` is always the empty string.

    Args:
        config: Check configuration. Reads ``input_file`` (path to the image)
            and ``working_dir`` (defaults to ``"working"``). The working
            directory is emptied, or created if missing, before parsing.
        context: Shared mutable mapping; the result for this check is written
            under ``check_id``.
        check_id: Key under which this check stores its data in ``context``.

    Returns:
        A dict with ``"status": "passed"`` and a ``"details"`` dict on
        success, or ``"status": "error"`` and an ``"error_message"`` string on
        failure. Exceptions are reported through the returned dict rather
        than raised.
    """
    input_file = config.get("input_file")
    working_dir = config.get("working_dir", "working")

    if not input_file or not os.path.isfile(input_file):
        return {
            "status": "error",
            "error_message": f"Image file '{input_file}' not provided or does not exist."
        }

    # Validate file extension
    valid_extensions = ['.jpg', '.jpeg', '.png', '.psd']
    file_ext = os.path.splitext(input_file)[1].lower()
    if file_ext not in valid_extensions:
        return {
            "status": "error",
            "error_message": f"Input file '{input_file}' is not a supported image format. Supported: {', '.join(valid_extensions)}"
        }

    # The working directory is wiped below; refuse to run if that would
    # delete the very file we are about to parse.
    if _is_inside(input_file, working_dir):
        return {
            "status": "error",
            "error_message": (
                f"Image file '{input_file}' is located inside working directory "
                f"'{working_dir}', which is cleared before parsing."
            )
        }

    # Prepare working directory
    try:
        _reset_working_dir(working_dir)
    except Exception as e:
        return {
            "status": "error",
            "error_message": f"Failed to prepare working directory '{working_dir}': {e}"
        }

    try:
        # Load image as PIL object; copy() forces the pixel data to be read so
        # the result stays usable after the file handle is closed.
        with Image.open(input_file) as img:
            # For PSD files, PIL will load the composite image
            parsed_image = img.copy()

            # Metadata describes the file as opened, before any conversion.
            image_format = img.format
            image_mode = img.mode
            image_size = img.size

        # Convert PSD to RGB if needed
        if image_format == 'PSD' and parsed_image.mode not in ('RGB', 'RGBA'):
            parsed_image = parsed_image.convert('RGB')

        # Save a copy to the working directory for debugging. The copy is
        # always a JPEG, so modes JPEG cannot hold (RGBA, P, LA, ...) are
        # converted on a throwaway image instead of failing the whole check.
        saved_image_path = os.path.join(working_dir, "parsed_image.jpg")
        _jpeg_compatible(parsed_image).save(saved_image_path, 'JPEG')

        filename = os.path.basename(input_file)

        # Store in context
        context[check_id] = {
            "filename": filename,
            "input_file_path": input_file,
            "parsed_image": parsed_image,
            "image_format": image_format,
            "image_mode": image_mode,
            "image_size": image_size,
            "saved_image_path": saved_image_path,
            "extracted_text": ""  # No text extraction for static images
        }

        return {
            "status": "passed",
            "details": {
                "message": "Image parsed successfully.",
                "working_dir": working_dir,
                "filename": filename,
                "format": image_format,
                "size": f"{image_size[0]}x{image_size[1]}",
                "mode": image_mode,
                "saved_path": saved_image_path
            }
        }

    except Exception as e:
        logging.error("Failed to parse image '%s': %s", input_file, e)
        context[check_id] = {
            "filename": os.path.basename(input_file) if input_file else "unknown",
            "error": str(e),
            "partial_extraction": True
        }
        return {
            "status": "error",
            "error_message": f"Failed to parse image '{input_file}': {e}"
        }