108 lines
3.7 KiB
Python
108 lines
3.7 KiB
Python
import os
|
|
import shutil
|
|
import logging
|
|
from PIL import Image
|
|
|
|
# Extensions accepted by run_check (compared case-insensitively).
_SUPPORTED_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.psd')

# Modes the JPEG writer is documented to accept. Anything else (RGBA, LA,
# P, ...) is converted to RGB before the debug copy is written.
_JPEG_SAFE_MODES = ('L', 'RGB', 'CMYK')


def _is_same_or_inside(path, container):
    """Return True when *path* equals *container* or lies beneath it."""
    path = os.path.normcase(path)
    container = os.path.normcase(container)
    return path == container or path.startswith(container.rstrip(os.sep) + os.sep)


def _prepare_working_dir(working_dir, input_file):
    """Create *working_dir*, or empty it without deleting *input_file*.

    An entry that is the input file itself, or a directory containing it, is
    left untouched so that cleaning can never destroy the file being parsed.
    """
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)
        return

    input_abs = os.path.abspath(input_file)
    input_real = os.path.realpath(input_file)
    for entry in os.listdir(working_dir):
        entry_path = os.path.join(working_dir, entry)
        is_link = os.path.islink(entry_path)
        # Compare both the literal and the symlink-resolved locations: the
        # working directory or the input may be reached through a symlink.
        holds_input = _is_same_or_inside(input_abs, os.path.abspath(entry_path)) or (
            not is_link
            and _is_same_or_inside(input_real, os.path.realpath(entry_path))
        )
        if holds_input:
            continue
        if is_link or os.path.isfile(entry_path):
            os.remove(entry_path)
        else:
            shutil.rmtree(entry_path)


def run_check(config: dict, context: dict, check_id: str) -> dict:
    """Parse a static image file (JPG, PNG, PSD) for downstream QC checks.

    The image is loaded as a PIL object and stored in ``context[check_id]``
    together with its metadata. No text is extracted: unlike PDFs, static
    images typically contain no extractable text, so ``extracted_text`` is
    always the empty string.

    Args:
        config: Check configuration. Reads ``input_file`` (path to the image)
            and ``working_dir`` (defaults to ``"working"``).
        context: Shared dict; the parse result (or error details) is written
            under ``check_id``.
        check_id: Key under which this check stores its data in ``context``.

    Returns:
        A dict with ``status`` set to ``"passed"`` plus a ``details`` dict, or
        ``status`` set to ``"error"`` plus an ``error_message``. Exceptions
        are reported through the returned dict rather than raised.

    Side effects:
        ``working_dir`` is created if missing; otherwise its contents are
        removed, except for the entry holding the input file. A JPEG copy of
        the image is written to ``<working_dir>/parsed_image.jpg``.
    """
    input_file = config.get("input_file")
    working_dir = config.get("working_dir", "working")

    if not input_file or not os.path.isfile(input_file):
        return {
            "status": "error",
            "error_message": f"Image file '{input_file}' not provided or does not exist."
        }

    # Validate file extension
    file_ext = os.path.splitext(input_file)[1].lower()
    if file_ext not in _SUPPORTED_EXTENSIONS:
        return {
            "status": "error",
            "error_message": f"Input file '{input_file}' is not a supported image format. Supported: {', '.join(_SUPPORTED_EXTENSIONS)}"
        }

    # Prepare working directory
    try:
        _prepare_working_dir(working_dir, input_file)
    except Exception as e:
        return {
            "status": "error",
            "error_message": f"Failed to prepare working directory '{working_dir}': {e}"
        }

    try:
        with Image.open(input_file) as img:
            # copy() yields an image that outlives the file handle; for PSD
            # files PIL loads the composite image.
            parsed_image = img.copy()
            # format is read from the opened image, not from the copy.
            image_format = img.format
            image_mode = img.mode
            image_size = img.size

        # PSD composites in other modes are normalised to RGB for downstream
        # checks; RGB and RGBA are kept as loaded.
        if image_format == 'PSD' and parsed_image.mode not in ('RGB', 'RGBA'):
            parsed_image = parsed_image.convert('RGB')

        # Save a JPEG copy to the working directory for debugging. JPEG has no
        # alpha or palette support, so convert a throwaway copy when needed
        # and leave the image stored in context unchanged.
        saved_image_path = os.path.join(working_dir, "parsed_image.jpg")
        debug_copy = parsed_image
        if debug_copy.mode not in _JPEG_SAFE_MODES:
            debug_copy = debug_copy.convert('RGB')
        debug_copy.save(saved_image_path, 'JPEG')

        filename = os.path.basename(input_file)

        # Store in context
        context[check_id] = {
            "filename": filename,
            "input_file_path": input_file,
            "parsed_image": parsed_image,
            "image_format": image_format,
            "image_mode": image_mode,
            "image_size": image_size,
            "saved_image_path": saved_image_path,
            "extracted_text": ""  # No text extraction for static images
        }

        return {
            "status": "passed",
            "details": {
                "message": "Image parsed successfully.",
                "working_dir": working_dir,
                "filename": filename,
                "format": image_format,
                "size": f"{image_size[0]}x{image_size[1]}",
                "mode": image_mode,
                "saved_path": saved_image_path
            }
        }

    except Exception as e:
        logging.error("Failed to parse image '%s': %s", input_file, e)
        context[check_id] = {
            "filename": os.path.basename(input_file) if input_file else "unknown",
            "error": str(e),
            "partial_extraction": True
        }
        return {
            "status": "error",
            "error_message": f"Failed to parse image '{input_file}': {e}"
        }
|