hm_qc/checks/missing_images_check.py
2025-09-30 10:37:12 -05:00

67 lines
No EOL
2.2 KiB
Python
Executable file

import os
import json
def _dict_entries(container, key):
    """Return the dict elements of ``container[key]``, or ``[]`` if it is not a list."""
    value = container.get(key)
    if not isinstance(value, list):
        # Covers a missing key as well as an explicit JSON null or a scalar.
        return []
    return [entry for entry in value if isinstance(entry, dict)]


def run_check(config):
    """Check that every asset file referenced by the linking record exists.

    Expected config keys:
        working_dir: Directory holding the linking record and the extracted
            files (default: ``"working"``).
        linkingrecord_filename: Name of the linking record file
            (default: ``"linkingrecord.json"``).

    Behavior:
        - Load the linking record JSON from ``working_dir``.
        - Walk ``items -> records -> assets -> filename``.
        - Test each non-empty string filename with ``os.path.exists`` relative
          to ``working_dir``.
        - Entries that are not JSON objects, and ``records``/``assets`` values
          that are not lists (including ``null``), are skipped rather than
          raising.

    Returns:
        A dict with a ``"status"`` key:
        - ``"error"`` plus ``"error_message"`` when the linking record is
          absent, cannot be read/parsed, or has no ``items`` list.
        - ``"failed"`` plus ``details["missing_files"]`` (sorted, de-duplicated)
          when at least one referenced file is missing.
        - ``"passed"`` plus ``details["message"]`` otherwise.
    """
    working_dir = config.get("working_dir", "working")
    linkingrecord_filename = config.get("linkingrecord_filename", "linkingrecord.json")
    linkingrecord_path = os.path.join(working_dir, linkingrecord_filename)

    if not os.path.exists(linkingrecord_path):
        return {
            "status": "error",
            "error_message": f"Linking record file '{linkingrecord_filename}' not found in {working_dir}."
        }

    # Load the linking record JSON. JSONDecodeError and UnicodeDecodeError are
    # both ValueError subclasses; OSError covers unreadable paths (e.g. the
    # path is a directory). Report them the same way as other input problems.
    try:
        with open(linkingrecord_path, 'r', encoding='utf-8') as f:
            linkingrecord = json.load(f)
    except (OSError, ValueError) as exc:
        return {
            "status": "error",
            "error_message": f"Could not read linking record file '{linkingrecord_filename}': {exc}"
        }

    # The top-level value may legally be any JSON type, so check it is an
    # object before looking up 'items'.
    if not isinstance(linkingrecord, dict) or not isinstance(linkingrecord.get("items"), list):
        return {
            "status": "error",
            "error_message": "Invalid linkingrecord.json structure: 'items' missing or not a list."
        }

    missing_files = set()
    for item in _dict_entries(linkingrecord, "items"):
        for record in _dict_entries(item, "records"):
            for asset in _dict_entries(record, "assets"):
                filename = asset.get("filename")
                # Only non-empty strings can be joined onto working_dir.
                if not isinstance(filename, str) or not filename:
                    continue
                if not os.path.exists(os.path.join(working_dir, filename)):
                    missing_files.add(filename)

    if missing_files:
        return {
            "status": "failed",
            "details": {
                "missing_files": sorted(missing_files)
            }
        }

    return {
        "status": "passed",
        "details": {
            "message": "All referenced images exist."
        }
    }