hm_qc/checks/image_linking_check.py
2025-09-30 10:37:12 -05:00

95 lines
No EOL
3.8 KiB
Python
Executable file

import os
import json
def _as_list(value):
    """Return *value* if it is a list, otherwise an empty list.

    JSON ``null`` (or any other non-list value) in place of an expected
    array is treated as "no entries" instead of crashing the check.
    """
    return value if isinstance(value, list) else []


def _collect_referenced_paths(items):
    """Return the set of normalized asset paths referenced by *items*.

    Walks items -> "records" -> "assets" -> "filename". Entries that are not
    dicts, and filenames that are empty or not strings, are skipped.
    """
    referenced = set()
    for item in items:
        if not isinstance(item, dict):
            continue
        for record in _as_list(item.get("records")):
            if not isinstance(record, dict):
                continue
            for asset in _as_list(record.get("assets")):
                if not isinstance(asset, dict):
                    continue
                filename = asset.get("filename")
                if isinstance(filename, str) and filename:
                    # Normalize so "./a/b.png" and "a//b.png" compare equal
                    # to the paths produced by the directory walk below.
                    referenced.add(os.path.normpath(filename))
    return referenced


def run_check(config):
    """
    Check that every .jpg/.jpeg/.png image (case-insensitive extension) found
    recursively under the working directory is referenced by at least one
    asset in the linking record file, matching on the path relative to
    working_dir.

    Expected config keys:
        - working_dir: Directory containing the linking record file and the
          image files (default: 'working').
        - linkingrecord_filename: Name of the linking record file
          (default: 'linkingrecord.json').

    Behavior:
        1. Load the linking record JSON from working_dir.
        2. Collect all referenced filenames (items -> records -> assets ->
           filename), normalized with os.path.normpath.
        3. Walk working_dir recursively and, for each image file, compute its
           normalized path relative to working_dir.
        4. Report every image whose relative path is not in the referenced set.

    Returns:
        A dict with a "status" key:
        - "error" plus "error_message" when the linking record is missing,
          unreadable, not valid JSON, or lacks a list-valued "items" key;
        - "failed" plus details["unreferenced_files"] (sorted list of relative
          paths) when at least one image is unreferenced;
        - "passed" plus details["message"] otherwise.
    """
    working_dir = config.get("working_dir", "working")
    linkingrecord_filename = config.get("linkingrecord_filename", "linkingrecord.json")
    linkingrecord_path = os.path.join(working_dir, linkingrecord_filename)

    # 1. Load the linking record. Every load failure is reported through the
    #    same "error" result shape rather than raised to the caller.
    try:
        with open(linkingrecord_path, 'r', encoding='utf-8') as f:
            linkingrecord = json.load(f)
    except FileNotFoundError:
        return {
            "status": "error",
            "error_message": f"Linking record file '{linkingrecord_filename}' not found in {working_dir}."
        }
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as exc:
        return {
            "status": "error",
            "error_message": f"Could not read linking record file '{linkingrecord_filename}': {exc}"
        }

    # 2. Validate the top-level structure (the top level must be an object).
    if not isinstance(linkingrecord, dict) or not isinstance(linkingrecord.get("items"), list):
        return {
            "status": "error",
            "error_message": "Invalid linkingrecord.json structure: 'items' missing or not a list."
        }

    # 3. Collect all referenced paths (normalized, relative to working_dir).
    referenced_files = _collect_referenced_paths(linkingrecord["items"])

    # 4. Image extensions to consider (lowercase; compared case-insensitively).
    image_extensions = {'.jpg', '.jpeg', '.png'}

    # 5. Walk the tree and record every image that nothing references.
    unreferenced_files = []
    for root, _dirs, files in os.walk(working_dir):
        for file in files:
            # Skip any file carrying the linking record's own name.
            if file == linkingrecord_filename:
                continue
            ext = os.path.splitext(file)[1].lower()
            if ext not in image_extensions:
                continue
            rel_path = os.path.relpath(os.path.join(root, file), working_dir)
            norm_rel_path = os.path.normpath(rel_path)
            if norm_rel_path not in referenced_files:
                unreferenced_files.append(norm_rel_path)

    # 6. Return results.
    if unreferenced_files:
        return {
            "status": "failed",
            "details": {
                # Sorted so the report does not depend on os.walk ordering.
                "unreferenced_files": sorted(unreferenced_files)
            }
        }
    return {
        "status": "passed",
        "details": {
            "message": "All .jpg/.jpeg/.png images in the folder (and subfolders) match a path in linkingrecord.json."
        }
    }