diff --git a/checks/beltline_validation_check.py b/checks/beltline_validation_check.py index d550ecf..50b5e28 100644 --- a/checks/beltline_validation_check.py +++ b/checks/beltline_validation_check.py @@ -15,9 +15,12 @@ def run_check(config): Validation Requirements: 1. Frame Count: Exactly 72 desktop frames (0-71) and 23 mobile frames (49-71) - 2. Frame Range Warnings: Mobile frames 0-48 should not exist - 3. Angle/Filename Consistency: JSON angle field must match filename frame number - 4. Variant Consistency: imagetype must equal variant field + 2. File Existence: All referenced files must exist on disk + 3. Unreferenced Files: Detect beltline files on disk not referenced in JSON + 4. Orphaned Files: Detect beltline files when JSON has no proper imagetype + 5. Frame Range Warnings: Mobile frames 0-48 should not exist + 6. Angle/Filename Consistency: JSON angle field must match filename frame number + 7. Variant Consistency: imagetype must equal variant field Expected config: - working_dir (str): Directory where linkingrecord.json and images reside @@ -25,7 +28,7 @@ def run_check(config): Returns: - "passed" if all validations pass - - "failed" if required frames missing or critical issues found + - "failed" if files missing, frames missing, orphaned files, or unreferenced files found - Warnings for non-critical issues (unexpected frames, mismatches) """ @@ -97,8 +100,26 @@ def run_check(config): if imagetype is not None: record_skipped_type(skipped_types, "beltline_validation_check", viewtype, imagetype) - # If no beltline items found, this is likely not a beltline pack + # If no beltline items found in JSON, check filesystem for orphaned files if not desktop_items and not mobile_items: + # Scan filesystem to check if beltline files actually exist + filesystem_scan = scan_filesystem_for_beltline_files(working_dir) + + if filesystem_scan["total_count"] > 0: + # CRITICAL: Beltline files exist but JSON has no proper imagetype + return { + "status": "failed", + "details": { + "message": f"Orphaned beltline files detected: {filesystem_scan['total_count']} beltline files found on disk but no valid imagetype in JSON.", + "orphaned_files_count": filesystem_scan["total_count"], + "orphaned_desktop_files": sorted(list(filesystem_scan["desktop_files"])), + "orphaned_mobile_files": sorted(list(filesystem_scan["mobile_files"])), + "error_type": "orphaned_beltline_files", + "fix_instructions": "Beltline image files exist on disk but linkingrecord.json does not contain proper 'imagetype' fields (desktop/mobile) for vehicleselector items. Please check JSON structure." + } + } + + # No beltline files in JSON or on disk - truly not a beltline pack if skipped_types["beltline_validation_check"]: return prepare_skipped_result(skipped_types, "beltline_validation_check") @@ -116,29 +137,97 @@ def run_check(config): # Validate mobile frames (49-71, total 23) mobile_validation = validate_frame_set(mobile_items, "mobile", expected_frames=set(range(49, 72)), working_dir=working_dir) + # Scan filesystem for all beltline files + filesystem_scan = scan_filesystem_for_beltline_files(working_dir) + + # Collect all referenced files from JSON + all_referenced_files = desktop_validation["referenced_files"] | mobile_validation["referenced_files"] + + # Detect unreferenced files (on disk but not in JSON) + unreferenced_files = filesystem_scan["all_files"] - all_referenced_files + # Combine all validation results all_missing_frames = desktop_validation["missing_frames"] + mobile_validation["missing_frames"] + all_missing_files = desktop_validation["missing_files"] + mobile_validation["missing_files"] all_warnings = warnings + desktop_validation["warnings"] + mobile_validation["warnings"] - # Determine overall status - if all_missing_frames: + # Collect case violations from all sources + all_case_violations = [] + all_case_violations.extend(desktop_validation.get("case_violations", [])) + all_case_violations.extend(mobile_validation.get("case_violations", [])) + all_case_violations.extend(filesystem_scan.get("case_violations", [])) + + # Determine overall status - FAIL if any critical issues + has_missing_frames = len(all_missing_frames) > 0 + has_missing_files = len(all_missing_files) > 0 + has_unreferenced_files = len(unreferenced_files) > 0 + has_case_violations = len(all_case_violations) > 0 + + # Build detailed failure/success message + if has_missing_files or has_missing_frames or has_unreferenced_files or has_case_violations: + error_parts = [] + if has_missing_files: + error_parts.append(f"{len(all_missing_files)} files missing from filesystem") + if has_missing_frames: + error_parts.append(f"{len(all_missing_frames)} frames missing") + if has_unreferenced_files: + error_parts.append(f"{len(unreferenced_files)} unreferenced files on filesystem") + if has_case_violations: + error_parts.append(f"{len(all_case_violations)} case violation(s)") + result = { "status": "failed", "details": { - "message": f"Beltline validation failed - {len(all_missing_frames)} required frames missing.", - "missing_desktop_frames": desktop_validation["missing_frames"], - "missing_mobile_frames": mobile_validation["missing_frames"], + "message": f"Beltline validation failed - {', '.join(error_parts)}.", "desktop_frame_summary": f"{desktop_validation['found_count']}/72 desktop frames found", "mobile_frame_summary": f"{mobile_validation['found_count']}/23 mobile frames found" } } + + # Add missing frames details + if has_missing_frames: + result["details"]["missing_desktop_frames"] = desktop_validation["missing_frames"] + result["details"]["missing_mobile_frames"] = mobile_validation["missing_frames"] + + # Add missing files details (CRITICAL - files referenced in linking record but don't exist on disk) + if has_missing_files: + result["details"]["missing_files"] = all_missing_files + result["details"]["missing_files_count"] = len(all_missing_files) + result["details"]["error_type"] = "missing_beltline_files" + result["details"]["source"] = "Files referenced in linking record not found on filesystem" + + # Add unreferenced files details (files on disk not referenced in linking record) + if has_unreferenced_files: + result["details"]["unreferenced_files"] = sorted(list(unreferenced_files)) + result["details"]["unreferenced_files_count"] = len(unreferenced_files) + result["details"]["error_type"] = "unreferenced_beltline_files" + result["details"]["source"] = "Files found on filesystem not referenced in linking record" + + # Add case violations details + if has_case_violations: + # Separate by source for clarity + linkingrecord_violations = [v for v in all_case_violations if v.get("source") == "linking_record"] + filesystem_violations = [v for v in all_case_violations if v.get("source") != "linking_record"] + + result["details"]["case_violations"] = all_case_violations + result["details"]["case_violations_count"] = len(all_case_violations) + + if linkingrecord_violations: + result["details"]["case_violations_in_linking_record"] = linkingrecord_violations + result["details"]["case_violations_linking_record_count"] = len(linkingrecord_violations) + + if filesystem_violations: + result["details"]["case_violations_on_filesystem"] = filesystem_violations + result["details"]["case_violations_filesystem_count"] = len(filesystem_violations) + else: result = { "status": "passed", "details": { "message": "All required beltline frames found and validated successfully.", "desktop_frame_summary": f"{desktop_validation['found_count']}/72 desktop frames found", - "mobile_frame_summary": f"{mobile_validation['found_count']}/23 mobile frames found" + "mobile_frame_summary": f"{mobile_validation['found_count']}/23 mobile frames found", + "filesystem_validation": f"{filesystem_scan['total_count']} beltline files found on disk, all properly referenced" } } @@ -157,6 +246,87 @@ def run_check(config): return result +def scan_filesystem_for_beltline_files(working_dir: str) -> Dict[str, Any]: + """ + Scan the filesystem for beltline image files. + + Looks for files matching patterns: + - vsd_*_*.AVIF (desktop beltline - uppercase extension only) + - vsm_*_*.AVIF (mobile beltline - uppercase extension only) + - Also detects files with wrong case (.avif, .Avif, etc.) for error reporting + + Args: + working_dir: Directory to scan + + Returns: + Dict with: + - desktop_files: Set of desktop beltline file paths (relative to working_dir) + - mobile_files: Set of mobile beltline file paths + - all_files: Set of all beltline file paths + - desktop_frames: Set of frame numbers found in desktop files + - mobile_frames: Set of frame numbers found in mobile files + - case_violations: List of files with wrong case extensions + """ + # Strict patterns - uppercase .AVIF only + desktop_pattern = re.compile(r'vsd_(\d+)_\d+\.AVIF') + mobile_pattern = re.compile(r'vsm_(\d+)_\d+\.AVIF') + + # Case-insensitive patterns for detecting violations + desktop_pattern_any_case = re.compile(r'vsd_(\d+)_\d+\.avif', re.IGNORECASE) + mobile_pattern_any_case = re.compile(r'vsm_(\d+)_\d+\.avif', re.IGNORECASE) + + desktop_files = set() + mobile_files = set() + desktop_frames = set() + mobile_frames = set() + case_violations = [] + + # Walk through all subdirectories + for root, dirs, files in os.walk(working_dir): + for file in files: + # Check desktop pattern (strict uppercase) + desktop_match = desktop_pattern.search(file) + if desktop_match: + rel_path = os.path.relpath(os.path.join(root, file), working_dir) + desktop_files.add(rel_path) + desktop_frames.add(int(desktop_match.group(1))) + continue + + # Check mobile pattern (strict uppercase) + mobile_match = mobile_pattern.search(file) + if mobile_match: + rel_path = os.path.relpath(os.path.join(root, file), working_dir) + mobile_files.add(rel_path) + mobile_frames.add(int(mobile_match.group(1))) + continue + + # Check for case violations (beltline files with wrong case) + desktop_wrong_case = desktop_pattern_any_case.search(file) + mobile_wrong_case = mobile_pattern_any_case.search(file) + + if desktop_wrong_case or mobile_wrong_case: + # This is a beltline file but with wrong case extension + rel_path = os.path.relpath(os.path.join(root, file), working_dir) + _, ext = os.path.splitext(file) + case_violations.append({ + "filename": rel_path, + "found_extension": ext, + "required_extension": ".AVIF", + "message": f"Beltline file found with incorrect case '{ext}' (must be uppercase .AVIF)" + }) + + all_files = desktop_files | mobile_files + + return { + "desktop_files": desktop_files, + "mobile_files": mobile_files, + "all_files": all_files, + "desktop_frames": desktop_frames, + "mobile_frames": mobile_frames, + "case_violations": case_violations, + "total_count": len(all_files) + } + def validate_frame_set(items: List[Dict], variant_type: str, expected_frames: Set[int], working_dir: str) -> Dict[str, Any]: """ Validate a set of beltline frames (desktop or mobile). @@ -168,11 +338,24 @@ def validate_frame_set(items: List[Dict], variant_type: str, expected_frames: Se working_dir: Directory containing the images Returns: - Dict with validation results + Dict with validation results including: + - missing_frames: Frame numbers referenced in JSON but files don't exist + - missing_files: Specific filenames from linking record that don't exist on disk + - referenced_files: All files referenced in JSON for this variant + - found_count: Number of valid frames found + - warnings: List of validation warnings + - case_violations_in_linkingrecord: Files in JSON with wrong case extension """ found_frames = set() + missing_files = [] + referenced_files = set() warnings = [] - filename_pattern = r'(vsd|vsm)_(\d+)_\d+\.(avif|AVIF)' + case_violations = [] + + # Strict pattern - uppercase .AVIF only + filename_pattern_strict = r'(vsd|vsm)_(\d+)_\d+\.AVIF' + # Case-insensitive pattern for detecting violations + filename_pattern_any_case = r'(vsd|vsm)_(\d+)_\d+\.avif' for item in items: records = item.get("records", []) @@ -185,15 +368,47 @@ def validate_frame_set(items: List[Dict], variant_type: str, expected_frames: Se filename = asset.get("filename", "") if filename: - # Extract frame number from filename - match = re.search(filename_pattern, filename) - if match: - filename_frame = int(match.group(2)) - found_frames.add(filename_frame) + # Normalize path for cross-platform compatibility + normalized_filename = os.path.normpath(filename) + referenced_files.add(normalized_filename) + + # Check for case violations in linking record first + match_strict = re.search(filename_pattern_strict, filename) + match_any_case = re.search(filename_pattern_any_case, filename, re.IGNORECASE) + + if match_any_case and not match_strict: + # This is a beltline file but with wrong case extension in linking record + _, ext = os.path.splitext(filename) + case_violations.append({ + "filename": filename, + "found_extension": ext, + "required_extension": ".AVIF", + "message": f"Linking record references beltline file with incorrect case '{ext}' (must be uppercase .AVIF)", + "source": "linking_record" + }) + # Continue processing to check existence and frame number + + # Check if file exists on disk (CRITICAL VALIDATION) + file_path = os.path.join(working_dir, filename) + file_exists = os.path.exists(file_path) + + if not file_exists: + missing_files.append({ + "filename": filename, + "message": "File referenced in linking record but not found on disk" + }) + + # Extract frame number from filename (use any-case pattern for parsing) + if match_any_case: + filename_frame = int(match_any_case.group(2)) + + # Only count as found if file actually exists and has correct case + if file_exists and match_strict: + found_frames.add(filename_frame) # Validate angle/filename consistency if angle is not None and angle != filename_frame: - warnings.append(f"Angle/filename mismatch: angle={angle} but filename='{filename}' suggests frame {filename_frame}") + warnings.append(f"Angle/filename mismatch in linking record: angle={angle} but filename='{filename}' suggests frame {filename_frame}") # Check for unexpected mobile frames 0-48 if variant_type == "mobile" and filename_frame < 49: @@ -201,13 +416,16 @@ def validate_frame_set(items: List[Dict], variant_type: str, expected_frames: Se else: warnings.append(f"Could not parse frame number from filename: '{filename}'") - # Calculate missing frames + # Calculate missing frames (frames that should exist but don't have valid files) missing_frames = sorted(list(expected_frames - found_frames)) found_count = len(found_frames) return { "missing_frames": missing_frames, + "missing_files": missing_files, + "referenced_files": referenced_files, "found_count": found_count, "warnings": warnings, + "case_violations": case_violations, "found_frames": sorted(list(found_frames)) } \ No newline at end of file diff --git a/checks/check_series_permutations.py b/checks/check_series_permutations.py index d63d49a..4f3de56 100644 --- a/checks/check_series_permutations.py +++ b/checks/check_series_permutations.py @@ -1,38 +1,39 @@ import os import json -import itertools from typing import Dict, List, Set, Tuple, Any def run_check(config: Dict[str, Any]) -> Dict[str, Any]: """ - Validate that series images correspond to every possible permutation of specific exterior feature codes. - - This check validates that the defined "series" images in the linking record correspond to every - possible permutation of VS- (Visual Selection) and ACM (Accessory Content Management) codes - found in the exterior features section. - + Validate that series images exist for every unique VS/ACM combination found in exterior and showroom sections. + + This check validates that the "series" section contains an image for every unique combination of + VS- (Visual Selection) and ACM (Accessory Content Management) codes that appears in the + exterior and showroom sections. The validation creates a "source of truth" by extracting all + unique [ACM, VS] pairs from exterior and showroom records, then ensures the series section + provides an image for each combination. + :param config: Configuration dictionary containing: - working_dir: Directory where linkingrecord.json and extracted files are located - linkingrecord_filename: The name of the linking record file (default: 'linkingrecord.json') - + :return: Dictionary with validation results: - status: "passed", "failed", or "error" - details: Additional information about the validation results - error_message: Error details if status is "error" """ - + working_dir = config.get("working_dir", "working") linkingrecord_filename = config.get("linkingrecord_filename", "linkingrecord.json") linkingrecord_path = os.path.join(working_dir, linkingrecord_filename) - + # Check if linkingrecord.json exists if not os.path.exists(linkingrecord_path): return { "status": "error", "error_message": f"Linking record file '{linkingrecord_filename}' not found in {working_dir}." } - + # Load and validate linkingrecord.json try: with open(linkingrecord_path, 'r', encoding='utf-8') as f: @@ -47,173 +48,175 @@ def run_check(config: Dict[str, Any]) -> Dict[str, Any]: "status": "error", "error_message": f"Error reading '{linkingrecord_filename}': {str(e)}" } - + # Validate basic structure if not isinstance(linkingrecord, dict): return { "status": "error", "error_message": "Invalid linkingrecord.json structure: root is not an object." } - + if "items" not in linkingrecord or not isinstance(linkingrecord["items"], list): return { "status": "error", "error_message": "Invalid linkingrecord.json structure: 'items' missing or not a list." } - - # Extract exterior features - exterior_vs_codes = set() - exterior_acm_codes = set() - + + # STEP 1-3: Build "Source of Truth" - Extract unique [ACM, VS] pairs from exterior and showroom + source_of_truth = set() + for item in linkingrecord["items"]: if not isinstance(item, dict): continue - + conditions = item.get("conditions", {}) if not isinstance(conditions, dict): continue - - # Look for exterior viewtype items + viewtype = conditions.get("viewtype") - if viewtype == "exterior": + imagetype = conditions.get("imagetype") + + # Look for exterior or showroom sections + # This includes: viewtype="exterior" (with any imagetype or no imagetype) + # and viewtype="showroom" (if it exists as standalone) + is_exterior_or_showroom = ( + viewtype == "exterior" or + viewtype == "showroom" or + (viewtype == "exterior" and imagetype == "showroom") + ) + + if is_exterior_or_showroom: records = item.get("records", []) if not isinstance(records, list): continue - + for record in records: if not isinstance(record, dict): continue - + features = record.get("features", []) if not isinstance(features, list): continue - - # Extract VS- and ACM codes from features + + # Extract the actual [ACM, VS] pair from this record's features + # Each record represents ONE combination, not a cross-product + vs_code = None + acm_code = None + for feature in features: if isinstance(feature, str): if feature.startswith("vs-"): - exterior_vs_codes.add(feature) + vs_code = feature elif feature.startswith("acm"): - exterior_acm_codes.add(feature) - - # Check if we found any VS- or ACM codes in exterior - if not exterior_vs_codes and not exterior_acm_codes: + acm_code = feature + + # If this record has both ACM and VS, add the pair to source of truth + if vs_code and acm_code: + source_of_truth.add((acm_code, vs_code)) + + # Check if we found any valid combinations + if not source_of_truth: return { - "status": "failed", + "status": "passed", "details": { - "message": "No VS- or ACM codes found in exterior features." + "message": "No VS/ACM combinations found in exterior or showroom sections. Check may not be applicable to this pack type.", + "source_combinations_count": 0 } } - - if not exterior_vs_codes: - return { - "status": "failed", - "details": { - "message": "No VS- codes found in exterior features." - } - } - - if not exterior_acm_codes: - return { - "status": "failed", - "details": { - "message": "No ACM codes found in exterior features." - } - } - - # Generate expected permutations (Cartesian product) - expected_permutations = set() - for vs_code in exterior_vs_codes: - for acm_code in exterior_acm_codes: - expected_permutations.add((vs_code, acm_code)) - - # Extract series permutations - series_permutations = set() + + # STEP 4: Extract actual [ACM, VS] pairs from series section + series_combinations = set() series_section_found = False - + for item in linkingrecord["items"]: if not isinstance(item, dict): continue - + conditions = item.get("conditions", {}) if not isinstance(conditions, dict): continue - - # Look for series imagetype items + + viewtype = conditions.get("viewtype") imagetype = conditions.get("imagetype") - if imagetype == "series": + + # Look for series section: viewtype="carousel" AND imagetype="series" + if viewtype == "carousel" and imagetype == "series": series_section_found = True records = item.get("records", []) if not isinstance(records, list): continue - + for record in records: if not isinstance(record, dict): continue - + features = record.get("features", []) if not isinstance(features, list): continue - - # Extract VS- and ACM codes from this series record - vs_codes_in_record = [] - acm_codes_in_record = [] - + + # Extract the actual [ACM, VS] pair from this series record + # Each series record represents ONE combination, not a cross-product + vs_code = None + acm_code = None + for feature in features: if isinstance(feature, str): if feature.startswith("vs-"): - vs_codes_in_record.append(feature) + vs_code = feature elif feature.startswith("acm"): - acm_codes_in_record.append(feature) - - # Create permutations for this record (each VS- with each ACM in the same record) - for vs_code in vs_codes_in_record: - for acm_code in acm_codes_in_record: - series_permutations.add((vs_code, acm_code)) - + acm_code = feature + + # If this record has both ACM and VS, add the pair + if vs_code and acm_code: + series_combinations.add((acm_code, vs_code)) + # Check if series section exists if not series_section_found: return { "status": "failed", "details": { - "message": "No 'series' section found in linkingrecord." + "message": "No series section found (viewtype='carousel', imagetype='series') in linkingrecord.", + "source_combinations_count": len(source_of_truth), + "source_combinations": [f"({acm}, {vs})" for acm, vs in sorted(source_of_truth)] } } - - # Compare expected vs actual permutations - missing_permutations = expected_permutations - series_permutations - extra_permutations = series_permutations - expected_permutations - - # Generate detailed error message if there are mismatches - if missing_permutations or extra_permutations: + + # STEP 5: Compare - every combination in source of truth must have a series image + missing_combinations = source_of_truth - series_combinations + extra_combinations = series_combinations - source_of_truth + + # Generate detailed report + if missing_combinations: error_details = { - "message": "Series permutation mismatch detected.", - "expected_permutations_count": len(expected_permutations), - "actual_permutations_count": len(series_permutations) + "message": "Series validation failed: Some VS/ACM combinations from exterior/showroom are missing series images.", + "source_combinations_count": len(source_of_truth), + "series_combinations_count": len(series_combinations), + "missing_count": len(missing_combinations), + "missing_combinations": [f"({acm}, {vs})" for acm, vs in sorted(missing_combinations)] } - - if missing_permutations: - missing_list = [f"({vs}, {acm})" for vs, acm in sorted(missing_permutations)] - error_details["missing_permutations"] = missing_list - error_details["missing_count"] = len(missing_permutations) - - if extra_permutations: - extra_list = [f"({vs}, {acm})" for vs, acm in sorted(extra_permutations)] - error_details["extra_permutations"] = extra_list - error_details["extra_count"] = len(extra_permutations) - + + if extra_combinations: + error_details["extra_count"] = len(extra_combinations) + error_details["extra_combinations"] = [f"({acm}, {vs})" for acm, vs in sorted(extra_combinations)] + error_details["extra_note"] = "Series section contains combinations not found in exterior/showroom." + return { "status": "failed", "details": error_details } - - # All permutations match + + # All required combinations are present in series + success_details = { + "message": "All VS/ACM combinations from exterior and showroom have corresponding series images.", + "validated_combinations_count": len(source_of_truth), + "source_combinations": [f"({acm}, {vs})" for acm, vs in sorted(source_of_truth)] + } + + if extra_combinations: + success_details["note"] = f"{len(extra_combinations)} additional series combinations found that don't appear in exterior/showroom." + success_details["extra_combinations"] = [f"({acm}, {vs})" for acm, vs in sorted(extra_combinations)] + return { "status": "passed", - "details": { - "message": "All exterior VS- and ACM feature permutations have corresponding series entries.", - "permutations_validated": len(expected_permutations), - "vs_codes_found": sorted(list(exterior_vs_codes)), - "acm_codes_found": sorted(list(exterior_acm_codes)) - } + "details": success_details } \ No newline at end of file diff --git a/checks/html_reporter.py b/checks/html_reporter.py index c7eda31..2ada65b 100755 --- a/checks/html_reporter.py +++ b/checks/html_reporter.py @@ -283,10 +283,32 @@ class HTMLReporter: # Add skipped types information if present if 'skipped_types' in details: formatted_parts.append(HTMLReporter._format_skipped_types(details)) - + + # Format new error categories first (Check 1 and Check 3 enhancements) + if 'unauthorized_types_in_linking_record' in details: + formatted_parts.append(HTMLReporter._format_unauthorized_types(details['unauthorized_types_in_linking_record'])) + if 'unauthorized_file_types' in details: + formatted_parts.append(HTMLReporter._format_unauthorized_types(details['unauthorized_file_types'])) + + if 'case_violations_in_linking_record' in details: + formatted_parts.append(HTMLReporter._format_case_violations(details['case_violations_in_linking_record'])) + if 'case_violations' in details and isinstance(details['case_violations'], list): + formatted_parts.append(HTMLReporter._format_case_violations(details['case_violations'])) + + if 'extraneous_files_in_filesystem' in details: + formatted_parts.append(HTMLReporter._format_extraneous_files(details['extraneous_files_in_filesystem'])) + # Format different detail types if 'missing_files' in details: - formatted_parts.append(HTMLReporter._format_missing_files(details['missing_files'])) + # Handle both old format (list of strings) and new format (list of dicts) + missing_files = details['missing_files'] + if missing_files and isinstance(missing_files[0], dict): + # New format: extract filenames from dict + filenames = [f.get('filename', f) for f in missing_files] + formatted_parts.append(HTMLReporter._format_missing_files(filenames)) + else: + # Old format: list of strings + formatted_parts.append(HTMLReporter._format_missing_files(missing_files)) elif 'failed_images' in details: # Determine which formatter to use based on check_type or content if check_type == "image_resolution_check" or any('expected_resolution' in img for img in details['failed_images'] if isinstance(img, dict)): @@ -446,6 +468,153 @@ class HTMLReporter:
{json.dumps(items, indent=2, default=str)}
''' + @staticmethod + def _format_unauthorized_types(items: list) -> str: + """Format unauthorized file types in linking record.""" + try: + if not items: + return "" + + return f''' +
+
Unauthorized File Types in Linking Record ({len(items)})
+
+ Issue: The linking record references file types that are not allowed in asset packs. +
+ + + + + + + + + + + + {''.join(f""" + + + + + + + + """ for idx, item in enumerate(items))} + +
#FilenameExtensionLocation in JSONMessage
{idx+1}{item.get('filename', 'N/A')}{item.get('extension', 'N/A')}{item.get('location', 'N/A')}{item.get('message', 'N/A')}
+
+ Fix: Update the linking record to only reference allowed file types: .jpg, .jpeg, .png, .AVIF, .json +
+
+ ''' + except Exception as e: + logging.warning(f"Error formatting unauthorized types: {e}") + return f''' +
+ Error formatting unauthorized types: {str(e)} +
+
{json.dumps(items, indent=2, default=str)}
+ ''' + + @staticmethod + def _format_case_violations(items: list) -> str: + """Format case violations (AVIF extensions not uppercase).""" + try: + if not items: + return "" + + return f''' +
+
Case Violations - AVIF Extensions Must Be Uppercase ({len(items)})
+
+ Issue: AVIF file extensions must be uppercase (.AVIF) in the linking record. +
+ + + + + + + + + + + + {''.join(f""" + + + + + + + + """ for idx, item in enumerate(items))} + +
#FilenameFound ExtensionRequired ExtensionLocation
{idx+1}{item.get('filename', 'N/A')}{item.get('found_extension', 'N/A')}{item.get('required_extension', 'N/A')}{item.get('location', item.get('source', 'N/A'))}
+
+ Fix: Update all AVIF extensions in the linking record to uppercase (.AVIF) +
+
+ ''' + except Exception as e: + logging.warning(f"Error formatting case violations: {e}") + return f''' +
+ Error formatting case violations: {str(e)} +
+
{json.dumps(items, indent=2, default=str)}
+ ''' + + @staticmethod + def _format_extraneous_files(items: list) -> str: + """Format extraneous files found in filesystem.""" + try: + if not items: + return "" + + return f''' +
+
Extraneous Files in Filesystem ({len(items)})
+
+ Issue: Unauthorized file types found in the asset pack that are not allowed. +
+ + + + + + + + + + + + {''.join(f""" + + + + + + + + """ for idx, item in enumerate(items))} + +
#PathFilenameExtensionSize (bytes)
{idx+1}{item.get('path', 'N/A')}{item.get('filename', 'N/A')}{item.get('extension', 'N/A')}{item.get('size_bytes', 0):,}
+
+ Fix: Remove these files from the asset pack. Only .jpg, .jpeg, .png, .AVIF, and .json files are allowed. +
+
+ ''' + except Exception as e: + logging.warning(f"Error formatting extraneous files: {e}") + return f''' +
+ Error formatting extraneous files: {str(e)} +
+
{json.dumps(items, indent=2, default=str)}
+ ''' + @staticmethod def _format_format_failed_images(images: list) -> str: try: diff --git a/checks/image_format_check.py b/checks/image_format_check.py index 3f26df4..26a183a 100644 --- a/checks/image_format_check.py +++ b/checks/image_format_check.py @@ -188,12 +188,25 @@ def run_check(config): # Check actual image format # Special handling for AVIF files - use extension validation since PIL may not support them if not is_base_asset_flag and expected_format == "AVIF": - # For AVIF files, validate by extension instead of PIL - if filename.lower().endswith('.avif'): - # AVIF file with correct extension - pass validation + # For AVIF files, validate by extension - MUST be uppercase .AVIF + if filename.endswith('.AVIF'): + # AVIF file with correct uppercase extension - pass validation + continue + elif filename.lower().endswith('.avif'): + # AVIF extension but wrong case (lowercase or mixed case) + if filename not in failed_images_dict: + _, ext = os.path.splitext(filename) + failed_images_dict[filename] = { + "filename": filename, + "viewtype": viewtype, + "imagetype": imagetype, + "expected_format": "AVIF (uppercase .AVIF extension required)", + "actual_format": f"Case violation: found '{ext}' instead of '.AVIF'", + "issue": "case_violation" + } continue else: - # AVIF expected but wrong extension + # AVIF expected but completely wrong extension if filename not in failed_images_dict: failed_images_dict[filename] = { "filename": filename, diff --git a/checks/missing_images_check.py b/checks/missing_images_check.py index 893b95e..c525b2b 100755 --- a/checks/missing_images_check.py +++ b/checks/missing_images_check.py @@ -3,7 +3,7 @@ import json def run_check(config): """ - Check for missing images defined in linkingrecord.json. + Check for missing images defined in linkingrecord.json with enhanced validation. Expected config: - working_dir: Directory where linkingrecord.json and extracted files are located. @@ -12,9 +12,10 @@ def run_check(config): Behavior: - Load linkingrecord.json from working_dir. - Iterate through items->records->assets->filename. + - Pre-validate that file types are allowed and AVIF extensions are uppercase. - Check if file exists at working_dir/filename. - - If any missing files, return "failed" with a list of missing files. - - Otherwise, return "passed". + - Categorize issues as: unauthorized types, case violations, or missing files. + - Return "failed" if any issues found, otherwise "passed". """ working_dir = config.get("working_dir", "working") @@ -37,31 +38,96 @@ def run_check(config): "error_message": "Invalid linkingrecord.json structure: 'items' missing or not a list." } - missing_files = set() + # Allowed file extensions - note uppercase .AVIF + allowed_extensions = {'.jpg', '.jpeg', '.png', '.AVIF', '.json'} + + # Track different types of issues + unauthorized_types = [] + case_violations = [] + missing_files = [] # Iterate over all items and their records - for item in linkingrecord["items"]: + for item_idx, item in enumerate(linkingrecord["items"]): records = item.get("records", []) - for record in records: + for record_idx, record in enumerate(records): assets = record.get("assets", []) - for asset in assets: + for asset_idx, asset in enumerate(assets): filename = asset.get("filename") - if filename: - file_path = os.path.join(working_dir, filename) - if not os.path.exists(file_path): - missing_files.add(filename) + if not filename: + continue + + # Get the file extension + _, ext = os.path.splitext(filename) + + # Pre-validation: Check if file type is allowed + if ext not in allowed_extensions: + # Special check: if it's lowercase .avif, it's a case violation not unauthorized + if ext.lower() == '.avif': + case_violations.append({ + "filename": filename, + "found_extension": ext, + "required_extension": ".AVIF", + "message": "AVIF extensions must be uppercase .AVIF", + "location": f"items[{item_idx}].records[{record_idx}].assets[{asset_idx}]" + }) + else: + unauthorized_types.append({ + "filename": filename, + "extension": ext, + "message": f"File type '{ext}' is not allowed in asset packs", + "allowed_types": list(allowed_extensions), + "location": f"items[{item_idx}].records[{record_idx}].assets[{asset_idx}]" + }) + # Skip file existence check for invalid types + continue + + # Check if file exists (only for valid file types) + file_path = os.path.join(working_dir, filename) + if not os.path.exists(file_path): + missing_files.append({ + "filename": filename, + "message": "File requested in linking record but not found in pack", + "expected_path": filename + }) + + # Determine overall status + has_issues = len(unauthorized_types) > 0 or len(case_violations) > 0 or len(missing_files) > 0 + + if has_issues: + # Build error message + error_parts = [] + if len(unauthorized_types) > 0: + error_parts.append(f"{len(unauthorized_types)} unauthorized file type(s)") + if len(case_violations) > 0: + error_parts.append(f"{len(case_violations)} case violation(s)") + if len(missing_files) > 0: + error_parts.append(f"{len(missing_files)} missing file(s)") + + details = { + "message": "Issues found in linking record: " + ", ".join(error_parts), + "allowed_types": list(allowed_extensions) + } + + if len(unauthorized_types) > 0: + details["unauthorized_file_types"] = unauthorized_types + details["total_unauthorized"] = len(unauthorized_types) + + if len(case_violations) > 0: + details["case_violations"] = case_violations + details["total_case_violations"] = len(case_violations) + + if len(missing_files) > 0: + details["missing_files"] = missing_files + details["total_missing"] = len(missing_files) - if missing_files: return { "status": "failed", - "details": { - "missing_files": sorted(list(missing_files)) - } + "details": details } return { "status": "passed", "details": { - "message": "All referenced images exist." + "message": "All referenced images exist and use valid file types." } } \ No newline at end of file diff --git a/checks/unzip_and_verify_check.py b/checks/unzip_and_verify_check.py index 7d7d821..c58fab6 100755 --- a/checks/unzip_and_verify_check.py +++ b/checks/unzip_and_verify_check.py @@ -109,6 +109,64 @@ def validate_linkingrecord_header(linkingrecord_data): return len(errors) == 0, errors +def validate_linking_record_filetypes(linkingrecord_data): + """ + Validate that all filenames referenced in the linking record use allowed file types + and that AVIF extensions are uppercase only. + + Args: + linkingrecord_data (dict): The parsed linkingrecord JSON data + + Returns: + tuple: (is_valid: bool, violations: list) + """ + # Allowed extensions in linking record - note uppercase .AVIF + allowed_extensions = {'.jpg', '.jpeg', '.png', '.AVIF', '.json'} + + violations = [] + + # Extract all filenames from items → records → assets + items = linkingrecord_data.get("items", []) + + for item_idx, item in enumerate(items): + records = item.get("records", []) + + for record_idx, record in enumerate(records): + assets = record.get("assets", []) + + for asset_idx, asset in enumerate(assets): + filename = asset.get("filename", "") + + if not filename: + continue # Skip empty filenames + + # Get the file extension + _, ext = os.path.splitext(filename) + + # Check for unauthorized file types + if ext not in allowed_extensions: + # Special check: if it's lowercase .avif, it's a case violation not unauthorized + if ext.lower() == '.avif': + violations.append({ + "filename": filename, + "issue": "case_violation", + "found_extension": ext, + "required_extension": ".AVIF", + "message": "AVIF extensions must be uppercase .AVIF", + "location": f"items[{item_idx}].records[{record_idx}].assets[{asset_idx}]" + }) + else: + violations.append({ + "filename": filename, + "issue": "unauthorized_type", + "extension": ext, + "message": f"File type '{ext}' is not allowed in asset packs", + "allowed_types": list(allowed_extensions), + "location": f"items[{item_idx}].records[{record_idx}].assets[{asset_idx}]" + }) + + return len(violations) == 0, violations + def run_check(config): # We expect config to contain: # - input_file: The path to the zip file to unzip @@ -219,6 +277,10 @@ def run_check(config): } } + # Initialize validation results + filetypes_valid = True + filetype_violations = [] + # Validate header fields if this is a linkingrecord.json file if expected_file.lower() == 'linkingrecord.json': is_valid, header_errors = validate_linkingrecord_header(linkingrecord_data) @@ -231,17 +293,57 @@ def run_check(config): } } + # Validate file types referenced in linking record + filetypes_valid, filetype_violations = validate_linking_record_filetypes(linkingrecord_data) + # Scan for extraneous files (files not in the allowed whitelist) extraneous_files = scan_for_extraneous_files(working_dir) - if extraneous_files: + + # Check if we have any violations + has_filetype_violations = not filetypes_valid + has_extraneous_files = len(extraneous_files) > 0 + + if has_filetype_violations or has_extraneous_files: + # Categorize violations + case_violations = [] + unauthorized_types = [] + + if has_filetype_violations: + for violation in filetype_violations: + if violation["issue"] == "case_violation": + case_violations.append(violation) + else: + unauthorized_types.append(violation) + + # Build error message + error_parts = [] + if len(unauthorized_types) > 0: + error_parts.append(f"{len(unauthorized_types)} unauthorized file type(s) in linking record") + if len(case_violations) > 0: + error_parts.append(f"{len(case_violations)} case violation(s) in linking record") + if has_extraneous_files: + error_parts.append(f"{len(extraneous_files)} extraneous file(s) in filesystem") + + details = { + "message": "Issues found: " + ", ".join(error_parts), + "allowed_types": [".jpg", ".jpeg", ".png", ".AVIF", ".json"] + } + + if len(unauthorized_types) > 0: + details["unauthorized_types_in_linking_record"] = unauthorized_types + details["total_unauthorized_types"] = len(unauthorized_types) + + if len(case_violations) > 0: + details["case_violations_in_linking_record"] = case_violations + details["total_case_violations"] = len(case_violations) + + if has_extraneous_files: + details["extraneous_files_in_filesystem"] = extraneous_files + details["total_extraneous_files"] = len(extraneous_files) + return { "status": "failed", - "details": { - "message": "Extraneous files found that are not allowed in asset packs.", - "extraneous_files": extraneous_files, - "allowed_types": [".jpg", ".jpeg", ".png", ".avif", ".json"], - "total_extraneous_count": len(extraneous_files) - } + "details": details } return {