# ford_qc/checks/image_format_check.py

import os
import json
import logging
from typing import Dict, Any, Set, Tuple, Optional
from PIL import Image
from utils.check_helpers import record_skipped_type, prepare_skipped_result

# Expected image format per (viewtype, imagetype) pair, as reported by PIL
# ("JPEG" for .jpg files). Base exterior/interior assets (imagetype None) and
# MEC powertrains are NOT listed here; run_check applies dedicated rules to them.
_FORMAT_REQUIREMENTS = {
    ('exterior', 'layeroptext'): 'PNG',
    ('interior', 'layeroptint'): 'PNG',
    ('carousel', 'extra'): 'JPEG',
    ('carousel', 'powertrain'): 'JPEG',  # BAU powertrain only
    ('exterior', 'showroom'): 'PNG',
    ('carousel', 'colour'): 'JPEG',
    ('carousel', 'bodystyle'): 'JPEG',
    ('exterior', 'series'): 'JPEG',  # Per latest guidance (newer format)
    ('carousel', 'series'): 'JPEG',  # Legacy format (older packs)
    ('carousel', 'trim'): 'JPEG',
    # Lifestyle and inventory requirements
    ('lifestyle', None): 'JPEG',
    ('inventory', None): 'JPEG',
    # Beltline requirements (desktop and mobile variants)
    ('vehicleselector', 'desktop'): 'AVIF',
    ('vehicleselector', 'mobile'): 'AVIF',
}

# experienceCondition value that marks an item as belonging to a MEC pack.
_MEC_EXPERIENCE_CONDITION = "2d-background"

# Viewtypes whose items count as "base assets" when imagetype is None.
_BASE_ASSET_VIEWTYPES = ("exterior", "interior")


def _get_conditions(item):
    """Return item['conditions'] if it is a dict, otherwise an empty dict.

    Tolerates non-dict items and explicit ``null`` conditions, which
    ``item.get("conditions", {})`` alone would not.
    """
    if not isinstance(item, dict):
        return {}
    conditions = item.get("conditions")
    return conditions if isinstance(conditions, dict) else {}


def _iter_asset_filenames(item):
    """Yield each non-empty string filename found under item.records[*].assets[*].

    Malformed levels (null / non-list / non-dict) are skipped instead of raising.
    """
    if not isinstance(item, dict):
        return
    records = item.get("records")
    if not isinstance(records, list):
        return
    for record in records:
        if not isinstance(record, dict):
            continue
        assets = record.get("assets")
        if not isinstance(assets, list):
            continue
        for asset in assets:
            if not isinstance(asset, dict):
                continue
            filename = asset.get("filename")
            if filename and isinstance(filename, str):
                yield filename


def _is_base_asset(viewtype, imagetype):
    """Return True for an exterior or interior item that has no imagetype."""
    return imagetype is None and viewtype in _BASE_ASSET_VIEWTYPES


def _is_mec_powertrain(viewtype, imagetype, conditions):
    """Return True for a MEC powertrain item.

    MEC powertrains are identified by viewtype "exterior", no imagetype,
    experienceCondition "2d-background" and angle == 30.
    """
    return (
        viewtype == 'exterior'
        and imagetype is None
        and conditions.get('experienceCondition') == _MEC_EXPERIENCE_CONDITION
        and conditions.get('angle') == 30
    )


def _validate_base_asset_format(actual_format, asset_type, viewtype):
    """Apply the base-asset format rules and return ``(is_valid, warning_message)``.

    - "MEC" base assets: JPEG only.
    - Any other asset_type (BAU): JPEG passes silently, PNG passes with a
      warning message, everything else fails.
    """
    if asset_type == "MEC":
        return actual_format == "JPEG", None
    if actual_format == "JPEG":
        return True, None
    if actual_format == "PNG":
        return True, f"PNG format found but JPG format is preferred for {viewtype} base assets. Please consider changing this to JPG to match business requirements."
    return False, None


def _base_expected_description(asset_type):
    """Return the human-readable expected-format text for a base asset."""
    return "JPEG" if asset_type == "MEC" else "JPEG (preferred) or PNG (acceptable)"


def _avif_extension_failure(filename, viewtype, imagetype, expected_format):
    """Validate an AVIF asset by file extension only.

    Returns None when the filename ends with the exact uppercase ".AVIF",
    otherwise a failure entry: a "case_violation" entry when the extension is
    AVIF in any other casing, or a generic entry for any other extension.
    """
    if filename.endswith('.AVIF'):
        return None
    if filename.lower().endswith('.avif'):
        _, ext = os.path.splitext(filename)
        return {
            "filename": filename,
            "viewtype": viewtype,
            "imagetype": imagetype,
            "expected_format": "AVIF (uppercase .AVIF extension required)",
            "actual_format": f"Case violation: found '{ext}' instead of '.AVIF'",
            "issue": "case_violation"
        }
    return {
        "filename": filename,
        "viewtype": viewtype,
        "imagetype": imagetype,
        "expected_format": expected_format,
        "actual_format": "Non-AVIF extension"
    }


def _attach_extras(details, warnings_list, skipped):
    """Add the optional 'warnings' and 'skipped_types*' keys to a details dict in place."""
    if warnings_list:
        details["warnings"] = warnings_list
    if skipped:
        details["skipped_types_message"] = f"{len(skipped)} unknown viewtype/imagetype combinations were skipped."
        # Sort so the report is stable across runs (set iteration order is not).
        ordered = sorted(
            skipped,
            key=lambda pair: (str(pair[0]), "" if pair[1] is None else str(pair[1])),
        )
        details["skipped_types"] = [
            {"viewtype": vt, "imagetype": it if it is not None else "None"}
            for vt, it in ordered
        ]


def run_check(config):
    """
    Check that each image in linkingrecord.json matches the expected file format (PNG, JPG, or AVIF)
    based on the viewtype and imagetype conditions.

    Expected config:
    - working_dir (str): Directory where linkingrecord.json and images reside. Default: working
    - linkingrecord_filename (str): Name of the linking record file. Default: linkingrecord.json

    Rules applied per item (first match wins):
    1. MEC powertrain (exterior, no imagetype, experienceCondition="2d-background", angle=30): JPEG.
    2. Base asset (exterior/interior, no imagetype): JPEG only when the item has
       experienceCondition="2d-background"; otherwise JPEG, or PNG with a warning.
    3. Anything else: looked up by (viewtype, imagetype), falling back to (viewtype, None).
       Combinations with no requirement are recorded as skipped.

    AVIF assets are validated by file extension only (must be exactly ".AVIF");
    all other formats are detected by opening the file with PIL. Files that do not
    exist on disk are ignored. Only the first failure per filename is reported.

    Returns a dict with "status":
    - "passed" if all checked images match their expected format (details may carry
      "warnings" and "skipped_types")
    - "failed" if any present images don't match, listing each that failed
    - "error" if the linking record is missing, unreadable, not valid JSON, or has
      no "items" list
    """
    working_dir = config.get("working_dir", "working")
    linkingrecord_filename = config.get("linkingrecord_filename", "linkingrecord.json")
    linkingrecord_path = os.path.join(working_dir, linkingrecord_filename)

    if not os.path.exists(linkingrecord_path):
        return {
            "status": "error",
            "error_message": f"Linking record '{linkingrecord_filename}' not found in {working_dir}."
        }

    # Unreadable or malformed files are reported as an "error" result rather
    # than raised. JSONDecodeError and UnicodeDecodeError are both ValueErrors.
    try:
        with open(linkingrecord_path, 'r', encoding='utf-8') as f:
            linkingrecord = json.load(f)
    except (OSError, ValueError) as exc:
        return {
            "status": "error",
            "error_message": f"Linking record '{linkingrecord_filename}' could not be read as JSON: {exc}"
        }

    if not isinstance(linkingrecord, dict) or not isinstance(linkingrecord.get("items"), list):
        return {
            "status": "error",
            "error_message": "Invalid linkingrecord.json structure: 'items' missing or not a list."
        }

    items = linkingrecord["items"]

    # A pack is MEC as soon as any item carries experienceCondition="2d-background".
    is_mec = any(
        _get_conditions(item).get("experienceCondition") == _MEC_EXPERIENCE_CONDITION
        for item in items
    )
    pack_type = "MEC" if is_mec else "BAU"

    # First failure per filename wins, so the dict doubles as a de-duplicator.
    failed_images_dict = {}
    warnings_list = []
    warned_filenames = set()  # keeps warnings de-duplicated like failures are

    # Track skipped viewtype/imagetype combinations
    skipped_types: Dict[str, Set[Tuple[str, Optional[str]]]] = {"image_format_check": set()}

    for item in items:
        conditions = _get_conditions(item)
        viewtype = conditions.get("viewtype")
        imagetype = conditions.get("imagetype")

        if not viewtype:
            logging.debug("Skipping item with missing viewtype: %s", conditions)
            continue

        is_mec_powertrain_asset = False
        is_base_asset_flag = False
        base_asset_type = None
        expected_format = None

        # The MEC powertrain test must come first: such items also satisfy the
        # base-asset test.
        if _is_mec_powertrain(viewtype, imagetype, conditions):
            is_mec_powertrain_asset = True
            expected_format = "JPEG"
        elif _is_base_asset(viewtype, imagetype):
            is_base_asset_flag = True
            if conditions.get('experienceCondition') == _MEC_EXPERIENCE_CONDITION:
                base_asset_type = "MEC"
            else:
                base_asset_type = "BAU"
        else:
            expected_format = _FORMAT_REQUIREMENTS.get((viewtype, imagetype))
            if expected_format is None:
                # Fall back to the viewtype-wide requirement, if any.
                expected_format = _FORMAT_REQUIREMENTS.get((viewtype, None))
            if expected_format is None:
                record_skipped_type(skipped_types, "image_format_check", viewtype, imagetype)
                continue

        for filename in _iter_asset_filenames(item):
            image_path = os.path.join(working_dir, filename)
            if not os.path.exists(image_path):
                # Ignore missing files for this check
                continue

            # AVIF is validated by extension only, since PIL may not support it.
            # Base assets never reach this branch: their expected_format is None.
            if expected_format == "AVIF":
                failure = _avif_extension_failure(filename, viewtype, imagetype, expected_format)
                if failure is not None:
                    failed_images_dict.setdefault(filename, failure)
                continue

            # Deliberately broad: any failure to open/identify the image is
            # reported as a failed image instead of aborting the whole check.
            try:
                with Image.open(image_path) as img:
                    actual_format = img.format
            except Exception as exc:
                if is_base_asset_flag:
                    expected_desc = _base_expected_description(base_asset_type)
                elif expected_format:
                    expected_desc = expected_format
                else:
                    expected_desc = "Unknown"
                failed_images_dict.setdefault(filename, {
                    "filename": filename,
                    "viewtype": viewtype,
                    "imagetype": imagetype if imagetype else ("base" if is_base_asset_flag else None),
                    "expected_format": expected_desc,
                    "actual_format": f"Error: {exc}"
                })
                continue

            if is_base_asset_flag:
                is_valid, warning_message = _validate_base_asset_format(
                    actual_format, base_asset_type, viewtype
                )
                if not is_valid:
                    failed_images_dict.setdefault(filename, {
                        "filename": filename,
                        "viewtype": viewtype,
                        "imagetype": "base",  # base assets have no imagetype
                        "expected_format": _base_expected_description(base_asset_type),
                        "actual_format": actual_format,
                        "asset_type": base_asset_type
                    })
                elif warning_message and filename not in warned_filenames:
                    # Warn but don't fail
                    warned_filenames.add(filename)
                    warnings_list.append({
                        "filename": filename,
                        "message": warning_message
                    })
            elif actual_format != expected_format:
                # PIL reports "JPEG" for JPG files
                failed_images_dict.setdefault(filename, {
                    "filename": filename,
                    "viewtype": viewtype,
                    "imagetype": "powertrain (MEC)" if is_mec_powertrain_asset else imagetype,
                    "expected_format": expected_format,
                    "actual_format": actual_format
                })

    skipped = skipped_types["image_format_check"]

    if failed_images_dict:
        result = {
            "status": "failed",
            "details": {
                "message": "Some images are not in the expected file format.",
                "failed_images": list(failed_images_dict.values()),
                "pack_type": pack_type
            }
        }
        _attach_extras(result["details"], warnings_list, skipped)
        return result

    # No failures: still a pass, optionally annotated with warnings / skipped types.
    base_message = "All present images match their required file format."
    result = {
        "status": "passed",
        "details": {
            "message": base_message,
            "pack_type": pack_type
        }
    }
    if warnings_list:
        result["details"]["message"] = f"{base_message} Some recommendations noted below."
    _attach_extras(result["details"], warnings_list, skipped)
    return result