# ai_qc/visual_qc_apps/utils.py

import os
import base64
import io
import time
from PIL import Image
import fitz  # PyMuPDF
import cv2   # OpenCV for video frames

# Import from centralized LLM configuration
from llm_config import run_visual_qc, pil_image_to_base64, get_model_info

# --- Helper Functions ---
def get_image_from_asset(asset_path, target_size=(1024, 1024)):
    """
    Load a preview image from an image, PDF, or video asset.

    The asset type is chosen from the file extension (case-insensitive):
    image files are opened directly, PDFs are rendered from their first
    page, and videos contribute their first decoded frame. The result is
    shrunk in place to fit within ``target_size`` while keeping its aspect
    ratio.

    Args:
        asset_path: Filesystem path to the asset.
        target_size: ``(max_width, max_height)`` bounding box in pixels.

    Returns:
        A PIL Image in RGB mode, or ``None`` when the extension is not
        supported, the asset has no page/frame to extract, or any error
        occurs while loading. Failures are reported with ``print`` rather
        than raised, so callers only need to check for ``None``.
    """
    image_extensions = (
        '.png', '.jpg', '.jpeg', '.bmp', '.webp', '.gif', '.tiff', '.tif',
    )
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv')

    try:
        file_extension = os.path.splitext(asset_path)[1].lower()
        pil_image = None

        if file_extension in image_extensions:
            # Image.open() is lazy and keeps the file handle open. convert()
            # loads the pixel data and returns an independent image, so the
            # handle can be released as soon as the conversion is done.
            with Image.open(asset_path) as source_image:
                pil_image = source_image.convert('RGB')
        elif file_extension == '.pdf':
            doc = fitz.open(asset_path)
            try:
                if doc.page_count > 0:
                    page = doc.load_page(0)  # Load the first page
                    # PDF user space is 72 DPI, so a 2x zoom renders at
                    # 144 DPI for better quality.
                    zoom = 2.0
                    mat = fitz.Matrix(zoom, zoom)
                    # alpha=False yields 3-channel RGB samples
                    pix = page.get_pixmap(matrix=mat, alpha=False)
                    pil_image = Image.frombytes(
                        "RGB", [pix.width, pix.height], pix.samples
                    )
            finally:
                # Close the document even if rendering fails.
                doc.close()
        elif file_extension in video_extensions:
            cap = cv2.VideoCapture(asset_path)
            try:
                if cap.isOpened():
                    ret, frame = cap.read()
                    if ret:
                        # Convert OpenCV frame (BGR) to PIL Image (RGB)
                        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        pil_image = Image.fromarray(frame_rgb)
            finally:
                # Release the capture even if frame conversion fails.
                cap.release()

        if pil_image is None:
            print(f"Unsupported file type or error loading: {asset_path}")
            return None

        # Resize image if it's too large for the API, maintaining aspect ratio
        pil_image.thumbnail(target_size, Image.Resampling.LANCZOS)
        return pil_image

    except Exception as e:
        # Best-effort loader: report the failure and signal it with None.
        print(f"Error processing asset {asset_path}: {e}")
        return None