import os
import base64
import io
import time

from PIL import Image
import fitz  # PyMuPDF
import cv2  # OpenCV for video frames

# Import from centralized LLM configuration
from llm_config import run_visual_qc, pil_image_to_base64, get_model_info

# --- Helper Functions ---

# File extensions handled by each loading strategy in get_image_from_asset.
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.webp', '.gif', '.tiff')
_VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv')


def get_image_from_asset(asset_path, target_size=(1024, 1024)):
    """Load a preview image from an image, PDF, or video asset.

    The loading strategy is chosen from the file extension (case-insensitive):
    image files are opened directly, PDFs are rendered from their first page,
    and videos contribute their first decoded frame. The result is always
    converted to RGB and shrunk in place to fit within ``target_size`` while
    keeping its aspect ratio (images already smaller are left as they are).

    Args:
        asset_path: Path to the asset file on disk.
        target_size: ``(max_width, max_height)`` bounding box passed to
            ``Image.thumbnail``.

    Returns:
        A ``PIL.Image.Image`` in RGB mode, or ``None`` when the extension is
        not supported, the asset yields no page/frame, or any error occurs
        while loading. Failures are reported with ``print`` rather than
        raised.
    """
    try:
        file_extension = os.path.splitext(asset_path)[1].lower()
        pil_image = None

        if file_extension in _IMAGE_EXTENSIONS:
            # convert() returns an independent, fully loaded copy, so the
            # underlying file handle can be closed as soon as it is done.
            with Image.open(asset_path) as source_image:
                pil_image = source_image.convert('RGB')

        elif file_extension == '.pdf':
            doc = fitz.open(asset_path)
            try:
                if doc.page_count > 0:
                    page = doc.load_page(0)  # Load the first page
                    # Render at 2x the PDF base resolution of 72 DPI
                    # (i.e. 144 DPI) for better quality.
                    zoom = 2.0
                    mat = fitz.Matrix(zoom, zoom)
                    # alpha=False yields 3-channel RGB samples.
                    pix = page.get_pixmap(matrix=mat, alpha=False)
                    pil_image = Image.frombytes(
                        "RGB", [pix.width, pix.height], pix.samples
                    )
            finally:
                # Close the document even if rendering fails part-way.
                doc.close()

        elif file_extension in _VIDEO_EXTENSIONS:
            cap = cv2.VideoCapture(asset_path)
            try:
                if cap.isOpened():
                    ret, frame = cap.read()
                    if ret:
                        # Convert OpenCV frame (BGR) to PIL Image (RGB)
                        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        pil_image = Image.fromarray(frame_rgb)
            finally:
                # Release the capture even if decoding/conversion fails.
                cap.release()

        if pil_image is None:
            print(f"Unsupported file type or error loading: {asset_path}")
            return None

        # Resize image if it's too large for the API, maintaining aspect ratio
        pil_image.thumbnail(target_size, Image.Resampling.LANCZOS)
        return pil_image
    except Exception as e:
        # Deliberate best-effort boundary: callers receive None instead of
        # an exception for any asset that cannot be processed.
        print(f"Error processing asset {asset_path}: {e}")
        return None