55 lines
No EOL
2.2 KiB
Python
Executable file
55 lines
No EOL
2.2 KiB
Python
Executable file
import os
|
|
import base64
|
|
import io
|
|
import time
|
|
from PIL import Image
|
|
import fitz # PyMuPDF
|
|
import cv2 # OpenCV for video frames
|
|
|
|
# Import from centralized LLM configuration
|
|
from llm_config import run_visual_qc, pil_image_to_base64, get_model_info
|
|
|
|
# --- Helper Functions ---
|
|
def get_image_from_asset(asset_path, target_size=(1024, 1024)):
    """
    Load a preview image from an image, PDF, or video asset.

    Image files are opened directly, PDFs contribute their first page, and
    videos contribute their first frame. The result is an RGB image that is
    shrunk in place to fit within ``target_size`` while keeping its aspect
    ratio.

    Args:
        asset_path: Path to the asset. The asset type is chosen from the
            file extension, compared case-insensitively.
        target_size: Maximum ``(width, height)`` of the returned image.

    Returns:
        A PIL Image in RGB mode, or ``None`` when the extension is not
        supported, no page/frame could be read, or any error occurred.
        Failures are reported with ``print`` and never raised to the caller.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.webp', '.gif', '.tiff')
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv')

    try:
        file_extension = os.path.splitext(asset_path)[1].lower()
        pil_image = None

        if file_extension in image_extensions:
            # convert() returns a new image, so the source file handle can be
            # closed as soon as the conversion is done.
            with Image.open(asset_path) as source_image:
                pil_image = source_image.convert('RGB')
        elif file_extension == '.pdf':
            # The context manager closes the document even if rendering fails.
            with fitz.open(asset_path) as doc:
                if doc.page_count > 0:
                    page = doc.load_page(0)  # Load the first page
                    # PDF user space is 72 DPI, so a 2x zoom renders at 144 DPI.
                    zoom = 2.0
                    mat = fitz.Matrix(zoom, zoom)
                    pix = page.get_pixmap(matrix=mat, alpha=False)  # alpha=False for RGB
                    pil_image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        elif file_extension in video_extensions:
            cap = cv2.VideoCapture(asset_path)
            try:
                if cap.isOpened():
                    ret, frame = cap.read()
                    if ret:
                        # Convert OpenCV frame (BGR) to PIL Image (RGB)
                        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        pil_image = Image.fromarray(frame_rgb)
            finally:
                # Always free the capture, including when decoding raises.
                cap.release()

        if pil_image is None:
            print(f"Unsupported file type or error loading: {asset_path}")
            return None

        # Resize image if it's too large for the API, maintaining aspect ratio
        pil_image.thumbnail(target_size, Image.Resampling.LANCZOS)
        return pil_image

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and signal
        # failure with None instead of propagating.
        print(f"Error processing asset {asset_path}: {e}")
        return None