PDFs are now converted to PNG images at 200 DPI before being sent to Gemini
for analysis. This fixes the unreliable iframe-based PDF preview and ensures
all pages are properly analyzed.

- Add PyMuPDF dependency for PDF rasterization
- Create pdf_service.py with rasterize() and get_page_count()
- Update agent interfaces to accept list of images for multi-page support
- Add analyze_with_images() to Gemini service for multi-image analysis
- Return rasterized PDF pages via WebSocket for frontend display
- Add page navigation UI for multi-page PDFs in preview components

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
106 lines
3.3 KiB
Python
106 lines
3.3 KiB
Python
"""
|
|
PDF Rasterization Service.

Converts PDF pages to PNG images for analysis and display.
Uses PyMuPDF (fitz) for high-quality rasterization.
|
|
"""
|
|
|
|
import logging
|
|
from typing import List, Tuple
|
|
|
|
import fitz # PyMuPDF
|
|
|
|
logger = logging.getLogger(__name__)

# Target DPI for rasterization (150 DPI minimum required, using 200 for quality)
TARGET_DPI = 200

# Default PDF resolution is 72 DPI, so the zoom factor needed to reach the
# target resolution is target_dpi / 72 (200 / 72 ≈ 2.78).
SCALE_FACTOR = TARGET_DPI / 72
|
|
|
|
|
|
class PDFService:
    """Service for PDF rasterization operations.

    The class defines no instance attributes; each method opens the PDF from
    the bytes it is given and closes the document before returning or raising.
    """

    def rasterize(
        self, pdf_data: bytes, max_pages: int = 10
    ) -> List[Tuple[bytes, int, int]]:
        """Convert PDF pages to PNG images.

        Pages are rendered as RGB (no alpha channel), scaled by the
        module-level ``SCALE_FACTOR``.

        Args:
            pdf_data: Raw PDF file bytes.
            max_pages: Maximum number of pages to rasterize (default 10).
                Only the first ``min(page_count, max_pages)`` pages are
                rendered; a value of zero or less yields an empty list.

        Returns:
            List of tuples containing (png_bytes, width, height) for each
            rendered page, in page order.

        Raises:
            ValueError: If the PDF is invalid, corrupted or
                password-protected, or if rendering fails for any other
                reason. The underlying exception is attached as
                ``__cause__``.
        """
        try:
            logger.info(f"[PDF] Starting rasterization, max_pages={max_pages}")
            doc = fitz.open(stream=pdf_data, filetype="pdf")
            try:
                if doc.is_encrypted:
                    raise ValueError("Password-protected PDFs are not supported")

                num_pages = min(doc.page_count, max_pages)
                logger.info(
                    f"[PDF] Document has {doc.page_count} pages, "
                    f"processing {num_pages}"
                )

                # The transformation matrix is identical for every page, so
                # build it once instead of once per iteration.
                mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)

                pages: List[Tuple[bytes, int, int]] = []
                for page_num in range(num_pages):
                    page = doc.load_page(page_num)

                    # Render page to pixmap (RGB), then encode it as PNG.
                    pix = page.get_pixmap(matrix=mat, alpha=False)
                    png_data = pix.tobytes("png")

                    pages.append((png_data, pix.width, pix.height))
                    logger.info(
                        f"[PDF] Rasterized page {page_num + 1}/{num_pages}: "
                        f"{pix.width}x{pix.height}px at {TARGET_DPI} DPI"
                    )
            finally:
                # Always release the document, including when a page fails
                # to load or render part-way through.
                doc.close()

            logger.info(f"[PDF] Rasterization complete, {len(pages)} pages processed")
            return pages

        except fitz.FileDataError as e:
            logger.error(f"[PDF] Invalid or corrupted PDF: {str(e)}")
            raise ValueError(f"Invalid or corrupted PDF file: {str(e)}") from e
        except Exception as e:
            # Boundary handler: callers only need to deal with ValueError.
            logger.error(f"[PDF] Rasterization error: {str(e)}")
            raise ValueError(f"Failed to rasterize PDF: {str(e)}") from e

    def get_page_count(self, pdf_data: bytes) -> int:
        """Get the number of pages in a PDF.

        Args:
            pdf_data: Raw PDF file bytes.

        Returns:
            Number of pages in the PDF.

        Raises:
            ValueError: If the PDF cannot be opened or read. The underlying
                exception is attached as ``__cause__``.
        """
        try:
            doc = fitz.open(stream=pdf_data, filetype="pdf")
            try:
                return doc.page_count
            finally:
                # Close the document even if reading the page count fails.
                doc.close()
        except Exception as e:
            logger.error(f"[PDF] Failed to get page count: {str(e)}")
            raise ValueError(f"Failed to read PDF: {str(e)}") from e
|
|
|
|
|
|
# Singleton instance: created once at import time so callers can share a
# single PDFService object instead of constructing their own.
pdf_service = PDFService()
|