Add PDF rasterization support for reliable preview and analysis

PDFs are now converted to PNG images at 200 DPI before being sent to Gemini for analysis. This fixes the unreliable iframe-based PDF preview and ensures all pages (up to the first 10) are properly analyzed.

- Add PyMuPDF dependency for PDF rasterization
- Create pdf_service.py with rasterize() and get_page_count()
- Update agent interfaces to accept list of images for multi-page support
- Add analyze_with_images() to Gemini service for multi-image analysis
- Return rasterized PDF pages via WebSocket for frontend display
- Add page navigation UI for multi-page PDFs in preview components

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 09:36:56 -06:00 · 2026-01-24 09:36:56 -06:00 · c1030ee292
commit c1030ee292
parent caf4539e1d
14 changed files with 450 additions and 43 deletions
--- a/backend/app/agents/base_agent.py
+++ b/backend/app/agents/base_agent.py
@ -1,4 +1,6 @@
 from abc import ABC, abstractmethod
+from typing import List, Tuple
+
 from app.models.schemas import SubReview


@ -8,13 +10,14 @@ class BaseAgent(ABC):
    name: str = "Base Agent"

    @abstractmethod
-    async def analyze(self, file_data: bytes, file_type: str) -> SubReview:
+    async def analyze(self, images: List[Tuple[bytes, str]]) -> SubReview:
        """
        Analyze the proof and return a SubReview.

        Args:
-            file_data: Raw bytes of the file to analyze
-            file_type: MIME type of the file
+            images: List of (file_data, mime_type) tuples representing the proof.
+                   For single images/videos, this will contain one tuple.
+                   For multi-page PDFs, this will contain one tuple per page.

        Returns:
            SubReview containing ragStatus, feedback, and issues
--- a/backend/app/agents/brand_agent.py
+++ b/backend/app/agents/brand_agent.py
@ -1,3 +1,5 @@
+from typing import List, Tuple
+
 from app.agents.base_agent import BaseAgent
 from app.models.schemas import SubReview
 from app.services.gemini_service import GeminiService
@ -20,13 +22,12 @@ class BrandAgent(BaseAgent):
        self.gemini = gemini_service
        self.brand_context = reference_docs.get_brand_context()

-    async def analyze(self, file_data: bytes, file_type: str) -> SubReview:
+    async def analyze(self, images: List[Tuple[bytes, str]]) -> SubReview:
        """
        Analyze the proof for brand guideline adherence.

        Args:
-            file_data: Raw bytes of the file to analyze
-            file_type: MIME type of the file
+            images: List of (file_data, mime_type) tuples representing the proof

        Returns:
            SubReview with brand compliance assessment
@ -63,4 +64,9 @@ RAG Status Guidelines:
 If the proof is nonsensical, not a marketing material, or cannot be analyzed, set analysisStatus to 'low_confidence'.
 """

-        return await self.gemini.analyze_with_image(prompt, file_data, file_type)
+        # Use single-image or multi-image analysis depending on input
+        if len(images) == 1:
+            file_data, file_type = images[0]
+            return await self.gemini.analyze_with_image(prompt, file_data, file_type)
+        else:
+            return await self.gemini.analyze_with_images(prompt, images)
--- a/backend/app/agents/channel_agent.py
+++ b/backend/app/agents/channel_agent.py
@ -1,3 +1,5 @@
+from typing import List, Tuple
+
 from app.agents.base_agent import BaseAgent
 from app.models.schemas import SubReview
 from app.services.gemini_service import GeminiService
@ -20,13 +22,12 @@ class ChannelAgent(BaseAgent):
        self.gemini = gemini_service
        self.channel_context = reference_docs.get_channel_context()

-    async def analyze(self, file_data: bytes, file_type: str) -> SubReview:
+    async def analyze(self, images: List[Tuple[bytes, str]]) -> SubReview:
        """
        Analyze the proof for channel suitability.

        Args:
-            file_data: Raw bytes of the file to analyze
-            file_type: MIME type of the file
+            images: List of (file_data, mime_type) tuples representing the proof

        Returns:
            SubReview with channel suitability assessment
@ -81,4 +82,9 @@ RAG Status Guidelines:
 If the proof is nonsensical, not a marketing material, or cannot be analyzed, set analysisStatus to 'low_confidence'.
 """

-        return await self.gemini.analyze_with_image(prompt, file_data, file_type)
+        # Use single-image or multi-image analysis depending on input
+        if len(images) == 1:
+            file_data, file_type = images[0]
+            return await self.gemini.analyze_with_image(prompt, file_data, file_type)
+        else:
+            return await self.gemini.analyze_with_images(prompt, images)
--- a/backend/app/agents/legal_agent.py
+++ b/backend/app/agents/legal_agent.py
@ -1,4 +1,6 @@
 import asyncio
+from typing import List, Tuple
+
 from app.agents.base_agent import BaseAgent
 from app.models.schemas import SubReview, RagStatus

@ -12,13 +14,12 @@ class LegalAgent(BaseAgent):

    name = "Legal Agent"

-    async def analyze(self, file_data: bytes, file_type: str) -> SubReview:
+    async def analyze(self, images: List[Tuple[bytes, str]]) -> SubReview:
        """
        Stub implementation that returns mock Green status.

        Args:
-            file_data: Raw bytes of the file (not used in stub)
-            file_type: MIME type of the file (not used in stub)
+            images: List of (file_data, mime_type) tuples (not used in stub)

        Returns:
            SubReview with Green status and stub notice
--- a/backend/app/agents/tone_agent.py
+++ b/backend/app/agents/tone_agent.py
@ -1,4 +1,6 @@
 import asyncio
+from typing import List, Tuple
+
 from app.agents.base_agent import BaseAgent
 from app.models.schemas import SubReview, RagStatus

@ -12,13 +14,12 @@ class ToneAgent(BaseAgent):

    name = "Tone Agent"

-    async def analyze(self, file_data: bytes, file_type: str) -> SubReview:
+    async def analyze(self, images: List[Tuple[bytes, str]]) -> SubReview:
        """
        Stub implementation that returns mock Green status.

        Args:
-            file_data: Raw bytes of the file (not used in stub)
-            file_type: MIME type of the file (not used in stub)
+            images: List of (file_data, mime_type) tuples (not used in stub)

        Returns:
            SubReview with Green status and stub notice
--- a/backend/app/services/analysis_service.py
+++ b/backend/app/services/analysis_service.py
@ -1,5 +1,5 @@
 import logging
-from typing import Callable, Awaitable
+from typing import Callable, Awaitable, List, Tuple, Optional

 from app.models.schemas import SubReview, AgentReview, OverallStatus

@ -11,6 +11,7 @@ from app.agents.tone_agent import ToneAgent
 from app.agents.lead_agent import LeadAgent
 from app.services.gemini_service import GeminiService
 from app.services.reference_docs import ReferenceDocsService
+from app.services.pdf_service import pdf_service


 # Type alias for the callback function
@ -57,7 +58,7 @@ class AnalysisService:
        file_type: str,
        on_agent_update: AgentCallback | None = None,
        is_wip: bool = False,
-    ) -> AgentReview:
+    ) -> Tuple[AgentReview, Optional[List[Tuple[bytes, int, int]]]]:
        """
        Analyze a proof using all agents sequentially.

@ -70,11 +71,46 @@ class AnalysisService:
            is_wip: Whether this is a work-in-progress analysis

        Returns:
-            Complete AgentReview with all agent results and overall verdict
+            Tuple of:
+            - Complete AgentReview with all agent results and overall verdict
+            - List of rasterized PDF pages if input was PDF, else None
+              Each page is (png_bytes, width, height)
        """
        logger.info(f"[ANALYSIS] Starting proof analysis - file_type: {file_type}, file_size: {len(file_data)} bytes, is_wip: {is_wip}")
        reviews: dict[str, SubReview] = {}

+        # Prepare images for analysis
+        pdf_pages: Optional[List[Tuple[bytes, int, int]]] = None
+        images: List[Tuple[bytes, str]] = []
+
+        if file_type == "application/pdf":
+            # Rasterize PDF to PNG images
+            logger.info("[ANALYSIS] Detected PDF, rasterizing pages...")
+            try:
+                pdf_pages = pdf_service.rasterize(file_data, max_pages=10)
+                images = [(png_data, "image/png") for png_data, _, _ in pdf_pages]
+                logger.info(f"[ANALYSIS] Rasterized {len(images)} PDF pages")
+            except ValueError as e:
+                logger.error(f"[ANALYSIS] PDF rasterization failed: {str(e)}")
+                # Return error review if PDF cannot be processed
+                error_review = SubReview(
+                    ragStatus="Error",
+                    feedback=f"Failed to process PDF: {str(e)}",
+                    issues=[]
+                )
+                return AgentReview(
+                    legalAgentReview=error_review,
+                    brandAgentReview=error_review,
+                    toneAgentReview=error_review,
+                    channelAgentReview=error_review,
+                    leadAgentSummary=f"Analysis could not proceed due to PDF processing error: {str(e)}",
+                    overallStatus="Analysis Error",
+                    financialPromotionReason=None,
+                ), None
+        else:
+            # Single image/video - wrap in list
+            images = [(file_data, file_type)]
+
        # Run each agent sequentially
        for agent_name in self.AGENT_ORDER:
            agent = self.agents[agent_name]
@ -85,8 +121,8 @@ class AnalysisService:
            if on_agent_update:
                await on_agent_update(agent_name, None)

-            # Run the agent
-            review = await agent.analyze(file_data, file_type)
+            # Run the agent with images list
+            review = await agent.analyze(images)
            reviews[agent_name] = review

            logger.info(f"[ANALYSIS] Agent completed: {agent_name} - ragStatus: {review.ragStatus}")
@ -112,4 +148,4 @@ class AnalysisService:
            leadAgentSummary=summary,
            overallStatus=overall_status,
            financialPromotionReason=financial_promotion_reason,
-        )
+        ), pdf_pages
--- a/backend/app/services/gemini_service.py
+++ b/backend/app/services/gemini_service.py
@ -1,5 +1,7 @@
 import json
 import logging
+from typing import List, Tuple
+
 from google import genai
 from google.genai import types

@ -122,6 +124,110 @@ class GeminiService:
                issues=[]
            )

+    async def analyze_with_images(
+        self,
+        prompt: str,
+        images: List[Tuple[bytes, str]],
+    ) -> SubReview:
+        """
+        Analyze multiple images with Gemini and return a structured SubReview.
+
+        This is used for multi-page PDFs where all pages need to be analyzed together.
+        All images are sent in a single request, ahead of the prompt, and the model
+        is asked for JSON matching a fixed response schema.
+
+        This method does not raise: every failure (no images, empty or malformed
+        response, API error) is reported as a SubReview with RagStatus.ERROR.
+
+        Args:
+            prompt: The analysis prompt including reference doc context
+            images: List of (file_data, mime_type) tuples for each image
+
+        Returns:
+            SubReview with ragStatus, feedback, and issues
+        """
+        # Without any image the model would "analyze" the prompt alone and could
+        # return a plausible-looking verdict for a proof it never saw.
+        if not images:
+            logger.error("[GEMINI API] Multi-image analysis requested with no images")
+            return SubReview(
+                ragStatus=RagStatus.ERROR,
+                feedback="No images were provided for analysis.",
+                issues=[]
+            )
+
+        try:
+            logger.info(f"[GEMINI API] Starting multi-image analysis - {len(images)} images")
+
+            # Create inline data parts for all images
+            file_parts = []
+            for i, (file_data, file_type) in enumerate(images):
+                part = types.Part.from_bytes(data=file_data, mime_type=file_type)
+                file_parts.append(part)
+                logger.info(f"[GEMINI API] Added image {i + 1}/{len(images)} - type: {file_type}, size: {len(file_data)} bytes")
+
+            # Define the response schema for structured output
+            response_schema = {
+                "type": "object",
+                "properties": {
+                    "analysisStatus": {
+                        "type": "string",
+                        "enum": ["success", "low_confidence"],
+                        "description": "Set to 'low_confidence' if the proof is nonsensical, completely irrelevant to marketing, or otherwise impossible to analyze. Otherwise, set to 'success'."
+                    },
+                    "ragStatus": {
+                        "type": "string",
+                        "enum": ["Red", "Amber", "Green"],
+                        "description": "A RAG status. Red: Issues that must be resolved. Amber: Issues that should be addressed. Green: No issues found."
+                    },
+                    "feedback": {
+                        "type": "string",
+                        "description": "Constructive, professional feedback explaining the RAG status and highlighting both positive aspects and areas for improvement."
+                    },
+                    "issues": {
+                        "type": "array",
+                        "items": {"type": "string"},
+                        "description": "A list of specific, actionable issues found. If no issues, return an empty array."
+                    }
+                },
+                "required": ["analysisStatus", "ragStatus", "feedback", "issues"]
+            }
+
+            # Combine file parts with prompt (images first, instructions last)
+            contents = file_parts + [prompt]
+
+            # Make the API call
+            logger.info(f"[GEMINI API] Calling Gemini model: {self.model} with {len(images)} images")
+            response = await self.client.aio.models.generate_content(
+                model=self.model,
+                contents=contents,
+                config=types.GenerateContentConfig(
+                    response_mime_type="application/json",
+                    response_schema=response_schema
+                )
+            )
+            logger.info("[GEMINI API] Response received from Gemini (multi-image)")
+
+            # response.text may be None or blank when the model returns no text
+            # part; report that explicitly instead of failing on .strip().
+            json_text = (response.text or "").strip()
+            if not json_text:
+                logger.error("[GEMINI API] Empty response text from Gemini (multi-image)")
+                return SubReview(
+                    ragStatus=RagStatus.ERROR,
+                    feedback="The AI returned an empty response, so the proof could not be analyzed.",
+                    issues=[]
+                )
+
+            # Parse the JSON response
+            parsed_result = json.loads(json_text)
+            logger.info(f"[GEMINI API] Parsed result - ragStatus: {parsed_result.get('ragStatus')}, analysisStatus: {parsed_result.get('analysisStatus')}")
+
+            # Handle low confidence analysis
+            if parsed_result.get("analysisStatus") == "low_confidence":
+                return SubReview(
+                    ragStatus=RagStatus.ERROR,
+                    feedback="The agent could not analyze this proof with high confidence. This may be because the content is irrelevant, nonsensical, or too far outside of expected marketing materials.",
+                    issues=[]
+                )
+
+            # Return successful analysis
+            return SubReview(
+                ragStatus=RagStatus(parsed_result["ragStatus"]),
+                feedback=parsed_result["feedback"],
+                issues=parsed_result["issues"]
+            )
+
+        except json.JSONDecodeError as e:
+            logger.error(f"[GEMINI API] JSON parse error: {str(e)}")
+            return SubReview(
+                ragStatus=RagStatus.ERROR,
+                feedback=f"Failed to parse AI response as JSON: {str(e)}",
+                issues=[]
+            )
+        except Exception as e:
+            # Boundary handler: any API or schema failure becomes an error review
+            # so one failing agent does not abort the whole analysis run.
+            logger.error(f"[GEMINI API] Error during multi-image analysis: {str(e)}")
+            return SubReview(
+                ragStatus=RagStatus.ERROR,
+                feedback=f"An error occurred during analysis: {str(e)}",
+                issues=[]
+            )
+
    async def generate_summary(
        self,
        prompt: str,
--- a/backend/app/services/pdf_service.py
+++ b/backend/app/services/pdf_service.py
@ -0,0 +1,106 @@
+"""
+PDF Rasterization Service.
+
+Converts PDF pages to PNG images for analysis and display.
+Uses PyMuPDF (fitz) for high-quality rasterization.
+"""
+
+import logging
+from typing import List, Tuple
+
+import fitz  # PyMuPDF
+
+logger = logging.getLogger(__name__)
+
+# Target DPI for rasterization (150 DPI minimum required, using 200 for quality)
+TARGET_DPI = 200
+# Default PDF resolution is 72 DPI, so scale factor = target_dpi / 72
+SCALE_FACTOR = TARGET_DPI / 72
+
+
+class PDFService:
+    """Service for PDF rasterization operations."""
+
+    def rasterize(
+        self, pdf_data: bytes, max_pages: int = 10
+    ) -> List[Tuple[bytes, int, int]]:
+        """
+        Convert PDF pages to PNG images.
+
+        Pages are rendered in document order, scaled by SCALE_FACTOR
+        (TARGET_DPI relative to the 72 DPI PDF default), without an alpha
+        channel. The document is closed on every path, including failures.
+
+        Args:
+            pdf_data: Raw PDF file bytes
+            max_pages: Maximum number of pages to rasterize (default 10);
+                any further pages are not rendered
+
+        Returns:
+            List of tuples containing (png_bytes, width, height) for each page,
+            with width and height taken from the rendered pixmap
+
+        Raises:
+            ValueError: If the PDF cannot be opened or rendered, or is
+                password-protected
+        """
+        doc = None
+        encrypted = False
+        pages: List[Tuple[bytes, int, int]] = []
+        try:
+            logger.info(f"[PDF] Starting rasterization, max_pages={max_pages}")
+            doc = fitz.open(stream=pdf_data, filetype="pdf")
+            encrypted = bool(doc.is_encrypted)
+            if not encrypted:
+                pages = self._render_pages(doc, max_pages)
+        except fitz.FileDataError as e:
+            logger.error(f"[PDF] Invalid or corrupted PDF: {str(e)}")
+            raise ValueError(f"Invalid or corrupted PDF file: {str(e)}") from e
+        except Exception as e:
+            logger.error(f"[PDF] Rasterization error: {str(e)}")
+            raise ValueError(f"Failed to rasterize PDF: {str(e)}") from e
+        finally:
+            # Release the document on every path, including render failures.
+            if doc is not None:
+                doc.close()
+
+        if encrypted:
+            # Raised outside the try block so the message reaches the caller
+            # as-is instead of being re-wrapped by the generic handler above.
+            logger.error("[PDF] Password-protected PDFs are not supported")
+            raise ValueError("Password-protected PDFs are not supported")
+
+        logger.info(f"[PDF] Rasterization complete, {len(pages)} pages processed")
+        return pages
+
+    def _render_pages(self, doc, max_pages: int) -> List[Tuple[bytes, int, int]]:
+        """Render at most max_pages leading pages of an open document to PNG."""
+        # Clamp at zero so a negative limit simply yields no pages.
+        num_pages = max(0, min(doc.page_count, max_pages))
+        logger.info(f"[PDF] Document has {doc.page_count} pages, processing {num_pages}")
+
+        # The transformation matrix is identical for every page; build it once.
+        mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
+
+        pages: List[Tuple[bytes, int, int]] = []
+        for page_num in range(num_pages):
+            page = doc.load_page(page_num)
+
+            # Render page to pixmap (RGB) and encode it as PNG bytes
+            pix = page.get_pixmap(matrix=mat, alpha=False)
+            png_data = pix.tobytes("png")
+
+            pages.append((png_data, pix.width, pix.height))
+            logger.info(
+                f"[PDF] Rasterized page {page_num + 1}/{num_pages}: "
+                f"{pix.width}x{pix.height}px at {TARGET_DPI} DPI"
+            )
+        return pages
+
+    def get_page_count(self, pdf_data: bytes) -> int:
+        """
+        Get the number of pages in a PDF.
+
+        Args:
+            pdf_data: Raw PDF file bytes
+
+        Returns:
+            Number of pages in the PDF
+
+        Raises:
+            ValueError: If the PDF cannot be opened
+        """
+        doc = None
+        try:
+            doc = fitz.open(stream=pdf_data, filetype="pdf")
+            return doc.page_count
+        except Exception as e:
+            logger.error(f"[PDF] Failed to get page count: {str(e)}")
+            raise ValueError(f"Failed to read PDF: {str(e)}") from e
+        finally:
+            # Close the document even when reading the page count fails.
+            if doc is not None:
+                doc.close()
+
+
+# Singleton instance
+pdf_service = PDFService()
--- a/backend/app/websocket/handlers.py
+++ b/backend/app/websocket/handlers.py
@ -86,7 +86,7 @@ async def handle_analyze_message(

        # Run the analysis
        logger.info("[WEBSOCKET] Starting analysis...")
-        result = await analysis_service.analyze_proof(
+        result, pdf_pages = await analysis_service.analyze_proof(
            file_data=file_data,
            file_type=file_type,
            on_agent_update=on_agent_update,
@ -183,6 +183,20 @@ async def handle_analyze_message(
            if version_id:
                response["version_id"] = version_id

+            # Include rasterized PDF pages if present
+            if pdf_pages:
+                import base64 as b64_module
+                response["pdf_pages"] = [
+                    {
+                        "page": i + 1,
+                        "data_url": f"data:image/png;base64,{b64_module.b64encode(png_data).decode('utf-8')}",
+                        "width": width,
+                        "height": height,
+                    }
+                    for i, (png_data, width, height) in enumerate(pdf_pages)
+                ]
+                logger.info(f"[WEBSOCKET] Including {len(pdf_pages)} rasterized PDF pages in response")
+
            await manager.send_message(client_id, response)
            logger.info(f"[WEBSOCKET] Result sent to client: {client_id}")

--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@ -11,3 +11,4 @@ httpx>=0.26.0
 sqlalchemy[asyncio]>=2.0.0
 asyncpg>=0.29.0
 alembic>=1.13.0
+PyMuPDF>=1.23.0
--- a/frontend/components/AssetPreview.tsx
+++ b/frontend/components/AssetPreview.tsx
@ -1,20 +1,24 @@
-import React from 'react';
+import React, { useState } from 'react';
 import { DocumentIcon } from './icons/DocumentIcon';
+import type { PDFPage } from '../types';

 interface AssetPreviewProps {
    file?: File | null;
    previewUrl: string | null;
    fileName?: string;
+    pdfPages?: PDFPage[];
 }

-export const AssetPreview: React.FC<AssetPreviewProps> = ({ file, previewUrl, fileName }) => {
-    if (!previewUrl) {
+export const AssetPreview: React.FC<AssetPreviewProps> = ({ file, previewUrl, fileName, pdfPages }) => {
+    const [currentPage, setCurrentPage] = useState(1);
+
+    if (!previewUrl && (!pdfPages || pdfPages.length === 0)) {
        return null;
    }
-    
+
    const getMimeType = (): string => {
        if (file?.type) return file.type;
-        if (previewUrl.startsWith('data:')) {
+        if (previewUrl?.startsWith('data:')) {
            const match = previewUrl.match(/data:([a-zA-Z0-9]+\/[a-zA-Z0-9-.+]+);/);
            if (match && match[1]) {
                return match[1];
@ -22,16 +26,70 @@ export const AssetPreview: React.FC<AssetPreviewProps> = ({ file, previewUrl, fi
        }
        return 'application/octet-stream'; // Fallback
    };
-    
+
    const fileType = getMimeType();
    const displayName = fileName || file?.name || 'Asset Preview';

+    // Check if we have rasterized PDF pages to display
+    const hasPdfPages = pdfPages && pdfPages.length > 0;
+    const totalPages = pdfPages?.length || 0;
+
+    const handlePrevPage = () => {
+        setCurrentPage(prev => Math.max(1, prev - 1));
+    };
+
+    const handleNextPage = () => {
+        setCurrentPage(prev => Math.min(totalPages, prev + 1));
+    };
+
+    const renderPdfPages = () => {
+        if (!pdfPages || pdfPages.length === 0) return null;
+
+        const currentPdfPage = pdfPages[currentPage - 1];
+
+        return (
+            <div className="flex flex-col">
+                <img
+                    src={currentPdfPage.data_url}
+                    alt={`${displayName} - Page ${currentPage}`}
+                    className="w-full rounded-lg shadow-2xl object-contain border border-gray-200 bg-white p-2"
+                    style={{ maxHeight: 'calc(100vh - 12rem)' }}
+                />
+                {totalPages > 1 && (
+                    <div className="flex items-center justify-center gap-4 mt-4 p-2 bg-white rounded-lg shadow border border-gray-200">
+                        <button
+                            onClick={handlePrevPage}
+                            disabled={currentPage === 1}
+                            className="px-3 py-1.5 text-sm font-medium rounded-md bg-gray-100 hover:bg-gray-200 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
+                        >
+                            Previous
+                        </button>
+                        <span className="text-sm text-gray-600">
+                            Page {currentPage} of {totalPages}
+                        </span>
+                        <button
+                            onClick={handleNextPage}
+                            disabled={currentPage === totalPages}
+                            className="px-3 py-1.5 text-sm font-medium rounded-md bg-gray-100 hover:bg-gray-200 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
+                        >
+                            Next
+                        </button>
+                    </div>
+                )}
+            </div>
+        );
+    };

    const renderPreview = () => {
+        // If we have rasterized PDF pages, use those
+        if (hasPdfPages) {
+            return renderPdfPages();
+        }
+
        if (fileType.startsWith('image/')) {
            return (
                <img
-                    src={previewUrl}
+                    src={previewUrl!}
                    alt={displayName}
                    className="w-full rounded-lg shadow-2xl object-contain border border-gray-200 bg-white p-2"
                    style={{ maxHeight: 'calc(100vh - 9rem)' }}
@ -42,7 +100,7 @@ export const AssetPreview: React.FC<AssetPreviewProps> = ({ file, previewUrl, fi
        if (fileType === 'video/mp4') {
            return (
                <video
-                    src={previewUrl}
+                    src={previewUrl!}
                    controls
                    className="w-full rounded-lg shadow-2xl object-contain border border-gray-200 bg-white p-2"
                    style={{ maxHeight: 'calc(100vh - 9rem)' }}
@ -53,6 +111,7 @@ export const AssetPreview: React.FC<AssetPreviewProps> = ({ file, previewUrl, fi
        }

        if (fileType === 'application/pdf') {
+            // Fallback to iframe if no rasterized pages available
            return (
                <iframe
                    src={`${previewUrl}#view=fitH`}
@ -81,4 +140,4 @@ export const AssetPreview: React.FC<AssetPreviewProps> = ({ file, previewUrl, fi
            {renderPreview()}
        </div>
    );
-};
+};
--- a/frontend/components/ProofPreview.tsx
+++ b/frontend/components/ProofPreview.tsx
@ -1,20 +1,24 @@
-import React from 'react';
+import React, { useState } from 'react';
 import { DocumentIcon } from './icons/DocumentIcon';
+import type { PDFPage } from '../types';

 interface ProofPreviewProps {
    file?: File | null;
    previewUrl: string | null;
    fileName?: string;
+    pdfPages?: PDFPage[];
 }

-export const ProofPreview: React.FC<ProofPreviewProps> = ({ file, previewUrl, fileName }) => {
-    if (!previewUrl) {
+export const ProofPreview: React.FC<ProofPreviewProps> = ({ file, previewUrl, fileName, pdfPages }) => {
+    const [currentPage, setCurrentPage] = useState(1);
+
+    if (!previewUrl && (!pdfPages || pdfPages.length === 0)) {
        return null;
    }
-    
+
    const getMimeType = (): string => {
        if (file?.type) return file.type;
-        if (previewUrl.startsWith('data:')) {
+        if (previewUrl?.startsWith('data:')) {
            const match = previewUrl.match(/data:([a-zA-Z0-9]+\/[a-zA-Z0-9-.+]+);/);
            if (match && match[1]) {
                return match[1];
@ -22,16 +26,70 @@ export const ProofPreview: React.FC<ProofPreviewProps> = ({ file, previewUrl, fi
        }
        return 'application/octet-stream'; // Fallback
    };
-    
+
    const fileType = getMimeType();
    const displayName = fileName || file?.name || 'Proof Preview';

+    // Check if we have rasterized PDF pages to display
+    const hasPdfPages = pdfPages && pdfPages.length > 0;
+    const totalPages = pdfPages?.length || 0;
+
+    const handlePrevPage = () => {
+        setCurrentPage(prev => Math.max(1, prev - 1));
+    };
+
+    const handleNextPage = () => {
+        setCurrentPage(prev => Math.min(totalPages, prev + 1));
+    };
+
+    const renderPdfPages = () => {
+        if (!pdfPages || pdfPages.length === 0) return null;
+
+        const currentPdfPage = pdfPages[currentPage - 1];
+
+        return (
+            <div className="flex flex-col">
+                <img
+                    src={currentPdfPage.data_url}
+                    alt={`${displayName} - Page ${currentPage}`}
+                    className="w-full rounded-lg shadow-2xl object-contain border border-gray-200 bg-white p-2"
+                    style={{ maxHeight: 'calc(100vh - 12rem)' }}
+                />
+                {totalPages > 1 && (
+                    <div className="flex items-center justify-center gap-4 mt-4 p-2 bg-white rounded-lg shadow border border-gray-200">
+                        <button
+                            onClick={handlePrevPage}
+                            disabled={currentPage === 1}
+                            className="px-3 py-1.5 text-sm font-medium rounded-md bg-gray-100 hover:bg-gray-200 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
+                        >
+                            Previous
+                        </button>
+                        <span className="text-sm text-gray-600">
+                            Page {currentPage} of {totalPages}
+                        </span>
+                        <button
+                            onClick={handleNextPage}
+                            disabled={currentPage === totalPages}
+                            className="px-3 py-1.5 text-sm font-medium rounded-md bg-gray-100 hover:bg-gray-200 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
+                        >
+                            Next
+                        </button>
+                    </div>
+                )}
+            </div>
+        );
+    };

    const renderPreview = () => {
+        // If we have rasterized PDF pages, use those
+        if (hasPdfPages) {
+            return renderPdfPages();
+        }
+
        if (fileType.startsWith('image/')) {
            return (
                <img
-                    src={previewUrl}
+                    src={previewUrl!}
                    alt={displayName}
                    className="w-full rounded-lg shadow-2xl object-contain border border-gray-200 bg-white p-2"
                    style={{ maxHeight: 'calc(100vh - 9rem)' }}
@ -42,7 +100,7 @@ export const ProofPreview: React.FC<ProofPreviewProps> = ({ file, previewUrl, fi
        if (fileType === 'video/mp4') {
            return (
                <video
-                    src={previewUrl}
+                    src={previewUrl!}
                    controls
                    className="w-full rounded-lg shadow-2xl object-contain border border-gray-200 bg-white p-2"
                    style={{ maxHeight: 'calc(100vh - 9rem)' }}
@ -53,6 +111,7 @@ export const ProofPreview: React.FC<ProofPreviewProps> = ({ file, previewUrl, fi
        }

        if (fileType === 'application/pdf') {
+            // Fallback to iframe if no rasterized pages available
            return (
                <iframe
                    src={`${previewUrl}#view=fitH`}
@ -81,4 +140,4 @@ export const ProofPreview: React.FC<ProofPreviewProps> = ({ file, previewUrl, fi
            {renderPreview()}
        </div>
    );
-};
+};
--- a/frontend/services/geminiService.ts
+++ b/frontend/services/geminiService.ts
@ -1,4 +1,4 @@
-import type { AgentReview, SubReview, AgentName } from '../types';
+import type { AgentReview, SubReview, AgentName, PDFPage } from '../types';
 import { IPublicClientApplication } from '@azure/msal-browser';
 import { getAccessToken } from './authService';

@ -24,6 +24,7 @@ export interface AnalyzeProofResult {
    review: AgentReview;
    proofId?: string;
    versionId?: string;
+    pdfPages?: PDFPage[];
 }

 /**
@ -114,6 +115,7 @@ export const analyzeProof = async (
                            review: message.result as AgentReview,
                            proofId: message.proof_id,
                            versionId: message.version_id,
+                            pdfPages: message.pdf_pages as PDFPage[] | undefined,
                        });
                        break;

--- a/frontend/types.ts
+++ b/frontend/types.ts
@ -62,3 +62,10 @@ export interface ErrorItem {
  errorSummary: string;
  timestamp: string;
 }
+
+/** One rasterized PDF page, as delivered in the `pdf_pages` field of the analysis result message. */
+export interface PDFPage {
+  /** 1-based page number. */
+  page: number;
+  /** PNG image encoded as a `data:image/png;base64,...` URL. */
+  data_url: string;
+  /** Width of the rendered page image in pixels. */
+  width: number;
+  /** Height of the rendered page image in pixels. */
+  height: number;
+}