diff --git a/backend/app/agents/base_agent.py b/backend/app/agents/base_agent.py index 4d22491..dcefac0 100755 --- a/backend/app/agents/base_agent.py +++ b/backend/app/agents/base_agent.py @@ -1,4 +1,6 @@ from abc import ABC, abstractmethod +from typing import List, Tuple + from app.models.schemas import SubReview @@ -8,13 +10,14 @@ class BaseAgent(ABC): name: str = "Base Agent" @abstractmethod - async def analyze(self, file_data: bytes, file_type: str) -> SubReview: + async def analyze(self, images: List[Tuple[bytes, str]]) -> SubReview: """ Analyze the proof and return a SubReview. Args: - file_data: Raw bytes of the file to analyze - file_type: MIME type of the file + images: List of (file_data, mime_type) tuples representing the proof. + For single images/videos, this will contain one tuple. + For multi-page PDFs, this will contain one tuple per page. Returns: SubReview containing ragStatus, feedback, and issues diff --git a/backend/app/agents/brand_agent.py b/backend/app/agents/brand_agent.py index 5a534bd..8812448 100755 --- a/backend/app/agents/brand_agent.py +++ b/backend/app/agents/brand_agent.py @@ -1,3 +1,5 @@ +from typing import List, Tuple + from app.agents.base_agent import BaseAgent from app.models.schemas import SubReview from app.services.gemini_service import GeminiService @@ -20,13 +22,12 @@ class BrandAgent(BaseAgent): self.gemini = gemini_service self.brand_context = reference_docs.get_brand_context() - async def analyze(self, file_data: bytes, file_type: str) -> SubReview: + async def analyze(self, images: List[Tuple[bytes, str]]) -> SubReview: """ Analyze the proof for brand guideline adherence. Args: - file_data: Raw bytes of the file to analyze - file_type: MIME type of the file + images: List of (file_data, mime_type) tuples representing the proof Returns: SubReview with brand compliance assessment @@ -63,4 +64,9 @@ RAG Status Guidelines: If the proof is nonsensical, not a marketing material, or cannot be analyzed, set analysisStatus to 'low_confidence'. """ - return await self.gemini.analyze_with_image(prompt, file_data, file_type) + # Use single-image or multi-image analysis depending on input + if len(images) == 1: + file_data, file_type = images[0] + return await self.gemini.analyze_with_image(prompt, file_data, file_type) + else: + return await self.gemini.analyze_with_images(prompt, images) diff --git a/backend/app/agents/channel_agent.py b/backend/app/agents/channel_agent.py index 5c1d211..da50785 100755 --- a/backend/app/agents/channel_agent.py +++ b/backend/app/agents/channel_agent.py @@ -1,3 +1,5 @@ +from typing import List, Tuple + from app.agents.base_agent import BaseAgent from app.models.schemas import SubReview from app.services.gemini_service import GeminiService @@ -20,13 +22,12 @@ class ChannelAgent(BaseAgent): self.gemini = gemini_service self.channel_context = reference_docs.get_channel_context() - async def analyze(self, file_data: bytes, file_type: str) -> SubReview: + async def analyze(self, images: List[Tuple[bytes, str]]) -> SubReview: """ Analyze the proof for channel suitability. Args: - file_data: Raw bytes of the file to analyze - file_type: MIME type of the file + images: List of (file_data, mime_type) tuples representing the proof Returns: SubReview with channel suitability assessment @@ -81,4 +82,9 @@ RAG Status Guidelines: If the proof is nonsensical, not a marketing material, or cannot be analyzed, set analysisStatus to 'low_confidence'. """ - return await self.gemini.analyze_with_image(prompt, file_data, file_type) + # Use single-image or multi-image analysis depending on input + if len(images) == 1: + file_data, file_type = images[0] + return await self.gemini.analyze_with_image(prompt, file_data, file_type) + else: + return await self.gemini.analyze_with_images(prompt, images) diff --git a/backend/app/agents/legal_agent.py b/backend/app/agents/legal_agent.py index e8cdf0f..14d2afd 100755 --- a/backend/app/agents/legal_agent.py +++ b/backend/app/agents/legal_agent.py @@ -1,4 +1,6 @@ import asyncio +from typing import List, Tuple + from app.agents.base_agent import BaseAgent from app.models.schemas import SubReview, RagStatus @@ -12,13 +14,12 @@ class LegalAgent(BaseAgent): name = "Legal Agent" - async def analyze(self, file_data: bytes, file_type: str) -> SubReview: + async def analyze(self, images: List[Tuple[bytes, str]]) -> SubReview: """ Stub implementation that returns mock Green status. Args: - file_data: Raw bytes of the file (not used in stub) - file_type: MIME type of the file (not used in stub) + images: List of (file_data, mime_type) tuples (not used in stub) Returns: SubReview with Green status and stub notice diff --git a/backend/app/agents/tone_agent.py b/backend/app/agents/tone_agent.py index c552517..2a61535 100755 --- a/backend/app/agents/tone_agent.py +++ b/backend/app/agents/tone_agent.py @@ -1,4 +1,6 @@ import asyncio +from typing import List, Tuple + from app.agents.base_agent import BaseAgent from app.models.schemas import SubReview, RagStatus @@ -12,13 +14,12 @@ class ToneAgent(BaseAgent): name = "Tone Agent" - async def analyze(self, file_data: bytes, file_type: str) -> SubReview: + async def analyze(self, images: List[Tuple[bytes, str]]) -> SubReview: """ Stub implementation that returns mock Green status. Args: - file_data: Raw bytes of the file (not used in stub) - file_type: MIME type of the file (not used in stub) + images: List of (file_data, mime_type) tuples (not used in stub) Returns: SubReview with Green status and stub notice diff --git a/backend/app/services/analysis_service.py b/backend/app/services/analysis_service.py index eab83ad..5610a50 100755 --- a/backend/app/services/analysis_service.py +++ b/backend/app/services/analysis_service.py @@ -1,5 +1,5 @@ import logging -from typing import Callable, Awaitable +from typing import Callable, Awaitable, List, Tuple, Optional from app.models.schemas import SubReview, AgentReview, OverallStatus @@ -11,6 +11,7 @@ from app.agents.tone_agent import ToneAgent from app.agents.lead_agent import LeadAgent from app.services.gemini_service import GeminiService from app.services.reference_docs import ReferenceDocsService +from app.services.pdf_service import pdf_service # Type alias for the callback function @@ -57,7 +58,7 @@ class AnalysisService: file_type: str, on_agent_update: AgentCallback | None = None, is_wip: bool = False, - ) -> AgentReview: + ) -> Tuple[AgentReview, Optional[List[Tuple[bytes, int, int]]]]: """ Analyze a proof using all agents sequentially. @@ -70,11 +71,46 @@ class AnalysisService: is_wip: Whether this is a work-in-progress analysis Returns: - Complete AgentReview with all agent results and overall verdict + Tuple of: + - Complete AgentReview with all agent results and overall verdict + - List of rasterized PDF pages if input was PDF, else None + Each page is (png_bytes, width, height) """ logger.info(f"[ANALYSIS] Starting proof analysis - file_type: {file_type}, file_size: {len(file_data)} bytes, is_wip: {is_wip}") reviews: dict[str, SubReview] = {} + # Prepare images for analysis + pdf_pages: Optional[List[Tuple[bytes, int, int]]] = None + images: List[Tuple[bytes, str]] = [] + + if file_type == "application/pdf": + # Rasterize PDF to PNG images + logger.info("[ANALYSIS] Detected PDF, rasterizing pages...") + try: + pdf_pages = pdf_service.rasterize(file_data, max_pages=10) + images = [(png_data, "image/png") for png_data, _, _ in pdf_pages] + logger.info(f"[ANALYSIS] Rasterized {len(images)} PDF pages") + except ValueError as e: + logger.error(f"[ANALYSIS] PDF rasterization failed: {str(e)}") + # Return error review if PDF cannot be processed + error_review = SubReview( + ragStatus="Error", + feedback=f"Failed to process PDF: {str(e)}", + issues=[] + ) + return AgentReview( + legalAgentReview=error_review, + brandAgentReview=error_review, + toneAgentReview=error_review, + channelAgentReview=error_review, + leadAgentSummary=f"Analysis could not proceed due to PDF processing error: {str(e)}", + overallStatus="Analysis Error", + financialPromotionReason=None, + ), None + else: + # Single image/video - wrap in list + images = [(file_data, file_type)] + # Run each agent sequentially for agent_name in self.AGENT_ORDER: agent = self.agents[agent_name] @@ -85,8 +121,8 @@ class AnalysisService: if on_agent_update: await on_agent_update(agent_name, None) - # Run the agent - review = await agent.analyze(file_data, file_type) + # Run the agent with images list + review = await agent.analyze(images) reviews[agent_name] = review logger.info(f"[ANALYSIS] Agent completed: {agent_name} - ragStatus: {review.ragStatus}") @@ -112,4 +148,4 @@ class AnalysisService: leadAgentSummary=summary, overallStatus=overall_status, financialPromotionReason=financial_promotion_reason, - ) + ), pdf_pages diff --git a/backend/app/services/gemini_service.py b/backend/app/services/gemini_service.py index 78515d4..5454149 100755 --- a/backend/app/services/gemini_service.py +++ b/backend/app/services/gemini_service.py @@ -1,5 +1,7 @@ import json import logging +from typing import List, Tuple + from google import genai from google.genai import types @@ -122,6 +124,110 @@ class GeminiService: issues=[] ) + async def analyze_with_images( + self, + prompt: str, + images: List[Tuple[bytes, str]], + ) -> SubReview: + """ + Analyze multiple images with Gemini and return a structured SubReview. + + This is used for multi-page PDFs where all pages need to be analyzed together. + + Args: + prompt: The analysis prompt including reference doc context + images: List of (file_data, mime_type) tuples for each image + + Returns: + SubReview with ragStatus, feedback, and issues + """ + try: + logger.info(f"[GEMINI API] Starting multi-image analysis - {len(images)} images") + + # Create inline data parts for all images + file_parts = [] + for i, (file_data, file_type) in enumerate(images): + part = types.Part.from_bytes(data=file_data, mime_type=file_type) + file_parts.append(part) + logger.info(f"[GEMINI API] Added image {i + 1}/{len(images)} - type: {file_type}, size: {len(file_data)} bytes") + + # Define the response schema for structured output + response_schema = { + "type": "object", + "properties": { + "analysisStatus": { + "type": "string", + "enum": ["success", "low_confidence"], + "description": "Set to 'low_confidence' if the proof is nonsensical, completely irrelevant to marketing, or otherwise impossible to analyze. Otherwise, set to 'success'." + }, + "ragStatus": { + "type": "string", + "enum": ["Red", "Amber", "Green"], + "description": "A RAG status. Red: Issues that must be resolved. Amber: Issues that should be addressed. Green: No issues found." + }, + "feedback": { + "type": "string", + "description": "Constructive, professional feedback explaining the RAG status and highlighting both positive aspects and areas for improvement." + }, + "issues": { + "type": "array", + "items": {"type": "string"}, + "description": "A list of specific, actionable issues found. If no issues, return an empty array." + } + }, + "required": ["analysisStatus", "ragStatus", "feedback", "issues"] + } + + # Combine file parts with prompt + contents = file_parts + [prompt] + + # Make the API call + logger.info(f"[GEMINI API] Calling Gemini model: {self.model} with {len(images)} images") + response = await self.client.aio.models.generate_content( + model=self.model, + contents=contents, + config=types.GenerateContentConfig( + response_mime_type="application/json", + response_schema=response_schema + ) + ) + logger.info(f"[GEMINI API] Response received from Gemini (multi-image)") + + # Parse the JSON response + json_text = response.text.strip() + parsed_result = json.loads(json_text) + logger.info(f"[GEMINI API] Parsed result - ragStatus: {parsed_result.get('ragStatus')}, analysisStatus: {parsed_result.get('analysisStatus')}") + + # Handle low confidence analysis + if parsed_result.get("analysisStatus") == "low_confidence": + return SubReview( + ragStatus=RagStatus.ERROR, + feedback="The agent could not analyze this proof with high confidence. This may be because the content is irrelevant, nonsensical, or too far outside of expected marketing materials.", + issues=[] + ) + + # Return successful analysis + return SubReview( + ragStatus=RagStatus(parsed_result["ragStatus"]), + feedback=parsed_result["feedback"], + issues=parsed_result["issues"] + ) + + except json.JSONDecodeError as e: + logger.error(f"[GEMINI API] JSON parse error: {str(e)}") + return SubReview( + ragStatus=RagStatus.ERROR, + feedback=f"Failed to parse AI response as JSON: {str(e)}", + issues=[] + ) + except Exception as e: + logger.error(f"[GEMINI API] Error during multi-image analysis: {str(e)}") + return SubReview( + ragStatus=RagStatus.ERROR, + feedback=f"An error occurred during analysis: {str(e)}", + issues=[] + ) + async def generate_summary( self, prompt: str, diff --git a/backend/app/services/pdf_service.py b/backend/app/services/pdf_service.py new file mode 100644 index 0000000..4c6308b --- /dev/null +++ b/backend/app/services/pdf_service.py @@ -0,0 +1,106 @@ +""" +PDF Rasterization Service. + +Converts PDF pages to PNG images for analysis and display. +Uses PyMuPDF (fitz) for high-quality rasterization. +""" + +import logging +from typing import List, Tuple + +import fitz # PyMuPDF + +logger = logging.getLogger(__name__) + +# Target DPI for rasterization (150 DPI minimum required, using 200 for quality) +TARGET_DPI = 200 +# Default PDF resolution is 72 DPI, so scale factor = target_dpi / 72 +SCALE_FACTOR = TARGET_DPI / 72 + + +class PDFService: + """Service for PDF rasterization operations.""" + + def rasterize( + self, pdf_data: bytes, max_pages: int = 10 + ) -> List[Tuple[bytes, int, int]]: + """ + Convert PDF pages to PNG images. + + Args: + pdf_data: Raw PDF file bytes + max_pages: Maximum number of pages to rasterize (default 10) + + Returns: + List of tuples containing (png_bytes, width, height) for each page + + Raises: + ValueError: If the PDF cannot be opened or is password-protected + """ + try: + logger.info(f"[PDF] Starting rasterization, max_pages={max_pages}") + doc = fitz.open(stream=pdf_data, filetype="pdf") + + if doc.is_encrypted: + doc.close() + raise ValueError("Password-protected PDFs are not supported") + + pages: List[Tuple[bytes, int, int]] = [] + num_pages = min(doc.page_count, max_pages) + + logger.info(f"[PDF] Document has {doc.page_count} pages, processing {num_pages}") + + for page_num in range(num_pages): + page = doc.load_page(page_num) + + # Create transformation matrix for desired DPI + mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR) + + # Render page to pixmap (RGB) + pix = page.get_pixmap(matrix=mat, alpha=False) + + # Convert to PNG bytes + png_data = pix.tobytes("png") + + pages.append((png_data, pix.width, pix.height)) + logger.info( + f"[PDF] Rasterized page {page_num + 1}/{num_pages}: " + f"{pix.width}x{pix.height}px at {TARGET_DPI} DPI" + ) + + doc.close() + logger.info(f"[PDF] Rasterization complete, {len(pages)} pages processed") + return pages + + except fitz.FileDataError as e: + logger.error(f"[PDF] Invalid or corrupted PDF: {str(e)}") + raise ValueError(f"Invalid or corrupted PDF file: {str(e)}") + except Exception as e: + logger.error(f"[PDF] Rasterization error: {str(e)}") + raise ValueError(f"Failed to rasterize PDF: {str(e)}") + + def get_page_count(self, pdf_data: bytes) -> int: + """ + Get the number of pages in a PDF. + + Args: + pdf_data: Raw PDF file bytes + + Returns: + Number of pages in the PDF + + Raises: + ValueError: If the PDF cannot be opened + """ + try: + doc = fitz.open(stream=pdf_data, filetype="pdf") + count = doc.page_count + doc.close() + return count + except Exception as e: + logger.error(f"[PDF] Failed to get page count: {str(e)}") + raise ValueError(f"Failed to read PDF: {str(e)}") + + +# Singleton instance +pdf_service = PDFService() diff --git a/backend/app/websocket/handlers.py b/backend/app/websocket/handlers.py index 0e04a10..a81ba0f 100755 --- a/backend/app/websocket/handlers.py +++ b/backend/app/websocket/handlers.py @@ -86,7 +86,7 @@ async def handle_analyze_message( # Run the analysis logger.info("[WEBSOCKET] Starting analysis...") - result = await analysis_service.analyze_proof( + result, pdf_pages = await analysis_service.analyze_proof( file_data=file_data, file_type=file_type, on_agent_update=on_agent_update, @@ -183,6 +183,20 @@ async def handle_analyze_message( if version_id: response["version_id"] = version_id + # Include rasterized PDF pages if present + if pdf_pages: + import base64 as b64_module + response["pdf_pages"] = [ + { + "page": i + 1, + "data_url": f"data:image/png;base64,{b64_module.b64encode(png_data).decode('utf-8')}", + "width": width, + "height": height, + } + for i, (png_data, width, height) in enumerate(pdf_pages) + ] + logger.info(f"[WEBSOCKET] Including {len(pdf_pages)} rasterized PDF pages in response") + await manager.send_message(client_id, response) logger.info(f"[WEBSOCKET] Result sent to client: {client_id}") diff --git a/backend/requirements.txt b/backend/requirements.txt index 76d4e5b..2d0e86e 100755 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -11,3 +11,4 @@ httpx>=0.26.0 sqlalchemy[asyncio]>=2.0.0 asyncpg>=0.29.0 alembic>=1.13.0 +PyMuPDF>=1.23.0 diff --git a/frontend/components/AssetPreview.tsx b/frontend/components/AssetPreview.tsx index 6bd686f..8adf0d9 100755 --- a/frontend/components/AssetPreview.tsx +++ b/frontend/components/AssetPreview.tsx @@ -1,20 +1,24 @@ -import React from 'react'; +import React, { useState } from 'react'; import { DocumentIcon } from './icons/DocumentIcon'; +import type { PDFPage } from '../types'; interface AssetPreviewProps { file?: File | null; previewUrl: string | null; fileName?: string; + pdfPages?: PDFPage[]; } -export const AssetPreview: React.FC = ({ file, previewUrl, fileName }) => { - if (!previewUrl) { +export const AssetPreview: React.FC = ({ file, previewUrl, fileName, pdfPages }) => { + const [currentPage, setCurrentPage] = useState(1); + + if (!previewUrl && (!pdfPages || pdfPages.length === 0)) { return null; } - + const getMimeType = (): string => { if (file?.type) return file.type; - if (previewUrl.startsWith('data:')) { + if (previewUrl?.startsWith('data:')) { const match = previewUrl.match(/data:([a-zA-Z0-9]+\/[a-zA-Z0-9-.+]+);/); if (match && match[1]) { return match[1]; @@ -22,16 +26,70 @@ export const AssetPreview: React.FC = ({ file, previewUrl, fi } return 'application/octet-stream'; // Fallback }; - + const fileType = getMimeType(); const displayName = fileName || file?.name || 'Asset Preview'; + // Check if we have rasterized PDF pages to display + const hasPdfPages = pdfPages && pdfPages.length > 0; + const totalPages = pdfPages?.length || 0; + + const handlePrevPage = () => { + setCurrentPage(prev => Math.max(1, prev - 1)); + }; + + const handleNextPage = () => { + setCurrentPage(prev => Math.min(totalPages, prev + 1)); + }; + + const renderPdfPages = () => { + if (!pdfPages || pdfPages.length === 0) return null; + + const currentPdfPage = pdfPages[currentPage - 1]; + + return ( +
+ {`${displayName} + {totalPages > 1 && ( +
+ + + Page {currentPage} of {totalPages} + + +
+ )} +
+ ); + }; const renderPreview = () => { + // If we have rasterized PDF pages, use those + if (hasPdfPages) { + return renderPdfPages(); + } + if (fileType.startsWith('image/')) { return ( {displayName} = ({ file, previewUrl, fi if (fileType === 'video/mp4') { return (