Merge branch 'pdf-pptx-layout' of github.com:presenton/presenton into pdf-pptx-layout

2025-08-02 01:14:04 +05:45 · 2025-08-02 01:14:04 +05:45 · 6fc94648c4
commit 6fc94648c4
parent bef5afb32a 077cdbdca2
2 changed files with 180 additions and 4 deletions
--- a/servers/fastapi/api/v1/ppt/endpoints/pdf_slides.py
+++ b/servers/fastapi/api/v1/ppt/endpoints/pdf_slides.py
@ -0,0 +1,174 @@
+import os
+import shutil
+import tempfile
+import subprocess
+from typing import List, Optional
+from fastapi import APIRouter, UploadFile, File, HTTPException
+from pydantic import BaseModel
+
+from utils.asset_directory_utils import get_images_directory
+from utils.randomizers import get_random_uuid
+from constants.documents import PDF_MIME_TYPES
+
+
+# Router for the PDF slide-screenshot endpoints: routes declared on it are
+# mounted under the "/pdf-slides" prefix and carry the "PDF Slides" tag.
+PDF_SLIDES_ROUTER = APIRouter(prefix="/pdf-slides", tags=["PDF Slides"])
+
+
+class PdfSlideData(BaseModel):
+    """Screenshot reference for a single page of a processed PDF."""
+
+    # 1-based position of the page within the PDF.
+    slide_number: int
+    # URL of the rendered page image, or of the static placeholder image when
+    # no usable (existing, non-empty) screenshot was produced for the page.
+    screenshot_url: str
+
+
+class PdfSlidesResponse(BaseModel):
+    """Response body returned by the PDF slide-processing endpoint."""
+
+    # Set to True on every response the endpoint builds; failures are reported
+    # as HTTP errors rather than as a response with success=False.
+    success: bool
+    # One entry per PDF page, in page order.
+    slides: List[PdfSlideData]
+    # Number of entries in `slides`.
+    total_slides: int
+
+
+@PDF_SLIDES_ROUTER.post("/process", response_model=PdfSlidesResponse)
+async def process_pdf_slides(
+    pdf_file: UploadFile = File(..., description="PDF file to process")
+):
+    """
+    Process a PDF file to extract slide screenshots.
+    
+    This endpoint:
+    1. Validates the uploaded PDF file
+    2. Uses ImageMagick to convert PDF pages to PNG images
+    3. Returns screenshot URLs for each slide/page
+    
+    Note: Font installation is not needed since PDFs already have fonts embedded.
+    """
+    
+    # Validate PDF file
+    if pdf_file.content_type not in PDF_MIME_TYPES:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid file type. Expected PDF file, got {pdf_file.content_type}"
+        )
+    
+    # Create temporary directory for processing
+    with tempfile.TemporaryDirectory() as temp_dir:
+        try:
+            # Save uploaded PDF file
+            pdf_path = os.path.join(temp_dir, "presentation.pdf")
+            with open(pdf_path, "wb") as f:
+                pdf_content = await pdf_file.read()
+                f.write(pdf_content)
+            
+            # Generate screenshots from PDF using ImageMagick
+            screenshot_paths = await _generate_pdf_screenshots(pdf_path, temp_dir)
+            print(f"Generated {len(screenshot_paths)} PDF screenshots")
+            
+            # Move screenshots to images directory and generate URLs
+            images_dir = get_images_directory()
+            presentation_id = get_random_uuid()
+            presentation_images_dir = os.path.join(images_dir, presentation_id)
+            os.makedirs(presentation_images_dir, exist_ok=True)
+            
+            slides_data = []
+            
+            for i, screenshot_path in enumerate(screenshot_paths, 1):
+                # Move screenshot to permanent location
+                screenshot_filename = f"slide_{i}.png"
+                permanent_screenshot_path = os.path.join(presentation_images_dir, screenshot_filename)
+                
+                if os.path.exists(screenshot_path) and os.path.getsize(screenshot_path) > 0:
+                    # Use shutil.copy2 instead of os.rename to handle cross-device moves
+                    shutil.copy2(screenshot_path, permanent_screenshot_path)
+                    screenshot_url = f"/app_data/images/{presentation_id}/{screenshot_filename}"
+                else:
+                    # Fallback if screenshot generation failed or file is empty placeholder
+                    screenshot_url = "/static/images/placeholder.jpg"
+                
+                slides_data.append(PdfSlideData(
+                    slide_number=i,
+                    screenshot_url=screenshot_url
+                ))
+            
+            return PdfSlidesResponse(
+                success=True,
+                slides=slides_data,
+                total_slides=len(slides_data)
+            )
+            
+        except Exception as e:
+            print(f"Error processing PDF slides: {str(e)}")
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to process PDF: {str(e)}"
+            )
+
+
+async def _generate_pdf_screenshots(pdf_path: str, temp_dir: str) -> List[str]:
+    """Generate PNG screenshots of PDF pages using ImageMagick (same approach as PPTX endpoint)."""
+    screenshots_dir = os.path.join(temp_dir, "screenshots")
+    os.makedirs(screenshots_dir, exist_ok=True)
+    
+    try:
+        # Convert PDF to individual PNG images using ImageMagick
+        print("Starting ImageMagick PNG conversion...")
+        try:
+            result = subprocess.run([
+                "convert",
+                "-density", "150",  # Same DPI as PPTX endpoint
+                pdf_path,
+                os.path.join(screenshots_dir, "slide_%03d.png")
+            ], check=True, capture_output=True, text=True, timeout=500)
+            
+            print(f"ImageMagick conversion output: {result.stdout}")
+            if result.stderr:
+                print(f"ImageMagick conversion warnings: {result.stderr}")
+                
+        except subprocess.TimeoutExpired:
+            raise Exception("ImageMagick PNG conversion timed out after 500 seconds")
+        except subprocess.CalledProcessError as e:
+            error_msg = e.stderr if e.stderr else str(e)
+            raise Exception(f"ImageMagick PNG conversion failed: {error_msg}")
+        
+        # Find generated PNG files (ImageMagick creates slide_000.png, slide_001.png, etc.)
+        print("Checking for generated PNG files...")
+        png_files = sorted([f for f in os.listdir(screenshots_dir) if f.startswith("slide_") and f.endswith('.png')])
+        print(f"Generated PNG files: {png_files}")
+        
+        if not png_files:
+            raise Exception("ImageMagick failed to generate any PNG files")
+        
+        # Determine page count from generated files
+        page_count = len(png_files)
+        print(f"Determined {page_count} pages from ImageMagick output")
+        
+        # Rename files from slide_000.png format to slide_1.png format expected by the API
+        # (Same renaming logic as PPTX endpoint)
+        print("Renaming PNG files to expected format...")
+        screenshot_paths = []
+        for i in range(page_count):
+            # ImageMagick generates slide_000.png, slide_001.png, etc.
+            source_file = f"slide_{i:03d}.png"
+            source_path = os.path.join(screenshots_dir, source_file)
+            
+            # We need slide_1.png, slide_2.png, etc.
+            target_file = f"slide_{i+1}.png"
+            target_path = os.path.join(screenshots_dir, target_file)
+            
+            if os.path.exists(source_path):
+                # Rename to expected format
+                shutil.move(source_path, target_path)
+                screenshot_paths.append(target_path)
+                print(f"✓ Renamed {source_file} to {target_file}")
+            else:
+                print(f"⚠ Warning: Expected file {source_file} not found, creating placeholder")
+                # Create empty placeholder
+                with open(target_path, 'w') as f:
+                    f.write("")
+                screenshot_paths.append(target_path)
+        
+        print(f"Successfully generated {len(screenshot_paths)} PDF page screenshots")
+        return screenshot_paths
+        
+    except Exception as e:
+        # Re-raise the specific exceptions we've already handled
+        if "timed out" in str(e) or "failed:" in str(e):
+            raise
+        # Handle any other unexpected exceptions
+        raise Exception(f"PDF screenshot generation failed: {str(e)}") 
--- a/servers/fastapi/api/v1/ppt/router.py
+++ b/servers/fastapi/api/v1/ppt/router.py
@ -1,16 +1,17 @@
 from fastapi import APIRouter

-from api.v1.ppt.endpoints.custom_llm import CUSTOM_LLM_ROUTER
+from api.v1.ppt.endpoints.slide_to_html import SLIDE_TO_HTML_ROUTER, HTML_TO_REACT_ROUTER, HTML_EDIT_ROUTER, LAYOUT_MANAGEMENT_ROUTER
+from api.v1.ppt.endpoints.presentation import PRESENTATION_ROUTER
 from api.v1.ppt.endpoints.files import FILES_ROUTER
+from api.v1.ppt.endpoints.custom_llm import CUSTOM_LLM_ROUTER
+from api.v1.ppt.endpoints.pptx_slides import PPTX_SLIDES_ROUTER
+from api.v1.ppt.endpoints.pdf_slides import PDF_SLIDES_ROUTER
 from api.v1.ppt.endpoints.fonts import FONTS_ROUTER
 from api.v1.ppt.endpoints.icons import ICONS_ROUTER
 from api.v1.ppt.endpoints.images import IMAGES_ROUTER
 from api.v1.ppt.endpoints.ollama import OLLAMA_ROUTER
 from api.v1.ppt.endpoints.outlines import OUTLINES_ROUTER
-from api.v1.ppt.endpoints.presentation import PRESENTATION_ROUTER
-from api.v1.ppt.endpoints.pptx_slides import PPTX_SLIDES_ROUTER
 from api.v1.ppt.endpoints.slide import SLIDE_ROUTER
-from api.v1.ppt.endpoints.slide_to_html import SLIDE_TO_HTML_ROUTER, HTML_TO_REACT_ROUTER, HTML_EDIT_ROUTER, LAYOUT_MANAGEMENT_ROUTER


 API_V1_PPT_ROUTER = APIRouter(prefix="/api/v1/ppt")
@ -29,6 +30,7 @@ API_V1_PPT_ROUTER.include_router(IMAGES_ROUTER)
 API_V1_PPT_ROUTER.include_router(ICONS_ROUTER)
 API_V1_PPT_ROUTER.include_router(OLLAMA_ROUTER)
 API_V1_PPT_ROUTER.include_router(CUSTOM_LLM_ROUTER)
+API_V1_PPT_ROUTER.include_router(PDF_SLIDES_ROUTER)