diff --git a/Dockerfile b/Dockerfile index 5bc2815b..3cfa3888 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,10 +6,7 @@ RUN apt-get update && apt-get install -y \ curl \ libreoffice \ fontconfig \ - chromium \ - imagemagick - -RUN sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /etc/ImageMagick-6/policy.xml + chromium # Install Node.js 20 using NodeSource repository diff --git a/Dockerfile.dev b/Dockerfile.dev index e7bc8d2c..7e71c71b 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -6,11 +6,7 @@ RUN apt-get update && apt-get install -y \ curl \ libreoffice \ fontconfig \ - chromium \ - imagemagick - -RUN sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /etc/ImageMagick-6/policy.xml - + chromium # Install Node.js 20 using NodeSource repository RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ diff --git a/README.md b/README.md index 0d9154f0..6e08b4cb 100644 --- a/README.md +++ b/README.md @@ -54,11 +54,10 @@ Presenton gives you complete control over your AI presentation workflow. Choose * ✅ **Docker Ready** — One-command deployment with GPU support for local models ## Presenton Cloud -We're launching Presenton Cloud which will make it very easy to create presentations through UI, API and MCP. Join our [waitlist](https://presenton.ai) for early beta. - -## Deploy on Cloud (one click deployment) -[![Deploy on Railway](https://railway.com/button.svg)](https://railway.com/deploy/presenton-ai-presentations?referralCode=ubp0kk) - + + + Presenton Logo + ## Running Presenton Docker diff --git a/readme_assets/cloud-banner.png b/readme_assets/cloud-banner.png new file mode 100644 index 00000000..3df54356 Binary files /dev/null and b/readme_assets/cloud-banner.png differ diff --git a/servers/fastapi/api/v1/ppt/endpoints/outlines.py b/servers/fastapi/api/v1/ppt/endpoints/outlines.py index fc1b8a92..d5ee6a2a 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/outlines.py +++ b/servers/fastapi/api/v1/ppt/endpoints/outlines.py @@ -64,7 +64,8 @@ async def stream_outlines( presentation.tone, presentation.verbosity, presentation.instructions, - True, + presentation.include_title_slide, + presentation.web_search, ): # Give control to the event loop await asyncio.sleep(0) diff --git a/servers/fastapi/api/v1/ppt/endpoints/pdf_slides.py b/servers/fastapi/api/v1/ppt/endpoints/pdf_slides.py index 0c1173d9..606cb12f 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/pdf_slides.py +++ b/servers/fastapi/api/v1/ppt/endpoints/pdf_slides.py @@ -6,6 +6,7 @@ from typing import List, Optional from fastapi import APIRouter, UploadFile, File, HTTPException from pydantic import BaseModel +from services.documents_loader import DocumentsLoader from utils.asset_directory_utils import get_images_directory import uuid from constants.documents import PDF_MIME_TYPES @@ -31,28 +32,32 @@ async def process_pdf_slides( ): """ Process a PDF file to extract slide screenshots. - + This endpoint: 1. Validates the uploaded PDF file 2. Uses ImageMagick to convert PDF pages to PNG images 3. Returns screenshot URLs for each slide/page - + Note: Font installation is not needed since PDFs already have fonts embedded. """ - + # Validate PDF file if pdf_file.content_type not in PDF_MIME_TYPES: raise HTTPException( status_code=400, - detail=f"Invalid file type. Expected PDF file, got {pdf_file.content_type}" + detail=f"Invalid file type. Expected PDF file, got {pdf_file.content_type}", ) # Enforce 100MB size limit - if hasattr(pdf_file, "size") and pdf_file.size and pdf_file.size > (100 * 1024 * 1024): + if ( + hasattr(pdf_file, "size") + and pdf_file.size + and pdf_file.size > (100 * 1024 * 1024) + ): raise HTTPException( status_code=400, detail="PDF file exceeded max upload size of 100 MB", ) - + # Create temporary directory for processing with tempfile.TemporaryDirectory() as temp_dir: try: @@ -61,120 +66,51 @@ async def process_pdf_slides( with open(pdf_path, "wb") as f: pdf_content = await pdf_file.read() f.write(pdf_content) - + # Generate screenshots from PDF using ImageMagick - screenshot_paths = await _generate_pdf_screenshots(pdf_path, temp_dir) + screenshot_paths = await DocumentsLoader.get_page_images_from_pdf_async( + pdf_path, temp_dir + ) print(f"Generated {len(screenshot_paths)} PDF screenshots") - + # Move screenshots to images directory and generate URLs images_dir = get_images_directory() presentation_id = uuid.uuid4() presentation_images_dir = os.path.join(images_dir, str(presentation_id)) os.makedirs(presentation_images_dir, exist_ok=True) - + slides_data = [] - + for i, screenshot_path in enumerate(screenshot_paths, 1): # Move screenshot to permanent location screenshot_filename = f"slide_{i}.png" - permanent_screenshot_path = os.path.join(presentation_images_dir, screenshot_filename) - - if os.path.exists(screenshot_path) and os.path.getsize(screenshot_path) > 0: + permanent_screenshot_path = os.path.join( + presentation_images_dir, screenshot_filename + ) + + if ( + os.path.exists(screenshot_path) + and os.path.getsize(screenshot_path) > 0 + ): # Use shutil.copy2 instead of os.rename to handle cross-device moves shutil.copy2(screenshot_path, permanent_screenshot_path) - screenshot_url = f"/app_data/images/{presentation_id}/{screenshot_filename}" + screenshot_url = ( + f"/app_data/images/{presentation_id}/{screenshot_filename}" + ) else: # Fallback if screenshot generation failed or file is empty placeholder screenshot_url = "/static/images/placeholder.jpg" - - slides_data.append(PdfSlideData( - slide_number=i, - screenshot_url=screenshot_url - )) - + + slides_data.append( + PdfSlideData(slide_number=i, screenshot_url=screenshot_url) + ) + return PdfSlidesResponse( - success=True, - slides=slides_data, - total_slides=len(slides_data) + success=True, slides=slides_data, total_slides=len(slides_data) ) - + except Exception as e: print(f"Error processing PDF slides: {str(e)}") raise HTTPException( - status_code=500, - detail=f"Failed to process PDF: {str(e)}" + status_code=500, detail=f"Failed to process PDF: {str(e)}" ) - - -async def _generate_pdf_screenshots(pdf_path: str, temp_dir: str) -> List[str]: - """Generate PNG screenshots of PDF pages using ImageMagick (same approach as PPTX endpoint).""" - screenshots_dir = os.path.join(temp_dir, "screenshots") - os.makedirs(screenshots_dir, exist_ok=True) - - try: - # Convert PDF to individual PNG images using ImageMagick - print("Starting ImageMagick PNG conversion...") - try: - result = subprocess.run([ - "convert", - "-density", "150", # Same DPI as PPTX endpoint - pdf_path, - os.path.join(screenshots_dir, "slide_%03d.png") - ], check=True, capture_output=True, text=True, timeout=500) - - print(f"ImageMagick conversion output: {result.stdout}") - if result.stderr: - print(f"ImageMagick conversion warnings: {result.stderr}") - - except subprocess.TimeoutExpired: - raise Exception("ImageMagick PNG conversion timed out after 500 seconds") - except subprocess.CalledProcessError as e: - error_msg = e.stderr if e.stderr else str(e) - raise Exception(f"ImageMagick PNG conversion failed: {error_msg}") - - # Find generated PNG files (ImageMagick creates slide_000.png, slide_001.png, etc.) - print("Checking for generated PNG files...") - png_files = sorted([f for f in os.listdir(screenshots_dir) if f.startswith("slide_") and f.endswith('.png')]) - print(f"Generated PNG files: {png_files}") - - if not png_files: - raise Exception("ImageMagick failed to generate any PNG files") - - # Determine page count from generated files - page_count = len(png_files) - print(f"Determined {page_count} pages from ImageMagick output") - - # Rename files from slide_000.png format to slide_1.png format expected by the API - # (Same renaming logic as PPTX endpoint) - print("Renaming PNG files to expected format...") - screenshot_paths = [] - for i in range(page_count): - # ImageMagick generates slide_000.png, slide_001.png, etc. - source_file = f"slide_{i:03d}.png" - source_path = os.path.join(screenshots_dir, source_file) - - # We need slide_1.png, slide_2.png, etc. - target_file = f"slide_{i+1}.png" - target_path = os.path.join(screenshots_dir, target_file) - - if os.path.exists(source_path): - # Rename to expected format - shutil.move(source_path, target_path) - screenshot_paths.append(target_path) - print(f"✓ Renamed {source_file} to {target_file}") - else: - print(f"⚠ Warning: Expected file {source_file} not found, creating placeholder") - # Create empty placeholder - with open(target_path, 'w') as f: - f.write("") - screenshot_paths.append(target_path) - - print(f"Successfully generated {len(screenshot_paths)} PDF page screenshots") - return screenshot_paths - - except Exception as e: - # Re-raise the specific exceptions we've already handled - if "timed out" in str(e) or "failed:" in str(e): - raise - # Handle any other unexpected exceptions - raise Exception(f"PDF screenshot generation failed: {str(e)}") \ No newline at end of file diff --git a/servers/fastapi/api/v1/ppt/endpoints/pptx_slides.py b/servers/fastapi/api/v1/ppt/endpoints/pptx_slides.py index 33e38aec..b4c4acae 100644 --- a/servers/fastapi/api/v1/ppt/endpoints/pptx_slides.py +++ b/servers/fastapi/api/v1/ppt/endpoints/pptx_slides.py @@ -12,6 +12,7 @@ import asyncio import xml.etree.ElementTree as ET import re +from services.documents_loader import DocumentsLoader from utils.asset_directory_utils import get_images_directory import uuid from constants.documents import POWERPOINT_TYPES @@ -28,7 +29,9 @@ class SlideData(BaseModel): class FontAnalysisResult(BaseModel): - internally_supported_fonts: List[Dict[str, str]] # [{"name": "Open Sans", "google_fonts_url": "..."}] + internally_supported_fonts: List[ + Dict[str, str] + ] # [{"name": "Open Sans", "google_fonts_url": "..."}] not_supported_fonts: List[str] # ["Custom Font Name"] @@ -38,30 +41,62 @@ class PptxSlidesResponse(BaseModel): total_slides: int fonts: Optional[FontAnalysisResult] = None + # NEW: Fonts-only router and response for PPTX class PptxFontsResponse(BaseModel): success: bool fonts: FontAnalysisResult + PPTX_FONTS_ROUTER = APIRouter(prefix="/pptx-fonts", tags=["PPTX Fonts"]) # NEW: Normalize font family names by removing style/weight/stretch descriptors and splitting camel case _STYLE_TOKENS = { # styles - "italic", "italics", "ital", "oblique", "roman", + "italic", + "italics", + "ital", + "oblique", + "roman", # combined style shortcuts - "bolditalic", "bolditalics", + "bolditalic", + "bolditalics", # weights - "thin", "hairline", "extralight", "ultralight", "light", "demilight", "semilight", "book", - "regular", "normal", "medium", "semibold", "demibold", "bold", "extrabold", "ultrabold", - "black", "extrablack", "ultrablack", "heavy", + "thin", + "hairline", + "extralight", + "ultralight", + "light", + "demilight", + "semilight", + "book", + "regular", + "normal", + "medium", + "semibold", + "demibold", + "bold", + "extrabold", + "ultrabold", + "black", + "extrablack", + "ultrablack", + "heavy", # width/stretch - "narrow", "condensed", "semicondensed", "extracondensed", "ultracondensed", - "expanded", "semiexpanded", "extraexpanded", "ultraexpanded", + "narrow", + "condensed", + "semicondensed", + "extracondensed", + "ultracondensed", + "expanded", + "semiexpanded", + "extraexpanded", + "ultraexpanded", } # Modifiers commonly used with style tokens _STYLE_MODIFIERS = {"semi", "demi", "extra", "ultra"} + def _insert_spaces_in_camel_case(value: str) -> str: # Insert space before capital letters preceded by lowercase or digits (e.g., MontserratBold -> Montserrat Bold) value = re.sub(r"(?<=[a-z0-9])([A-Z])", r" \1", value) @@ -69,6 +104,7 @@ def _insert_spaces_in_camel_case(value: str) -> str: value = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", value) return value + def normalize_font_family_name(raw_name: str) -> str: if not raw_name: return raw_name @@ -111,69 +147,69 @@ def normalize_font_family_name(raw_name: str) -> str: def extract_fonts_from_oxml(xml_content: str) -> List[str]: """ Extract font names from OXML content. - + Args: xml_content: OXML content as string - + Returns: List of unique font names found in the OXML """ fonts = set() - + try: # Parse the XML content root = ET.fromstring(xml_content) - + # Define namespaces commonly used in OXML namespaces = { - 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main', - 'p': 'http://schemas.openxmlformats.org/presentationml/2006/main', - 'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' + "a": "http://schemas.openxmlformats.org/drawingml/2006/main", + "p": "http://schemas.openxmlformats.org/presentationml/2006/main", + "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships", } - + # Search for font references in various OXML elements # Look for latin fonts - for font_elem in root.findall('.//a:latin', namespaces): - if 'typeface' in font_elem.attrib: - fonts.add(font_elem.attrib['typeface']) - + for font_elem in root.findall(".//a:latin", namespaces): + if "typeface" in font_elem.attrib: + fonts.add(font_elem.attrib["typeface"]) + # Look for east asian fonts - for font_elem in root.findall('.//a:ea', namespaces): - if 'typeface' in font_elem.attrib: - fonts.add(font_elem.attrib['typeface']) - + for font_elem in root.findall(".//a:ea", namespaces): + if "typeface" in font_elem.attrib: + fonts.add(font_elem.attrib["typeface"]) + # Look for complex script fonts - for font_elem in root.findall('.//a:cs', namespaces): - if 'typeface' in font_elem.attrib: - fonts.add(font_elem.attrib['typeface']) - + for font_elem in root.findall(".//a:cs", namespaces): + if "typeface" in font_elem.attrib: + fonts.add(font_elem.attrib["typeface"]) + # Look for font references in theme elements - for font_elem in root.findall('.//a:font', namespaces): - if 'typeface' in font_elem.attrib: - fonts.add(font_elem.attrib['typeface']) - + for font_elem in root.findall(".//a:font", namespaces): + if "typeface" in font_elem.attrib: + fonts.add(font_elem.attrib["typeface"]) + # Look for rPr (run properties) font references - for rpr_elem in root.findall('.//a:rPr', namespaces): - for font_elem in rpr_elem.findall('.//a:latin', namespaces): - if 'typeface' in font_elem.attrib: - fonts.add(font_elem.attrib['typeface']) - + for rpr_elem in root.findall(".//a:rPr", namespaces): + for font_elem in rpr_elem.findall(".//a:latin", namespaces): + if "typeface" in font_elem.attrib: + fonts.add(font_elem.attrib["typeface"]) + # Also search without namespace prefix for compatibility - for font_elem in root.findall('.//latin'): - if 'typeface' in font_elem.attrib: - fonts.add(font_elem.attrib['typeface']) - + for font_elem in root.findall(".//latin"): + if "typeface" in font_elem.attrib: + fonts.add(font_elem.attrib["typeface"]) + # Regex fallback for fonts that might be missed font_pattern = r'typeface="([^"]+)"' regex_fonts = re.findall(font_pattern, xml_content) fonts.update(regex_fonts) - + # Filter out system fonts and empty values - system_fonts = {'+mn-lt', '+mj-lt', '+mn-ea', '+mj-ea', '+mn-cs', '+mj-cs', ''} + system_fonts = {"+mn-lt", "+mj-lt", "+mn-ea", "+mj-ea", "+mn-cs", "+mj-cs", ""} fonts = {font for font in fonts if font not in system_fonts and font.strip()} - + return list(fonts) - + except Exception as e: print(f"Error extracting fonts from OXML: {e}") return [] @@ -182,21 +218,23 @@ def extract_fonts_from_oxml(xml_content: str) -> List[str]: async def check_google_font_availability(font_name: str) -> bool: """ Check if a font is available in Google Fonts. - + Args: font_name: Name of the font to check - + Returns: True if font is available in Google Fonts, False otherwise """ try: - formatted_name = font_name.replace(' ', '+') + formatted_name = font_name.replace(" ", "+") url = f"https://fonts.googleapis.com/css2?family={formatted_name}&display=swap" - + async with aiohttp.ClientSession() as session: - async with session.head(url, timeout=aiohttp.ClientTimeout(total=10)) as response: + async with session.head( + url, timeout=aiohttp.ClientTimeout(total=10) + ) as response: return response.status == 200 - + except Exception as e: print(f"Error checking Google Font availability for {font_name}: {e}") return False @@ -205,10 +243,10 @@ async def check_google_font_availability(font_name: str) -> bool: async def analyze_fonts_in_all_slides(slide_xmls: List[str]) -> FontAnalysisResult: """ Analyze fonts across all slides and determine Google Fonts availability. - + Args: slide_xmls: List of OXML content strings from all slides - + Returns: FontAnalysisResult with supported and unsupported fonts """ @@ -222,45 +260,40 @@ async def analyze_fonts_in_all_slides(slide_xmls: List[str]) -> FontAnalysisResu normalized_fonts = {normalize_font_family_name(f) for f in raw_fonts} # Remove empties if any normalized_fonts = {f for f in normalized_fonts if f} - + if not normalized_fonts: - return FontAnalysisResult( - internally_supported_fonts=[], - not_supported_fonts=[] - ) - + return FontAnalysisResult(internally_supported_fonts=[], not_supported_fonts=[]) + # Check each normalized font's availability in Google Fonts concurrently tasks = [check_google_font_availability(font) for font in normalized_fonts] results = await asyncio.gather(*tasks) - + internally_supported_fonts = [] not_supported_fonts = [] - + for font, is_available in zip(normalized_fonts, results): if is_available: - formatted_name = font.replace(' ', '+') + formatted_name = font.replace(" ", "+") google_fonts_url = f"https://fonts.googleapis.com/css2?family={formatted_name}&display=swap" - internally_supported_fonts.append({ - "name": font, - "google_fonts_url": google_fonts_url - }) + internally_supported_fonts.append( + {"name": font, "google_fonts_url": google_fonts_url} + ) else: not_supported_fonts.append(font) - + return FontAnalysisResult( - internally_supported_fonts=internally_supported_fonts, - not_supported_fonts=[] + internally_supported_fonts=internally_supported_fonts, not_supported_fonts=[] ) @PPTX_SLIDES_ROUTER.post("/process", response_model=PptxSlidesResponse) async def process_pptx_slides( pptx_file: UploadFile = File(..., description="PPTX file to process"), - fonts: Optional[List[UploadFile]] = File(None, description="Optional font files") + fonts: Optional[List[UploadFile]] = File(None, description="Optional font files"), ): """ Process a PPTX file to extract slide screenshots and XML content. - + This endpoint: 1. Validates the uploaded PPTX file 2. Installs any provided font files @@ -268,20 +301,24 @@ async def process_pptx_slides( 4. Uses LibreOffice to generate slide screenshots 5. Returns both screenshot URLs and XML content for each slide """ - + # Validate PPTX file if pptx_file.content_type not in POWERPOINT_TYPES: raise HTTPException( status_code=400, - detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}" + detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}", ) # Enforce 100MB size limit - if hasattr(pptx_file, "size") and pptx_file.size and pptx_file.size > (100 * 1024 * 1024): + if ( + hasattr(pptx_file, "size") + and pptx_file.size + and pptx_file.size > (100 * 1024 * 1024) + ): raise HTTPException( status_code=400, detail="PPTX file exceeded max upload size of 100 MB", ) - + # Create temporary directory for processing with tempfile.TemporaryDirectory() as temp_dir: if True: @@ -290,61 +327,82 @@ async def process_pptx_slides( with open(pptx_path, "wb") as f: pptx_content = await pptx_file.read() f.write(pptx_content) - + # Install fonts if provided if fonts: await _install_fonts(fonts, temp_dir) - + # Extract slide XMLs from PPTX slide_xmls = _extract_slide_xmls(pptx_path, temp_dir) - + + # Convert PPTX to PDF + pdf_path = await _convert_pptx_to_pdf(pptx_path, temp_dir) + # Generate screenshots using LibreOffice - screenshot_paths = await _generate_screenshots(pptx_path, temp_dir) + screenshot_paths = await DocumentsLoader.get_page_images_from_pdf_async( + pdf_path, temp_dir + ) print(f"Screenshot paths: {screenshot_paths}") - + # Analyze fonts across all slides font_analysis = await analyze_fonts_in_all_slides(slide_xmls) - print(f"Font analysis completed: {len(font_analysis.internally_supported_fonts)} supported, {len(font_analysis.not_supported_fonts)} not supported") - + print( + f"Font analysis completed: {len(font_analysis.internally_supported_fonts)} supported, {len(font_analysis.not_supported_fonts)} not supported" + ) + # Move screenshots to images directory and generate URLs images_dir = get_images_directory() presentation_id = uuid.uuid4() presentation_images_dir = os.path.join(images_dir, str(presentation_id)) os.makedirs(presentation_images_dir, exist_ok=True) - + slides_data = [] - - for i, (xml_content, screenshot_path) in enumerate(zip(slide_xmls, screenshot_paths), 1): + + for i, (xml_content, screenshot_path) in enumerate( + zip(slide_xmls, screenshot_paths), 1 + ): # Move screenshot to permanent location screenshot_filename = f"slide_{i}.png" - permanent_screenshot_path = os.path.join(presentation_images_dir, screenshot_filename) - - if os.path.exists(screenshot_path) and os.path.getsize(screenshot_path) > 0: + permanent_screenshot_path = os.path.join( + presentation_images_dir, screenshot_filename + ) + + if ( + os.path.exists(screenshot_path) + and os.path.getsize(screenshot_path) > 0 + ): # Use shutil.copy2 instead of os.rename to handle cross-device moves shutil.copy2(screenshot_path, permanent_screenshot_path) - screenshot_url = f"/app_data/images/{presentation_id}/{screenshot_filename}" + screenshot_url = ( + f"/app_data/images/{presentation_id}/{screenshot_filename}" + ) else: # Fallback if screenshot generation failed or file is empty placeholder screenshot_url = "/static/images/placeholder.jpg" - + # Compute normalized fonts for this slide raw_slide_fonts = extract_fonts_from_oxml(xml_content) - normalized_fonts = sorted({normalize_font_family_name(f) for f in raw_slide_fonts if f}) - - slides_data.append(SlideData( - slide_number=i, - screenshot_url=screenshot_url, - xml_content=xml_content, - normalized_fonts=normalized_fonts - )) - + normalized_fonts = sorted( + {normalize_font_family_name(f) for f in raw_slide_fonts if f} + ) + + slides_data.append( + SlideData( + slide_number=i, + screenshot_url=screenshot_url, + xml_content=xml_content, + normalized_fonts=normalized_fonts, + ) + ) + return PptxSlidesResponse( success=True, slides=slides_data, total_slides=len(slides_data), - fonts=font_analysis + fonts=font_analysis, ) + # NEW: Fonts-only endpoint leveraging the same font extraction/analysis @PPTX_FONTS_ROUTER.post("/process", response_model=PptxFontsResponse) async def process_pptx_fonts( @@ -359,7 +417,7 @@ async def process_pptx_fonts( if pptx_file.content_type not in POWERPOINT_TYPES: raise HTTPException( status_code=400, - detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}" + detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}", ) # Create temporary directory for processing @@ -381,6 +439,7 @@ async def process_pptx_fonts( fonts=font_analysis, ) + def _create_font_alias_config(raw_fonts: List[str]) -> str: """Create a temporary fontconfig configuration that aliases variant family names to normalized root families. Returns the path to the config file. @@ -395,13 +454,16 @@ def _create_font_alias_config(raw_fonts: List[str]) -> str: fd, fonts_conf_path = tempfile.mkstemp(prefix="fonts_alias_", suffix=".conf") os.close(fd) with open(fonts_conf_path, "w", encoding="utf-8") as cfg: - cfg.write(""" + cfg.write( + """ /etc/fonts/fonts.conf -""") +""" + ) for src, dst in mappings.items(): - cfg.write(f""" + cfg.write( + f""" {src} @@ -410,30 +472,34 @@ def _create_font_alias_config(raw_fonts: List[str]) -> str: {dst} -""") +""" + ) cfg.write("\n\n") return fonts_conf_path + async def _install_fonts(fonts: List[UploadFile], temp_dir: str) -> None: """Install provided font files to the system.""" fonts_dir = os.path.join(temp_dir, "fonts") os.makedirs(fonts_dir, exist_ok=True) - + for font_file in fonts: # Save font file font_path = os.path.join(fonts_dir, font_file.filename) with open(font_path, "wb") as f: font_content = await font_file.read() f.write(font_content) - + # Install font (copy to system fonts directory) try: - subprocess.run([ - "cp", font_path, "/usr/share/fonts/truetype/" - ], check=True, capture_output=True) + subprocess.run( + ["cp", font_path, "/usr/share/fonts/truetype/"], + check=True, + capture_output=True, + ) except subprocess.CalledProcessError as e: print(f"Warning: Failed to install font {font_file.filename}: {e}") - + # Refresh font cache try: subprocess.run(["fc-cache", "-f", "-v"], check=True, capture_output=True) @@ -445,44 +511,48 @@ def _extract_slide_xmls(pptx_path: str, temp_dir: str) -> List[str]: """Extract slide XML content from PPTX file.""" slide_xmls = [] extract_dir = os.path.join(temp_dir, "pptx_extract") - + try: # Unzip PPTX file - with zipfile.ZipFile(pptx_path, 'r') as zip_ref: + with zipfile.ZipFile(pptx_path, "r") as zip_ref: zip_ref.extractall(extract_dir) - + # Look for slides in ppt/slides/ directory slides_dir = os.path.join(extract_dir, "ppt", "slides") - + if not os.path.exists(slides_dir): raise Exception("No slides directory found in PPTX file") - + # Get all slide XML files and sort them numerically - slide_files = [f for f in os.listdir(slides_dir) if f.startswith("slide") and f.endswith(".xml")] + slide_files = [ + f + for f in os.listdir(slides_dir) + if f.startswith("slide") and f.endswith(".xml") + ] slide_files.sort(key=lambda x: int(x.replace("slide", "").replace(".xml", ""))) - + # Read XML content from each slide for slide_file in slide_files: slide_path = os.path.join(slides_dir, slide_file) - with open(slide_path, 'r', encoding='utf-8') as f: + with open(slide_path, "r", encoding="utf-8") as f: slide_xmls.append(f.read()) - + return slide_xmls - + except Exception as e: raise Exception(f"Failed to extract slide XMLs: {str(e)}") -async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]: +async def _convert_pptx_to_pdf(pptx_path: str, temp_dir: str) -> str: """Generate PNG screenshots of PPTX slides using LibreOffice + ImageMagick.""" screenshots_dir = os.path.join(temp_dir, "screenshots") os.makedirs(screenshots_dir, exist_ok=True) - + try: # First, get the number of slides by extracting XMLs slide_xmls = _extract_slide_xmls(pptx_path, temp_dir) slide_count = len(slide_xmls) - + # Build font alias config to force variant families to resolve to normalized root families raw_fonts: List[str] = [] for xml in slide_xmls: @@ -491,23 +561,32 @@ async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]: fonts_conf_path = _create_font_alias_config(raw_fonts) env = os.environ.copy() env["FONTCONFIG_FILE"] = fonts_conf_path - + print(f"Found {slide_count} slides in presentation") - + # Step 1: Convert PPTX to PDF using LibreOffice print("Starting LibreOffice PDF conversion...") pdf_filename = "temp_presentation.pdf" pdf_path = os.path.join(screenshots_dir, pdf_filename) - + try: - result = subprocess.run([ - "libreoffice", - "--headless", - "--convert-to", "pdf", - "--outdir", screenshots_dir, - pptx_path - ], check=True, capture_output=True, text=True, timeout=500, env=env) - + result = subprocess.run( + [ + "libreoffice", + "--headless", + "--convert-to", + "pdf", + "--outdir", + screenshots_dir, + pptx_path, + ], + check=True, + capture_output=True, + text=True, + timeout=500, + env=env, + ) + print(f"LibreOffice PDF conversion output: {result.stdout}") if result.stderr: print(f"LibreOffice PDF conversion warnings: {result.stderr}") @@ -516,74 +595,19 @@ async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]: except subprocess.CalledProcessError as e: error_msg = e.stderr if e.stderr else str(e) raise Exception(f"LibreOffice PDF conversion failed: {error_msg}") - + # Find the generated PDF file (LibreOffice uses original filename) - pdf_files = [f for f in os.listdir(screenshots_dir) if f.endswith('.pdf')] + pdf_files = [f for f in os.listdir(screenshots_dir) if f.endswith(".pdf")] if not pdf_files: raise Exception("LibreOffice failed to generate PDF file") - + actual_pdf_path = os.path.join(screenshots_dir, pdf_files[0]) print(f"Generated PDF: {actual_pdf_path}") - - # Step 2: Convert PDF to individual PNG images using ImageMagick - print("Starting ImageMagick PNG conversion...") - try: - result = subprocess.run([ - "convert", - "-density", "150", - actual_pdf_path, - os.path.join(screenshots_dir, "slide_%03d.png") - ], check=True, capture_output=True, text=True, timeout=500, env=env) - - print(f"ImageMagick conversion output: {result.stdout}") - if result.stderr: - print(f"ImageMagick conversion warnings: {result.stderr}") - except subprocess.TimeoutExpired: - raise Exception("ImageMagick PNG conversion timed out after 120 seconds") - except subprocess.CalledProcessError as e: - error_msg = e.stderr if e.stderr else str(e) - raise Exception(f"ImageMagick PNG conversion failed: {error_msg}") - - # Find generated PNG files (ImageMagick creates slide_000.png, slide_001.png, etc.) - print("Checking for generated PNG files...") - png_files = sorted([f for f in os.listdir(screenshots_dir) if f.startswith("slide_") and f.endswith('.png')]) - print(f"Generated PNG files: {png_files}") - - if not png_files: - raise Exception("ImageMagick failed to generate any PNG files") - - # Rename files from slide_000.png format to slide_1.png format expected by the API - print("Renaming PNG files to expected format...") - screenshot_paths = [] - for i in range(slide_count): - # ImageMagick generates slide_000.png, slide_001.png, etc. - source_file = f"slide_{i:03d}.png" - source_path = os.path.join(screenshots_dir, source_file) - - # We need slide_1.png, slide_2.png, etc. - target_file = f"slide_{i+1}.png" - target_path = os.path.join(screenshots_dir, target_file) - - if os.path.exists(source_path): - # Rename to expected format - shutil.move(source_path, target_path) - screenshot_paths.append(target_path) - print(f"✓ Renamed {source_file} to {target_file}") - else: - print(f"⚠ Warning: Expected file {source_file} not found, creating placeholder") - # Create empty placeholder - with open(target_path, 'w') as f: - f.write("") - screenshot_paths.append(target_path) - - print(f"Successfully generated {len(screenshot_paths)} slide screenshots") - return screenshot_paths - + return actual_pdf_path + except Exception as e: # Re-raise the specific exceptions we've already handled if "timed out" in str(e) or "failed:" in str(e): raise # Handle any other unexpected exceptions raise Exception(f"Screenshot generation failed: {str(e)}") - - \ No newline at end of file diff --git a/servers/fastapi/services/documents_loader.py b/servers/fastapi/services/documents_loader.py index 4fdcbbdf..9556f5b2 100644 --- a/servers/fastapi/services/documents_loader.py +++ b/servers/fastapi/services/documents_loader.py @@ -95,13 +95,15 @@ class DocumentsLoader: def load_powerpoint(self, file_path: str) -> str: return self.docling_service.parse_to_markdown(file_path) - def get_page_images_from_pdf(self, file_path: str, temp_dir: str): + @classmethod + def get_page_images_from_pdf(cls, file_path: str, temp_dir: str): with pdfplumber.open(file_path) as pdf: for page in pdf.pages: - img = page.to_image(resolution=300) + img = page.to_image(resolution=150) img.save(os.path.join(temp_dir, f"page_{page.page_number}.png")) - async def get_page_images_from_pdf_async(self, file_path: str, temp_dir: str): + @classmethod + async def get_page_images_from_pdf_async(cls, file_path: str, temp_dir: str): return await asyncio.to_thread( - self.get_page_images_from_pdf, file_path, temp_dir + cls.get_page_images_from_pdf, file_path, temp_dir ) diff --git a/servers/fastapi/utils/process_slides.py b/servers/fastapi/utils/process_slides.py index b3c605fa..87aa3e21 100644 --- a/servers/fastapi/utils/process_slides.py +++ b/servers/fastapi/utils/process_slides.py @@ -182,5 +182,5 @@ def process_slide_add_placeholder_assets(slide: SlideModel): for icon_path in icon_paths: icon_dict = get_dict_at_path(slide.content, icon_path) - icon_dict["__icon_url__"] = "/static/icons/placeholder.png" + icon_dict["__icon_url__"] = "/static/icons/placeholder.svg" set_dict_at_path(slide.content, icon_path, icon_dict)