Merge branch 'development' of github.com:presenton/presenton into development

This commit is contained in:
shiva raj badu 2025-09-11 23:37:55 +05:45
commit 79cf37f168
No known key found for this signature in database
9 changed files with 271 additions and 316 deletions

View file

@ -6,10 +6,7 @@ RUN apt-get update && apt-get install -y \
curl \
libreoffice \
fontconfig \
chromium \
imagemagick
RUN sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /etc/ImageMagick-6/policy.xml
chromium
# Install Node.js 20 using NodeSource repository

View file

@ -6,11 +6,7 @@ RUN apt-get update && apt-get install -y \
curl \
libreoffice \
fontconfig \
chromium \
imagemagick
RUN sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /etc/ImageMagick-6/policy.xml
chromium
# Install Node.js 20 using NodeSource repository
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \

View file

@ -54,11 +54,10 @@ Presenton gives you complete control over your AI presentation workflow. Choose
* ✅ **Docker Ready** — One-command deployment with GPU support for local models
## Presenton Cloud
We're launching Presenton Cloud, which will make it easy to create presentations through the UI, the API, and MCP. Join our [waitlist](https://presenton.ai) for early beta access.
## Deploy on Cloud (one click deployment)
[![Deploy on Railway](https://railway.com/button.svg)](https://railway.com/deploy/presenton-ai-presentations?referralCode=ubp0kk)
<a href="https://presenton.ai" target="_blank" align="center">
<img src="readme_assets/cloud-banner.png" height="350" alt="Presenton Cloud banner" />
</a>
## Running Presenton Docker

Binary file not shown.

After

Width:  |  Height:  |  Size: 437 KiB

View file

@ -64,7 +64,8 @@ async def stream_outlines(
presentation.tone,
presentation.verbosity,
presentation.instructions,
True,
presentation.include_title_slide,
presentation.web_search,
):
# Give control to the event loop
await asyncio.sleep(0)

View file

@ -6,6 +6,7 @@ from typing import List, Optional
from fastapi import APIRouter, UploadFile, File, HTTPException
from pydantic import BaseModel
from services.documents_loader import DocumentsLoader
from utils.asset_directory_utils import get_images_directory
import uuid
from constants.documents import PDF_MIME_TYPES
@ -31,28 +32,32 @@ async def process_pdf_slides(
):
"""
Process a PDF file to extract slide screenshots.
This endpoint:
1. Validates the uploaded PDF file
2. Renders each PDF page to a PNG image via DocumentsLoader
3. Returns screenshot URLs for each slide/page
Note: Font installation is not needed since PDFs already have fonts embedded.
"""
# Validate PDF file
if pdf_file.content_type not in PDF_MIME_TYPES:
raise HTTPException(
status_code=400,
detail=f"Invalid file type. Expected PDF file, got {pdf_file.content_type}"
detail=f"Invalid file type. Expected PDF file, got {pdf_file.content_type}",
)
# Enforce 100MB size limit
if hasattr(pdf_file, "size") and pdf_file.size and pdf_file.size > (100 * 1024 * 1024):
if (
hasattr(pdf_file, "size")
and pdf_file.size
and pdf_file.size > (100 * 1024 * 1024)
):
raise HTTPException(
status_code=400,
detail="PDF file exceeded max upload size of 100 MB",
)
# Create temporary directory for processing
with tempfile.TemporaryDirectory() as temp_dir:
try:
@ -61,120 +66,51 @@ async def process_pdf_slides(
with open(pdf_path, "wb") as f:
pdf_content = await pdf_file.read()
f.write(pdf_content)
# Generate screenshots from PDF using ImageMagick
screenshot_paths = await _generate_pdf_screenshots(pdf_path, temp_dir)
screenshot_paths = await DocumentsLoader.get_page_images_from_pdf_async(
pdf_path, temp_dir
)
print(f"Generated {len(screenshot_paths)} PDF screenshots")
# Move screenshots to images directory and generate URLs
images_dir = get_images_directory()
presentation_id = uuid.uuid4()
presentation_images_dir = os.path.join(images_dir, str(presentation_id))
os.makedirs(presentation_images_dir, exist_ok=True)
slides_data = []
for i, screenshot_path in enumerate(screenshot_paths, 1):
# Move screenshot to permanent location
screenshot_filename = f"slide_{i}.png"
permanent_screenshot_path = os.path.join(presentation_images_dir, screenshot_filename)
if os.path.exists(screenshot_path) and os.path.getsize(screenshot_path) > 0:
permanent_screenshot_path = os.path.join(
presentation_images_dir, screenshot_filename
)
if (
os.path.exists(screenshot_path)
and os.path.getsize(screenshot_path) > 0
):
# Use shutil.copy2 instead of os.rename to handle cross-device moves
shutil.copy2(screenshot_path, permanent_screenshot_path)
screenshot_url = f"/app_data/images/{presentation_id}/{screenshot_filename}"
screenshot_url = (
f"/app_data/images/{presentation_id}/{screenshot_filename}"
)
else:
# Fallback if screenshot generation failed or file is empty placeholder
screenshot_url = "/static/images/placeholder.jpg"
slides_data.append(PdfSlideData(
slide_number=i,
screenshot_url=screenshot_url
))
slides_data.append(
PdfSlideData(slide_number=i, screenshot_url=screenshot_url)
)
return PdfSlidesResponse(
success=True,
slides=slides_data,
total_slides=len(slides_data)
success=True, slides=slides_data, total_slides=len(slides_data)
)
except Exception as e:
print(f"Error processing PDF slides: {str(e)}")
raise HTTPException(
status_code=500,
detail=f"Failed to process PDF: {str(e)}"
status_code=500, detail=f"Failed to process PDF: {str(e)}"
)
async def _generate_pdf_screenshots(pdf_path: str, temp_dir: str) -> List[str]:
"""Generate PNG screenshots of PDF pages using ImageMagick (same approach as PPTX endpoint)."""
screenshots_dir = os.path.join(temp_dir, "screenshots")
os.makedirs(screenshots_dir, exist_ok=True)
try:
# Convert PDF to individual PNG images using ImageMagick
print("Starting ImageMagick PNG conversion...")
try:
result = subprocess.run([
"convert",
"-density", "150", # Same DPI as PPTX endpoint
pdf_path,
os.path.join(screenshots_dir, "slide_%03d.png")
], check=True, capture_output=True, text=True, timeout=500)
print(f"ImageMagick conversion output: {result.stdout}")
if result.stderr:
print(f"ImageMagick conversion warnings: {result.stderr}")
except subprocess.TimeoutExpired:
raise Exception("ImageMagick PNG conversion timed out after 500 seconds")
except subprocess.CalledProcessError as e:
error_msg = e.stderr if e.stderr else str(e)
raise Exception(f"ImageMagick PNG conversion failed: {error_msg}")
# Find generated PNG files (ImageMagick creates slide_000.png, slide_001.png, etc.)
print("Checking for generated PNG files...")
png_files = sorted([f for f in os.listdir(screenshots_dir) if f.startswith("slide_") and f.endswith('.png')])
print(f"Generated PNG files: {png_files}")
if not png_files:
raise Exception("ImageMagick failed to generate any PNG files")
# Determine page count from generated files
page_count = len(png_files)
print(f"Determined {page_count} pages from ImageMagick output")
# Rename files from slide_000.png format to slide_1.png format expected by the API
# (Same renaming logic as PPTX endpoint)
print("Renaming PNG files to expected format...")
screenshot_paths = []
for i in range(page_count):
# ImageMagick generates slide_000.png, slide_001.png, etc.
source_file = f"slide_{i:03d}.png"
source_path = os.path.join(screenshots_dir, source_file)
# We need slide_1.png, slide_2.png, etc.
target_file = f"slide_{i+1}.png"
target_path = os.path.join(screenshots_dir, target_file)
if os.path.exists(source_path):
# Rename to expected format
shutil.move(source_path, target_path)
screenshot_paths.append(target_path)
print(f"✓ Renamed {source_file} to {target_file}")
else:
print(f"⚠ Warning: Expected file {source_file} not found, creating placeholder")
# Create empty placeholder
with open(target_path, 'w') as f:
f.write("")
screenshot_paths.append(target_path)
print(f"Successfully generated {len(screenshot_paths)} PDF page screenshots")
return screenshot_paths
except Exception as e:
# Re-raise the specific exceptions we've already handled
if "timed out" in str(e) or "failed:" in str(e):
raise
# Handle any other unexpected exceptions
raise Exception(f"PDF screenshot generation failed: {str(e)}")

View file

@ -12,6 +12,7 @@ import asyncio
import xml.etree.ElementTree as ET
import re
from services.documents_loader import DocumentsLoader
from utils.asset_directory_utils import get_images_directory
import uuid
from constants.documents import POWERPOINT_TYPES
@ -28,7 +29,9 @@ class SlideData(BaseModel):
class FontAnalysisResult(BaseModel):
    """Outcome of a font analysis: fonts that are supported and fonts that are not."""

    # Example: [{"name": "Open Sans", "google_fonts_url": "..."}]
    internally_supported_fonts: List[Dict[str, str]]
    # Example: ["Custom Font Name"]
    not_supported_fonts: List[str]
@ -38,30 +41,62 @@ class PptxSlidesResponse(BaseModel):
total_slides: int
fonts: Optional[FontAnalysisResult] = None
# NEW: Fonts-only router and response for PPTX
class PptxFontsResponse(BaseModel):
    """Response model of the fonts-only PPTX endpoint."""

    # Success flag of the request.
    success: bool
    # Font analysis result returned to the client.
    fonts: FontAnalysisResult


# Router for the fonts-only endpoint; routes are mounted under /pptx-fonts.
PPTX_FONTS_ROUTER = APIRouter(prefix="/pptx-fonts", tags=["PPTX Fonts"])
# NEW: Normalize font family names by removing style/weight/stretch descriptors and splitting camel case
# The descriptor vocabulary is grouped by category and merged into one set.
_STYLE_TOKENS = set().union(
    # styles
    ("italic", "italics", "ital", "oblique", "roman"),
    # combined style shortcuts
    ("bolditalic", "bolditalics"),
    # weights
    (
        "thin",
        "hairline",
        "extralight",
        "ultralight",
        "light",
        "demilight",
        "semilight",
        "book",
        "regular",
        "normal",
        "medium",
        "semibold",
        "demibold",
        "bold",
        "extrabold",
        "ultrabold",
        "black",
        "extrablack",
        "ultrablack",
        "heavy",
    ),
    # width/stretch
    (
        "narrow",
        "condensed",
        "semicondensed",
        "extracondensed",
        "ultracondensed",
        "expanded",
        "semiexpanded",
        "extraexpanded",
        "ultraexpanded",
    ),
)

# Modifiers commonly used with style tokens
_STYLE_MODIFIERS = set(("semi", "demi", "extra", "ultra"))
def _insert_spaces_in_camel_case(value: str) -> str:
# Insert space before capital letters preceded by lowercase or digits (e.g., MontserratBold -> Montserrat Bold)
value = re.sub(r"(?<=[a-z0-9])([A-Z])", r" \1", value)
@ -69,6 +104,7 @@ def _insert_spaces_in_camel_case(value: str) -> str:
value = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", value)
return value
def normalize_font_family_name(raw_name: str) -> str:
if not raw_name:
return raw_name
@ -111,69 +147,69 @@ def normalize_font_family_name(raw_name: str) -> str:
def extract_fonts_from_oxml(xml_content: str) -> List[str]:
    """
    Extract font names from OXML content.

    Typefaces are read from the DrawingML font elements (``a:latin``, ``a:ea``,
    ``a:cs``, ``a:font``), from un-namespaced ``latin`` elements and, as a
    fallback, from every ``typeface="..."`` attribute matched by regex. The
    regex pass also runs when the XML is not well-formed, so a parse error no
    longer discards fonts that are plainly present in the text.

    Args:
        xml_content: OXML content as string

    Returns:
        Sorted list of unique font names found in the OXML. Theme-font
        placeholders (e.g. "+mn-lt") and blank values are excluded. An empty
        list is returned for empty input or on an unexpected error.
    """
    import html  # stdlib; imported locally so no file-level import is required

    if not xml_content:
        return []

    # Placeholders that refer to the theme's fonts rather than naming a family.
    theme_placeholders = {"+mn-lt", "+mj-lt", "+mn-ea", "+mj-ea", "+mn-cs", "+mj-cs"}
    namespaces = {"a": "http://schemas.openxmlformats.org/drawingml/2006/main"}
    # ".//a:latin" already matches latin elements nested in a:rPr, so no
    # separate run-properties pass is needed. ".//latin" is kept for documents
    # that carry no namespace prefix.
    font_paths = (".//a:latin", ".//a:ea", ".//a:cs", ".//a:font", ".//latin")

    fonts = set()
    try:
        try:
            root = ET.fromstring(xml_content)
        except ET.ParseError as e:
            print(f"Error extracting fonts from OXML: {e}")
            root = None

        if root is not None:
            for path in font_paths:
                for font_elem in root.findall(path, namespaces):
                    typeface = font_elem.attrib.get("typeface")
                    if typeface is not None:
                        fonts.add(typeface)

        # Regex fallback for fonts that might be missed. The raw match still
        # contains XML entities ("&amp;"), so decode it to agree with the
        # values the XML parser produced above.
        for raw_typeface in re.findall(r'typeface="([^"]+)"', xml_content):
            fonts.add(html.unescape(raw_typeface))

        return sorted(
            font
            for font in fonts
            if font.strip() and font not in theme_placeholders
        )
    except Exception as e:
        # Best effort: callers treat "no fonts" as a valid outcome.
        print(f"Error extracting fonts from OXML: {e}")
        return []
@ -182,21 +218,23 @@ def extract_fonts_from_oxml(xml_content: str) -> List[str]:
async def check_google_font_availability(font_name: str) -> bool:
    """
    Check if a font is available in Google Fonts.

    Sends a HEAD request (10 second total timeout) to the Google Fonts CSS2
    endpoint for the family and treats an HTTP 200 response as "available".

    Args:
        font_name: Name of the font to check

    Returns:
        True if font is available in Google Fonts, False otherwise. Blank
        names return False without a request; request errors are logged and
        reported as False.
    """
    from urllib.parse import quote_plus  # stdlib; local to avoid a new file-level import

    if not font_name or not font_name.strip():
        return False

    try:
        # quote_plus still maps spaces to "+", and additionally escapes
        # characters such as "&", "#" or "+" that would otherwise be read as
        # query-string syntax instead of part of the family name.
        formatted_name = quote_plus(font_name)
        url = f"https://fonts.googleapis.com/css2?family={formatted_name}&display=swap"

        async with aiohttp.ClientSession() as session:
            async with session.head(
                url, timeout=aiohttp.ClientTimeout(total=10)
            ) as response:
                return response.status == 200
    except Exception as e:
        # Deliberately best effort: an unreachable service counts as "not available".
        print(f"Error checking Google Font availability for {font_name}: {e}")
        return False
@ -205,10 +243,10 @@ async def check_google_font_availability(font_name: str) -> bool:
async def analyze_fonts_in_all_slides(slide_xmls: List[str]) -> FontAnalysisResult:
"""
Analyze fonts across all slides and determine Google Fonts availability.
Args:
slide_xmls: List of OXML content strings from all slides
Returns:
FontAnalysisResult with supported and unsupported fonts
"""
@ -222,45 +260,40 @@ async def analyze_fonts_in_all_slides(slide_xmls: List[str]) -> FontAnalysisResu
normalized_fonts = {normalize_font_family_name(f) for f in raw_fonts}
# Remove empties if any
normalized_fonts = {f for f in normalized_fonts if f}
if not normalized_fonts:
return FontAnalysisResult(
internally_supported_fonts=[],
not_supported_fonts=[]
)
return FontAnalysisResult(internally_supported_fonts=[], not_supported_fonts=[])
# Check each normalized font's availability in Google Fonts concurrently
tasks = [check_google_font_availability(font) for font in normalized_fonts]
results = await asyncio.gather(*tasks)
internally_supported_fonts = []
not_supported_fonts = []
for font, is_available in zip(normalized_fonts, results):
if is_available:
formatted_name = font.replace(' ', '+')
formatted_name = font.replace(" ", "+")
google_fonts_url = f"https://fonts.googleapis.com/css2?family={formatted_name}&display=swap"
internally_supported_fonts.append({
"name": font,
"google_fonts_url": google_fonts_url
})
internally_supported_fonts.append(
{"name": font, "google_fonts_url": google_fonts_url}
)
else:
not_supported_fonts.append(font)
return FontAnalysisResult(
internally_supported_fonts=internally_supported_fonts,
not_supported_fonts=[]
internally_supported_fonts=internally_supported_fonts, not_supported_fonts=[]
)
@PPTX_SLIDES_ROUTER.post("/process", response_model=PptxSlidesResponse)
async def process_pptx_slides(
pptx_file: UploadFile = File(..., description="PPTX file to process"),
fonts: Optional[List[UploadFile]] = File(None, description="Optional font files")
fonts: Optional[List[UploadFile]] = File(None, description="Optional font files"),
):
"""
Process a PPTX file to extract slide screenshots and XML content.
This endpoint:
1. Validates the uploaded PPTX file
2. Installs any provided font files
@ -268,20 +301,24 @@ async def process_pptx_slides(
4. Uses LibreOffice to generate slide screenshots
5. Returns both screenshot URLs and XML content for each slide
"""
# Validate PPTX file
if pptx_file.content_type not in POWERPOINT_TYPES:
raise HTTPException(
status_code=400,
detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}"
detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}",
)
# Enforce 100MB size limit
if hasattr(pptx_file, "size") and pptx_file.size and pptx_file.size > (100 * 1024 * 1024):
if (
hasattr(pptx_file, "size")
and pptx_file.size
and pptx_file.size > (100 * 1024 * 1024)
):
raise HTTPException(
status_code=400,
detail="PPTX file exceeded max upload size of 100 MB",
)
# Create temporary directory for processing
with tempfile.TemporaryDirectory() as temp_dir:
if True:
@ -290,61 +327,82 @@ async def process_pptx_slides(
with open(pptx_path, "wb") as f:
pptx_content = await pptx_file.read()
f.write(pptx_content)
# Install fonts if provided
if fonts:
await _install_fonts(fonts, temp_dir)
# Extract slide XMLs from PPTX
slide_xmls = _extract_slide_xmls(pptx_path, temp_dir)
# Convert PPTX to PDF
pdf_path = await _convert_pptx_to_pdf(pptx_path, temp_dir)
# Generate screenshots using LibreOffice
screenshot_paths = await _generate_screenshots(pptx_path, temp_dir)
screenshot_paths = await DocumentsLoader.get_page_images_from_pdf_async(
pdf_path, temp_dir
)
print(f"Screenshot paths: {screenshot_paths}")
# Analyze fonts across all slides
font_analysis = await analyze_fonts_in_all_slides(slide_xmls)
print(f"Font analysis completed: {len(font_analysis.internally_supported_fonts)} supported, {len(font_analysis.not_supported_fonts)} not supported")
print(
f"Font analysis completed: {len(font_analysis.internally_supported_fonts)} supported, {len(font_analysis.not_supported_fonts)} not supported"
)
# Move screenshots to images directory and generate URLs
images_dir = get_images_directory()
presentation_id = uuid.uuid4()
presentation_images_dir = os.path.join(images_dir, str(presentation_id))
os.makedirs(presentation_images_dir, exist_ok=True)
slides_data = []
for i, (xml_content, screenshot_path) in enumerate(zip(slide_xmls, screenshot_paths), 1):
for i, (xml_content, screenshot_path) in enumerate(
zip(slide_xmls, screenshot_paths), 1
):
# Move screenshot to permanent location
screenshot_filename = f"slide_{i}.png"
permanent_screenshot_path = os.path.join(presentation_images_dir, screenshot_filename)
if os.path.exists(screenshot_path) and os.path.getsize(screenshot_path) > 0:
permanent_screenshot_path = os.path.join(
presentation_images_dir, screenshot_filename
)
if (
os.path.exists(screenshot_path)
and os.path.getsize(screenshot_path) > 0
):
# Use shutil.copy2 instead of os.rename to handle cross-device moves
shutil.copy2(screenshot_path, permanent_screenshot_path)
screenshot_url = f"/app_data/images/{presentation_id}/{screenshot_filename}"
screenshot_url = (
f"/app_data/images/{presentation_id}/{screenshot_filename}"
)
else:
# Fallback if screenshot generation failed or file is empty placeholder
screenshot_url = "/static/images/placeholder.jpg"
# Compute normalized fonts for this slide
raw_slide_fonts = extract_fonts_from_oxml(xml_content)
normalized_fonts = sorted({normalize_font_family_name(f) for f in raw_slide_fonts if f})
slides_data.append(SlideData(
slide_number=i,
screenshot_url=screenshot_url,
xml_content=xml_content,
normalized_fonts=normalized_fonts
))
normalized_fonts = sorted(
{normalize_font_family_name(f) for f in raw_slide_fonts if f}
)
slides_data.append(
SlideData(
slide_number=i,
screenshot_url=screenshot_url,
xml_content=xml_content,
normalized_fonts=normalized_fonts,
)
)
return PptxSlidesResponse(
success=True,
slides=slides_data,
total_slides=len(slides_data),
fonts=font_analysis
fonts=font_analysis,
)
# NEW: Fonts-only endpoint leveraging the same font extraction/analysis
@PPTX_FONTS_ROUTER.post("/process", response_model=PptxFontsResponse)
async def process_pptx_fonts(
@ -359,7 +417,7 @@ async def process_pptx_fonts(
if pptx_file.content_type not in POWERPOINT_TYPES:
raise HTTPException(
status_code=400,
detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}"
detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}",
)
# Create temporary directory for processing
@ -381,6 +439,7 @@ async def process_pptx_fonts(
fonts=font_analysis,
)
def _create_font_alias_config(raw_fonts: List[str]) -> str:
"""Create a temporary fontconfig configuration that aliases variant family names to normalized root families.
Returns the path to the config file.
@ -395,13 +454,16 @@ def _create_font_alias_config(raw_fonts: List[str]) -> str:
fd, fonts_conf_path = tempfile.mkstemp(prefix="fonts_alias_", suffix=".conf")
os.close(fd)
with open(fonts_conf_path, "w", encoding="utf-8") as cfg:
cfg.write("""<?xml version='1.0'?>
cfg.write(
"""<?xml version='1.0'?>
<!DOCTYPE fontconfig SYSTEM "urn:fontconfig:fonts.dtd">
<fontconfig>
<include>/etc/fonts/fonts.conf</include>
""")
"""
)
for src, dst in mappings.items():
cfg.write(f"""
cfg.write(
f"""
<match target="pattern">
<test name="family" compare="eq">
<string>{src}</string>
@ -410,30 +472,34 @@ def _create_font_alias_config(raw_fonts: List[str]) -> str:
<string>{dst}</string>
</edit>
</match>
""")
"""
)
cfg.write("\n</fontconfig>\n")
return fonts_conf_path
async def _install_fonts(fonts: List[UploadFile], temp_dir: str) -> None:
"""Install provided font files to the system."""
fonts_dir = os.path.join(temp_dir, "fonts")
os.makedirs(fonts_dir, exist_ok=True)
for font_file in fonts:
# Save font file
font_path = os.path.join(fonts_dir, font_file.filename)
with open(font_path, "wb") as f:
font_content = await font_file.read()
f.write(font_content)
# Install font (copy to system fonts directory)
try:
subprocess.run([
"cp", font_path, "/usr/share/fonts/truetype/"
], check=True, capture_output=True)
subprocess.run(
["cp", font_path, "/usr/share/fonts/truetype/"],
check=True,
capture_output=True,
)
except subprocess.CalledProcessError as e:
print(f"Warning: Failed to install font {font_file.filename}: {e}")
# Refresh font cache
try:
subprocess.run(["fc-cache", "-f", "-v"], check=True, capture_output=True)
@ -445,44 +511,48 @@ def _extract_slide_xmls(pptx_path: str, temp_dir: str) -> List[str]:
"""Extract slide XML content from PPTX file."""
slide_xmls = []
extract_dir = os.path.join(temp_dir, "pptx_extract")
try:
# Unzip PPTX file
with zipfile.ZipFile(pptx_path, 'r') as zip_ref:
with zipfile.ZipFile(pptx_path, "r") as zip_ref:
zip_ref.extractall(extract_dir)
# Look for slides in ppt/slides/ directory
slides_dir = os.path.join(extract_dir, "ppt", "slides")
if not os.path.exists(slides_dir):
raise Exception("No slides directory found in PPTX file")
# Get all slide XML files and sort them numerically
slide_files = [f for f in os.listdir(slides_dir) if f.startswith("slide") and f.endswith(".xml")]
slide_files = [
f
for f in os.listdir(slides_dir)
if f.startswith("slide") and f.endswith(".xml")
]
slide_files.sort(key=lambda x: int(x.replace("slide", "").replace(".xml", "")))
# Read XML content from each slide
for slide_file in slide_files:
slide_path = os.path.join(slides_dir, slide_file)
with open(slide_path, 'r', encoding='utf-8') as f:
with open(slide_path, "r", encoding="utf-8") as f:
slide_xmls.append(f.read())
return slide_xmls
except Exception as e:
raise Exception(f"Failed to extract slide XMLs: {str(e)}")
async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]:
async def _convert_pptx_to_pdf(pptx_path: str, temp_dir: str) -> str:
"""Convert a PPTX file to PDF using LibreOffice and return the path of the generated PDF."""
screenshots_dir = os.path.join(temp_dir, "screenshots")
os.makedirs(screenshots_dir, exist_ok=True)
try:
# First, get the number of slides by extracting XMLs
slide_xmls = _extract_slide_xmls(pptx_path, temp_dir)
slide_count = len(slide_xmls)
# Build font alias config to force variant families to resolve to normalized root families
raw_fonts: List[str] = []
for xml in slide_xmls:
@ -491,23 +561,32 @@ async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]:
fonts_conf_path = _create_font_alias_config(raw_fonts)
env = os.environ.copy()
env["FONTCONFIG_FILE"] = fonts_conf_path
print(f"Found {slide_count} slides in presentation")
# Step 1: Convert PPTX to PDF using LibreOffice
print("Starting LibreOffice PDF conversion...")
pdf_filename = "temp_presentation.pdf"
pdf_path = os.path.join(screenshots_dir, pdf_filename)
try:
result = subprocess.run([
"libreoffice",
"--headless",
"--convert-to", "pdf",
"--outdir", screenshots_dir,
pptx_path
], check=True, capture_output=True, text=True, timeout=500, env=env)
result = subprocess.run(
[
"libreoffice",
"--headless",
"--convert-to",
"pdf",
"--outdir",
screenshots_dir,
pptx_path,
],
check=True,
capture_output=True,
text=True,
timeout=500,
env=env,
)
print(f"LibreOffice PDF conversion output: {result.stdout}")
if result.stderr:
print(f"LibreOffice PDF conversion warnings: {result.stderr}")
@ -516,74 +595,19 @@ async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]:
except subprocess.CalledProcessError as e:
error_msg = e.stderr if e.stderr else str(e)
raise Exception(f"LibreOffice PDF conversion failed: {error_msg}")
# Find the generated PDF file (LibreOffice uses original filename)
pdf_files = [f for f in os.listdir(screenshots_dir) if f.endswith('.pdf')]
pdf_files = [f for f in os.listdir(screenshots_dir) if f.endswith(".pdf")]
if not pdf_files:
raise Exception("LibreOffice failed to generate PDF file")
actual_pdf_path = os.path.join(screenshots_dir, pdf_files[0])
print(f"Generated PDF: {actual_pdf_path}")
# Step 2: Convert PDF to individual PNG images using ImageMagick
print("Starting ImageMagick PNG conversion...")
try:
result = subprocess.run([
"convert",
"-density", "150",
actual_pdf_path,
os.path.join(screenshots_dir, "slide_%03d.png")
], check=True, capture_output=True, text=True, timeout=500, env=env)
print(f"ImageMagick conversion output: {result.stdout}")
if result.stderr:
print(f"ImageMagick conversion warnings: {result.stderr}")
except subprocess.TimeoutExpired:
raise Exception("ImageMagick PNG conversion timed out after 120 seconds")
except subprocess.CalledProcessError as e:
error_msg = e.stderr if e.stderr else str(e)
raise Exception(f"ImageMagick PNG conversion failed: {error_msg}")
# Find generated PNG files (ImageMagick creates slide_000.png, slide_001.png, etc.)
print("Checking for generated PNG files...")
png_files = sorted([f for f in os.listdir(screenshots_dir) if f.startswith("slide_") and f.endswith('.png')])
print(f"Generated PNG files: {png_files}")
if not png_files:
raise Exception("ImageMagick failed to generate any PNG files")
# Rename files from slide_000.png format to slide_1.png format expected by the API
print("Renaming PNG files to expected format...")
screenshot_paths = []
for i in range(slide_count):
# ImageMagick generates slide_000.png, slide_001.png, etc.
source_file = f"slide_{i:03d}.png"
source_path = os.path.join(screenshots_dir, source_file)
# We need slide_1.png, slide_2.png, etc.
target_file = f"slide_{i+1}.png"
target_path = os.path.join(screenshots_dir, target_file)
if os.path.exists(source_path):
# Rename to expected format
shutil.move(source_path, target_path)
screenshot_paths.append(target_path)
print(f"✓ Renamed {source_file} to {target_file}")
else:
print(f"⚠ Warning: Expected file {source_file} not found, creating placeholder")
# Create empty placeholder
with open(target_path, 'w') as f:
f.write("")
screenshot_paths.append(target_path)
print(f"Successfully generated {len(screenshot_paths)} slide screenshots")
return screenshot_paths
return actual_pdf_path
except Exception as e:
# Re-raise the specific exceptions we've already handled
if "timed out" in str(e) or "failed:" in str(e):
raise
# Handle any other unexpected exceptions
raise Exception(f"Screenshot generation failed: {str(e)}")

View file

@ -95,13 +95,15 @@ class DocumentsLoader:
def load_powerpoint(self, file_path: str) -> str:
    """Return the Markdown that the docling service produces for ``file_path``."""
    markdown = self.docling_service.parse_to_markdown(file_path)
    return markdown
@classmethod
def get_page_images_from_pdf(cls, file_path: str, temp_dir: str) -> list:
    """Render every page of a PDF to a PNG file and return the image paths.

    Callers use the result as a list of screenshot paths (they take its
    length and iterate over it), so the written paths are collected and
    returned instead of implicitly returning None.

    Args:
        file_path: Path of the PDF to render.
        temp_dir: Directory that receives the ``page_<page_number>.png`` files.

    Returns:
        Paths of the written PNG files, in the order the pages are iterated.
    """
    image_paths = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            image_path = os.path.join(temp_dir, f"page_{page.page_number}.png")
            img = page.to_image(resolution=150)
            img.save(image_path)
            image_paths.append(image_path)
    return image_paths
@classmethod
async def get_page_images_from_pdf_async(cls, file_path: str, temp_dir: str):
    """Run ``get_page_images_from_pdf`` in a worker thread and return its result."""
    render_pages = cls.get_page_images_from_pdf
    return await asyncio.to_thread(render_pages, file_path, temp_dir)

View file

@ -182,5 +182,5 @@ def process_slide_add_placeholder_assets(slide: SlideModel):
for icon_path in icon_paths:
icon_dict = get_dict_at_path(slide.content, icon_path)
icon_dict["__icon_url__"] = "/static/icons/placeholder.png"
icon_dict["__icon_url__"] = "/static/icons/placeholder.svg"
set_dict_at_path(slide.content, icon_path, icon_dict)