Merge branch 'development' of github.com:presenton/presenton into development
This commit is contained in:
commit
79cf37f168
9 changed files with 271 additions and 316 deletions
|
|
@ -6,10 +6,7 @@ RUN apt-get update && apt-get install -y \
|
|||
curl \
|
||||
libreoffice \
|
||||
fontconfig \
|
||||
chromium \
|
||||
imagemagick
|
||||
|
||||
RUN sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /etc/ImageMagick-6/policy.xml
|
||||
chromium
|
||||
|
||||
|
||||
# Install Node.js 20 using NodeSource repository
|
||||
|
|
|
|||
|
|
@ -6,11 +6,7 @@ RUN apt-get update && apt-get install -y \
|
|||
curl \
|
||||
libreoffice \
|
||||
fontconfig \
|
||||
chromium \
|
||||
imagemagick
|
||||
|
||||
RUN sed -i 's/rights="none" pattern="PDF"/rights="read|write" pattern="PDF"/' /etc/ImageMagick-6/policy.xml
|
||||
|
||||
chromium
|
||||
|
||||
# Install Node.js 20 using NodeSource repository
|
||||
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
|
||||
|
|
|
|||
|
|
@ -54,11 +54,10 @@ Presenton gives you complete control over your AI presentation workflow. Choose
|
|||
* ✅ **Docker Ready** — One-command deployment with GPU support for local models
|
||||
|
||||
## Presenton Cloud
|
||||
We're launching Presenton Cloud, which will make it easy to create presentations through the UI, API, and MCP. Join our [waitlist](https://presenton.ai) for early beta access.
|
||||
|
||||
## Deploy on Cloud (one click deployment)
|
||||
[](https://railway.com/deploy/presenton-ai-presentations?referralCode=ubp0kk)
|
||||
|
||||
<a href="https://presenton.ai" target="_blank" align="center">
|
||||
|
||||
<img src="readme_assets/cloud-banner.png" height="350" alt="Presenton Logo" />
|
||||
</a>
|
||||
|
||||
## Running Presenton Docker
|
||||
|
||||
|
|
|
|||
BIN
readme_assets/cloud-banner.png
Normal file
BIN
readme_assets/cloud-banner.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 437 KiB |
|
|
@ -64,7 +64,8 @@ async def stream_outlines(
|
|||
presentation.tone,
|
||||
presentation.verbosity,
|
||||
presentation.instructions,
|
||||
True,
|
||||
presentation.include_title_slide,
|
||||
presentation.web_search,
|
||||
):
|
||||
# Give control to the event loop
|
||||
await asyncio.sleep(0)
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from typing import List, Optional
|
|||
from fastapi import APIRouter, UploadFile, File, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.documents_loader import DocumentsLoader
|
||||
from utils.asset_directory_utils import get_images_directory
|
||||
import uuid
|
||||
from constants.documents import PDF_MIME_TYPES
|
||||
|
|
@ -31,28 +32,32 @@ async def process_pdf_slides(
|
|||
):
|
||||
"""
|
||||
Process a PDF file to extract slide screenshots.
|
||||
|
||||
|
||||
This endpoint:
|
||||
1. Validates the uploaded PDF file
|
||||
2. Uses ImageMagick to convert PDF pages to PNG images
|
||||
3. Returns screenshot URLs for each slide/page
|
||||
|
||||
|
||||
Note: Font installation is not needed since PDFs already have fonts embedded.
|
||||
"""
|
||||
|
||||
|
||||
# Validate PDF file
|
||||
if pdf_file.content_type not in PDF_MIME_TYPES:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Invalid file type. Expected PDF file, got {pdf_file.content_type}"
|
||||
detail=f"Invalid file type. Expected PDF file, got {pdf_file.content_type}",
|
||||
)
|
||||
# Enforce 100MB size limit
|
||||
if hasattr(pdf_file, "size") and pdf_file.size and pdf_file.size > (100 * 1024 * 1024):
|
||||
if (
|
||||
hasattr(pdf_file, "size")
|
||||
and pdf_file.size
|
||||
and pdf_file.size > (100 * 1024 * 1024)
|
||||
):
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="PDF file exceeded max upload size of 100 MB",
|
||||
)
|
||||
|
||||
|
||||
# Create temporary directory for processing
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
try:
|
||||
|
|
@ -61,120 +66,51 @@ async def process_pdf_slides(
|
|||
with open(pdf_path, "wb") as f:
|
||||
pdf_content = await pdf_file.read()
|
||||
f.write(pdf_content)
|
||||
|
||||
|
||||
# Generate screenshots from PDF using ImageMagick
|
||||
screenshot_paths = await _generate_pdf_screenshots(pdf_path, temp_dir)
|
||||
screenshot_paths = await DocumentsLoader.get_page_images_from_pdf_async(
|
||||
pdf_path, temp_dir
|
||||
)
|
||||
print(f"Generated {len(screenshot_paths)} PDF screenshots")
|
||||
|
||||
|
||||
# Move screenshots to images directory and generate URLs
|
||||
images_dir = get_images_directory()
|
||||
presentation_id = uuid.uuid4()
|
||||
presentation_images_dir = os.path.join(images_dir, str(presentation_id))
|
||||
os.makedirs(presentation_images_dir, exist_ok=True)
|
||||
|
||||
|
||||
slides_data = []
|
||||
|
||||
|
||||
for i, screenshot_path in enumerate(screenshot_paths, 1):
|
||||
# Move screenshot to permanent location
|
||||
screenshot_filename = f"slide_{i}.png"
|
||||
permanent_screenshot_path = os.path.join(presentation_images_dir, screenshot_filename)
|
||||
|
||||
if os.path.exists(screenshot_path) and os.path.getsize(screenshot_path) > 0:
|
||||
permanent_screenshot_path = os.path.join(
|
||||
presentation_images_dir, screenshot_filename
|
||||
)
|
||||
|
||||
if (
|
||||
os.path.exists(screenshot_path)
|
||||
and os.path.getsize(screenshot_path) > 0
|
||||
):
|
||||
# Use shutil.copy2 instead of os.rename to handle cross-device moves
|
||||
shutil.copy2(screenshot_path, permanent_screenshot_path)
|
||||
screenshot_url = f"/app_data/images/{presentation_id}/{screenshot_filename}"
|
||||
screenshot_url = (
|
||||
f"/app_data/images/{presentation_id}/{screenshot_filename}"
|
||||
)
|
||||
else:
|
||||
# Fallback if screenshot generation failed or file is empty placeholder
|
||||
screenshot_url = "/static/images/placeholder.jpg"
|
||||
|
||||
slides_data.append(PdfSlideData(
|
||||
slide_number=i,
|
||||
screenshot_url=screenshot_url
|
||||
))
|
||||
|
||||
|
||||
slides_data.append(
|
||||
PdfSlideData(slide_number=i, screenshot_url=screenshot_url)
|
||||
)
|
||||
|
||||
return PdfSlidesResponse(
|
||||
success=True,
|
||||
slides=slides_data,
|
||||
total_slides=len(slides_data)
|
||||
success=True, slides=slides_data, total_slides=len(slides_data)
|
||||
)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing PDF slides: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to process PDF: {str(e)}"
|
||||
status_code=500, detail=f"Failed to process PDF: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
async def _generate_pdf_screenshots(pdf_path: str, temp_dir: str) -> List[str]:
    """Render each page of a PDF to a PNG with ImageMagick and return the paths.

    ImageMagick's ``convert`` is asked to write zero-based, zero-padded files
    (``slide_000.png``, ``slide_001.png``, ...) into ``<temp_dir>/screenshots``.
    Those files are then renamed to the one-based ``slide_1.png``,
    ``slide_2.png``, ... form before being returned.

    Args:
        pdf_path: Path of the PDF file to render.
        temp_dir: Working directory; a ``screenshots`` sub-directory is
            created inside it.

    Returns:
        Paths of the renamed PNG files, in page order. If an expected source
        file is missing, an empty placeholder file is created at the target
        path and returned in its place.

    Raises:
        Exception: If the conversion times out, exits with a non-zero status,
            produces no PNG files, or any other step fails. The original
            error is attached as ``__cause__``.
    """
    # Imported locally so this block does not rely on the file's import list.
    import asyncio
    import functools

    conversion_timeout = 500  # seconds; also quoted in the timeout message

    screenshots_dir = os.path.join(temp_dir, "screenshots")
    os.makedirs(screenshots_dir, exist_ok=True)

    convert_command = [
        "convert",
        "-density",
        "150",  # Same DPI as PPTX endpoint
        pdf_path,
        os.path.join(screenshots_dir, "slide_%03d.png"),
    ]

    # Convert PDF to individual PNG images using ImageMagick
    print("Starting ImageMagick PNG conversion...")
    try:
        # subprocess.run blocks until convert exits, so run it on the default
        # executor; calling it directly in this coroutine would stall the
        # event loop for the whole conversion.
        result = await asyncio.get_running_loop().run_in_executor(
            None,
            functools.partial(
                subprocess.run,
                convert_command,
                check=True,
                capture_output=True,
                text=True,
                timeout=conversion_timeout,
            ),
        )
    except subprocess.TimeoutExpired as exc:
        raise Exception(
            f"ImageMagick PNG conversion timed out after {conversion_timeout} seconds"
        ) from exc
    except subprocess.CalledProcessError as exc:
        error_msg = exc.stderr if exc.stderr else str(exc)
        raise Exception(f"ImageMagick PNG conversion failed: {error_msg}") from exc
    except Exception as exc:
        # Anything else, e.g. the convert executable could not be started.
        raise Exception(f"PDF screenshot generation failed: {str(exc)}") from exc

    print(f"ImageMagick conversion output: {result.stdout}")
    if result.stderr:
        print(f"ImageMagick conversion warnings: {result.stderr}")

    try:
        # Find generated PNG files (slide_000.png, slide_001.png, etc.)
        print("Checking for generated PNG files...")
        png_files = sorted(
            name
            for name in os.listdir(screenshots_dir)
            if name.startswith("slide_") and name.endswith(".png")
        )
        print(f"Generated PNG files: {png_files}")

        if not png_files:
            raise Exception("ImageMagick failed to generate any PNG files")

        # Determine page count from generated files
        page_count = len(png_files)
        print(f"Determined {page_count} pages from ImageMagick output")

        # Rename slide_000.png -> slide_1.png, the numbering the API exposes
        print("Renaming PNG files to expected format...")
        screenshot_paths = []
        for index in range(page_count):
            source_file = f"slide_{index:03d}.png"
            source_path = os.path.join(screenshots_dir, source_file)
            target_file = f"slide_{index + 1}.png"
            target_path = os.path.join(screenshots_dir, target_file)

            if os.path.exists(source_path):
                shutil.move(source_path, target_path)
                print(f"✓ Renamed {source_file} to {target_file}")
            else:
                print(
                    f"⚠ Warning: Expected file {source_file} not found, creating placeholder"
                )
                # An empty file keeps the page count and numbering intact.
                with open(target_path, "w") as placeholder:
                    placeholder.write("")
            screenshot_paths.append(target_path)

        print(f"Successfully generated {len(screenshot_paths)} PDF page screenshots")
        return screenshot_paths
    except Exception as exc:
        raise Exception(f"PDF screenshot generation failed: {str(exc)}") from exc
|
||||
|
|
@ -12,6 +12,7 @@ import asyncio
|
|||
import xml.etree.ElementTree as ET
|
||||
import re
|
||||
|
||||
from services.documents_loader import DocumentsLoader
|
||||
from utils.asset_directory_utils import get_images_directory
|
||||
import uuid
|
||||
from constants.documents import POWERPOINT_TYPES
|
||||
|
|
@ -28,7 +29,9 @@ class SlideData(BaseModel):
|
|||
|
||||
|
||||
class FontAnalysisResult(BaseModel):
    # Fonts reported as available, one dict per font,
    # e.g. [{"name": "Open Sans", "google_fonts_url": "..."}]
    internally_supported_fonts: List[Dict[str, str]]
    # Font names reported as unavailable, e.g. ["Custom Font Name"]
    not_supported_fonts: List[str]
|
||||
|
||||
|
||||
|
|
@ -38,30 +41,62 @@ class PptxSlidesResponse(BaseModel):
|
|||
total_slides: int
|
||||
fonts: Optional[FontAnalysisResult] = None
|
||||
|
||||
|
||||
# NEW: Fonts-only router and response for PPTX
|
||||
class PptxFontsResponse(BaseModel):
    # Response model of the fonts-only PPTX endpoint.
    success: bool
    # Font analysis for the uploaded presentation.
    fonts: FontAnalysisResult
|
||||
|
||||
|
||||
PPTX_FONTS_ROUTER = APIRouter(prefix="/pptx-fonts", tags=["PPTX Fonts"])
|
||||
|
||||
# NEW: Normalize font family names by removing style/weight/stretch descriptors and splitting camel case
|
||||
# Lower-case descriptors that name a style, weight or width/stretch rather
# than a font family.
_STYLE_TOKENS = {
    # styles
    "italic", "italics", "ital", "oblique", "roman",
    # combined style shortcuts
    "bolditalic", "bolditalics",
    # weights
    "thin", "hairline", "extralight", "ultralight",
    "light", "demilight", "semilight", "book",
    "regular", "normal", "medium", "semibold",
    "demibold", "bold", "extrabold", "ultrabold",
    "black", "extrablack", "ultrablack", "heavy",
    # width/stretch
    "narrow", "condensed", "semicondensed", "extracondensed", "ultracondensed",
    "expanded", "semiexpanded", "extraexpanded", "ultraexpanded",
}
# Modifiers commonly used with style tokens
_STYLE_MODIFIERS = {"semi", "demi", "extra", "ultra"}
|
||||
|
||||
|
||||
def _insert_spaces_in_camel_case(value: str) -> str:
|
||||
# Insert space before capital letters preceded by lowercase or digits (e.g., MontserratBold -> Montserrat Bold)
|
||||
value = re.sub(r"(?<=[a-z0-9])([A-Z])", r" \1", value)
|
||||
|
|
@ -69,6 +104,7 @@ def _insert_spaces_in_camel_case(value: str) -> str:
|
|||
value = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", value)
|
||||
return value
|
||||
|
||||
|
||||
def normalize_font_family_name(raw_name: str) -> str:
|
||||
if not raw_name:
|
||||
return raw_name
|
||||
|
|
@ -111,69 +147,69 @@ def normalize_font_family_name(raw_name: str) -> str:
|
|||
def extract_fonts_from_oxml(xml_content: str) -> List[str]:
    """
    Extract font names from OXML content.

    Typeface names are collected twice: from the parsed XML tree and from a
    regex scan of the raw text. The regex scan still runs when the XML cannot
    be parsed, so malformed markup yields whatever names are recoverable.

    Args:
        xml_content: OXML content as string

    Returns:
        Sorted list of unique font names found in the OXML, excluding theme
        placeholders such as "+mn-lt" and blank values. Returns an empty list
        for empty or non-string input. This function does not raise.
    """
    # Imported locally so this block does not rely on the file's import list.
    from html import unescape

    if not isinstance(xml_content, str) or not xml_content:
        return []

    fonts = set()

    # Namespaces commonly used in OXML
    namespaces = {
        "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
        "p": "http://schemas.openxmlformats.org/presentationml/2006/main",
        "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
    }

    try:
        root = ET.fromstring(xml_content)
    except Exception as e:
        # Broad on purpose: callers rely on this function never raising.
        print(f"Error extracting fonts from OXML: {e}")
        root = None

    if root is not None:
        # latin / east asian / complex script / theme font elements. The
        # descendant search already reaches elements nested under a:rPr.
        for tag in ("a:latin", "a:ea", "a:cs", "a:font"):
            for font_elem in root.findall(f".//{tag}", namespaces):
                if "typeface" in font_elem.attrib:
                    fonts.add(font_elem.attrib["typeface"])

        # Also search without namespace prefix for compatibility
        for font_elem in root.findall(".//latin"):
            if "typeface" in font_elem.attrib:
                fonts.add(font_elem.attrib["typeface"])

    # Regex fallback for fonts that might be missed. The raw text still has
    # XML entities (e.g. "&amp;"), so decode them to match the parsed values.
    font_pattern = r'typeface="([^"]+)"'
    fonts.update(unescape(match) for match in re.findall(font_pattern, xml_content))

    # Filter out system fonts and empty values
    system_fonts = {"+mn-lt", "+mj-lt", "+mn-ea", "+mj-ea", "+mn-cs", "+mj-cs", ""}
    return sorted(
        font for font in fonts if font not in system_fonts and font.strip()
    )
|
||||
|
|
@ -182,21 +218,23 @@ def extract_fonts_from_oxml(xml_content: str) -> List[str]:
|
|||
async def check_google_font_availability(font_name: str) -> bool:
    """
    Check if a font is available in Google Fonts.

    Sends a HEAD request for the font's css2 stylesheet and treats an HTTP 200
    response as "available".

    Args:
        font_name: Name of the font to check

    Returns:
        True if the request returned status 200, False otherwise (including
        any error or a timeout after 10 seconds). This function does not raise.
    """
    # Imported locally so this block does not rely on the file's import list.
    from urllib.parse import quote_plus

    try:
        # quote_plus turns spaces into "+" as before, and also escapes
        # characters such as "&" or "#" that would otherwise alter the query.
        formatted_name = quote_plus(font_name)
        url = f"https://fonts.googleapis.com/css2?family={formatted_name}&display=swap"

        async with aiohttp.ClientSession() as session:
            async with session.head(
                url, timeout=aiohttp.ClientTimeout(total=10)
            ) as response:
                return response.status == 200

    except Exception as e:
        print(f"Error checking Google Font availability for {font_name}: {e}")
        return False
|
||||
|
|
@ -205,10 +243,10 @@ async def check_google_font_availability(font_name: str) -> bool:
|
|||
async def analyze_fonts_in_all_slides(slide_xmls: List[str]) -> FontAnalysisResult:
|
||||
"""
|
||||
Analyze fonts across all slides and determine Google Fonts availability.
|
||||
|
||||
|
||||
Args:
|
||||
slide_xmls: List of OXML content strings from all slides
|
||||
|
||||
|
||||
Returns:
|
||||
FontAnalysisResult with supported and unsupported fonts
|
||||
"""
|
||||
|
|
@ -222,45 +260,40 @@ async def analyze_fonts_in_all_slides(slide_xmls: List[str]) -> FontAnalysisResu
|
|||
normalized_fonts = {normalize_font_family_name(f) for f in raw_fonts}
|
||||
# Remove empties if any
|
||||
normalized_fonts = {f for f in normalized_fonts if f}
|
||||
|
||||
|
||||
if not normalized_fonts:
|
||||
return FontAnalysisResult(
|
||||
internally_supported_fonts=[],
|
||||
not_supported_fonts=[]
|
||||
)
|
||||
|
||||
return FontAnalysisResult(internally_supported_fonts=[], not_supported_fonts=[])
|
||||
|
||||
# Check each normalized font's availability in Google Fonts concurrently
|
||||
tasks = [check_google_font_availability(font) for font in normalized_fonts]
|
||||
results = await asyncio.gather(*tasks)
|
||||
|
||||
|
||||
internally_supported_fonts = []
|
||||
not_supported_fonts = []
|
||||
|
||||
|
||||
for font, is_available in zip(normalized_fonts, results):
|
||||
if is_available:
|
||||
formatted_name = font.replace(' ', '+')
|
||||
formatted_name = font.replace(" ", "+")
|
||||
google_fonts_url = f"https://fonts.googleapis.com/css2?family={formatted_name}&display=swap"
|
||||
internally_supported_fonts.append({
|
||||
"name": font,
|
||||
"google_fonts_url": google_fonts_url
|
||||
})
|
||||
internally_supported_fonts.append(
|
||||
{"name": font, "google_fonts_url": google_fonts_url}
|
||||
)
|
||||
else:
|
||||
not_supported_fonts.append(font)
|
||||
|
||||
|
||||
return FontAnalysisResult(
|
||||
internally_supported_fonts=internally_supported_fonts,
|
||||
not_supported_fonts=[]
|
||||
internally_supported_fonts=internally_supported_fonts, not_supported_fonts=[]
|
||||
)
|
||||
|
||||
|
||||
@PPTX_SLIDES_ROUTER.post("/process", response_model=PptxSlidesResponse)
|
||||
async def process_pptx_slides(
|
||||
pptx_file: UploadFile = File(..., description="PPTX file to process"),
|
||||
fonts: Optional[List[UploadFile]] = File(None, description="Optional font files")
|
||||
fonts: Optional[List[UploadFile]] = File(None, description="Optional font files"),
|
||||
):
|
||||
"""
|
||||
Process a PPTX file to extract slide screenshots and XML content.
|
||||
|
||||
|
||||
This endpoint:
|
||||
1. Validates the uploaded PPTX file
|
||||
2. Installs any provided font files
|
||||
|
|
@ -268,20 +301,24 @@ async def process_pptx_slides(
|
|||
4. Uses LibreOffice to generate slide screenshots
|
||||
5. Returns both screenshot URLs and XML content for each slide
|
||||
"""
|
||||
|
||||
|
||||
# Validate PPTX file
|
||||
if pptx_file.content_type not in POWERPOINT_TYPES:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}"
|
||||
detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}",
|
||||
)
|
||||
# Enforce 100MB size limit
|
||||
if hasattr(pptx_file, "size") and pptx_file.size and pptx_file.size > (100 * 1024 * 1024):
|
||||
if (
|
||||
hasattr(pptx_file, "size")
|
||||
and pptx_file.size
|
||||
and pptx_file.size > (100 * 1024 * 1024)
|
||||
):
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="PPTX file exceeded max upload size of 100 MB",
|
||||
)
|
||||
|
||||
|
||||
# Create temporary directory for processing
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
if True:
|
||||
|
|
@ -290,61 +327,82 @@ async def process_pptx_slides(
|
|||
with open(pptx_path, "wb") as f:
|
||||
pptx_content = await pptx_file.read()
|
||||
f.write(pptx_content)
|
||||
|
||||
|
||||
# Install fonts if provided
|
||||
if fonts:
|
||||
await _install_fonts(fonts, temp_dir)
|
||||
|
||||
|
||||
# Extract slide XMLs from PPTX
|
||||
slide_xmls = _extract_slide_xmls(pptx_path, temp_dir)
|
||||
|
||||
|
||||
# Convert PPTX to PDF
|
||||
pdf_path = await _convert_pptx_to_pdf(pptx_path, temp_dir)
|
||||
|
||||
# Generate screenshots using LibreOffice
|
||||
screenshot_paths = await _generate_screenshots(pptx_path, temp_dir)
|
||||
screenshot_paths = await DocumentsLoader.get_page_images_from_pdf_async(
|
||||
pdf_path, temp_dir
|
||||
)
|
||||
print(f"Screenshot paths: {screenshot_paths}")
|
||||
|
||||
|
||||
# Analyze fonts across all slides
|
||||
font_analysis = await analyze_fonts_in_all_slides(slide_xmls)
|
||||
print(f"Font analysis completed: {len(font_analysis.internally_supported_fonts)} supported, {len(font_analysis.not_supported_fonts)} not supported")
|
||||
|
||||
print(
|
||||
f"Font analysis completed: {len(font_analysis.internally_supported_fonts)} supported, {len(font_analysis.not_supported_fonts)} not supported"
|
||||
)
|
||||
|
||||
# Move screenshots to images directory and generate URLs
|
||||
images_dir = get_images_directory()
|
||||
presentation_id = uuid.uuid4()
|
||||
presentation_images_dir = os.path.join(images_dir, str(presentation_id))
|
||||
os.makedirs(presentation_images_dir, exist_ok=True)
|
||||
|
||||
|
||||
slides_data = []
|
||||
|
||||
for i, (xml_content, screenshot_path) in enumerate(zip(slide_xmls, screenshot_paths), 1):
|
||||
|
||||
for i, (xml_content, screenshot_path) in enumerate(
|
||||
zip(slide_xmls, screenshot_paths), 1
|
||||
):
|
||||
# Move screenshot to permanent location
|
||||
screenshot_filename = f"slide_{i}.png"
|
||||
permanent_screenshot_path = os.path.join(presentation_images_dir, screenshot_filename)
|
||||
|
||||
if os.path.exists(screenshot_path) and os.path.getsize(screenshot_path) > 0:
|
||||
permanent_screenshot_path = os.path.join(
|
||||
presentation_images_dir, screenshot_filename
|
||||
)
|
||||
|
||||
if (
|
||||
os.path.exists(screenshot_path)
|
||||
and os.path.getsize(screenshot_path) > 0
|
||||
):
|
||||
# Use shutil.copy2 instead of os.rename to handle cross-device moves
|
||||
shutil.copy2(screenshot_path, permanent_screenshot_path)
|
||||
screenshot_url = f"/app_data/images/{presentation_id}/{screenshot_filename}"
|
||||
screenshot_url = (
|
||||
f"/app_data/images/{presentation_id}/{screenshot_filename}"
|
||||
)
|
||||
else:
|
||||
# Fallback if screenshot generation failed or file is empty placeholder
|
||||
screenshot_url = "/static/images/placeholder.jpg"
|
||||
|
||||
|
||||
# Compute normalized fonts for this slide
|
||||
raw_slide_fonts = extract_fonts_from_oxml(xml_content)
|
||||
normalized_fonts = sorted({normalize_font_family_name(f) for f in raw_slide_fonts if f})
|
||||
|
||||
slides_data.append(SlideData(
|
||||
slide_number=i,
|
||||
screenshot_url=screenshot_url,
|
||||
xml_content=xml_content,
|
||||
normalized_fonts=normalized_fonts
|
||||
))
|
||||
|
||||
normalized_fonts = sorted(
|
||||
{normalize_font_family_name(f) for f in raw_slide_fonts if f}
|
||||
)
|
||||
|
||||
slides_data.append(
|
||||
SlideData(
|
||||
slide_number=i,
|
||||
screenshot_url=screenshot_url,
|
||||
xml_content=xml_content,
|
||||
normalized_fonts=normalized_fonts,
|
||||
)
|
||||
)
|
||||
|
||||
return PptxSlidesResponse(
|
||||
success=True,
|
||||
slides=slides_data,
|
||||
total_slides=len(slides_data),
|
||||
fonts=font_analysis
|
||||
fonts=font_analysis,
|
||||
)
|
||||
|
||||
|
||||
# NEW: Fonts-only endpoint leveraging the same font extraction/analysis
|
||||
@PPTX_FONTS_ROUTER.post("/process", response_model=PptxFontsResponse)
|
||||
async def process_pptx_fonts(
|
||||
|
|
@ -359,7 +417,7 @@ async def process_pptx_fonts(
|
|||
if pptx_file.content_type not in POWERPOINT_TYPES:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}"
|
||||
detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}",
|
||||
)
|
||||
|
||||
# Create temporary directory for processing
|
||||
|
|
@ -381,6 +439,7 @@ async def process_pptx_fonts(
|
|||
fonts=font_analysis,
|
||||
)
|
||||
|
||||
|
||||
def _create_font_alias_config(raw_fonts: List[str]) -> str:
|
||||
"""Create a temporary fontconfig configuration that aliases variant family names to normalized root families.
|
||||
Returns the path to the config file.
|
||||
|
|
@ -395,13 +454,16 @@ def _create_font_alias_config(raw_fonts: List[str]) -> str:
|
|||
fd, fonts_conf_path = tempfile.mkstemp(prefix="fonts_alias_", suffix=".conf")
|
||||
os.close(fd)
|
||||
with open(fonts_conf_path, "w", encoding="utf-8") as cfg:
|
||||
cfg.write("""<?xml version='1.0'?>
|
||||
cfg.write(
|
||||
"""<?xml version='1.0'?>
|
||||
<!DOCTYPE fontconfig SYSTEM "urn:fontconfig:fonts.dtd">
|
||||
<fontconfig>
|
||||
<include>/etc/fonts/fonts.conf</include>
|
||||
""")
|
||||
"""
|
||||
)
|
||||
for src, dst in mappings.items():
|
||||
cfg.write(f"""
|
||||
cfg.write(
|
||||
f"""
|
||||
<match target="pattern">
|
||||
<test name="family" compare="eq">
|
||||
<string>{src}</string>
|
||||
|
|
@ -410,30 +472,34 @@ def _create_font_alias_config(raw_fonts: List[str]) -> str:
|
|||
<string>{dst}</string>
|
||||
</edit>
|
||||
</match>
|
||||
""")
|
||||
"""
|
||||
)
|
||||
cfg.write("\n</fontconfig>\n")
|
||||
return fonts_conf_path
|
||||
|
||||
|
||||
async def _install_fonts(fonts: List[UploadFile], temp_dir: str) -> None:
|
||||
"""Install provided font files to the system."""
|
||||
fonts_dir = os.path.join(temp_dir, "fonts")
|
||||
os.makedirs(fonts_dir, exist_ok=True)
|
||||
|
||||
|
||||
for font_file in fonts:
|
||||
# Save font file
|
||||
font_path = os.path.join(fonts_dir, font_file.filename)
|
||||
with open(font_path, "wb") as f:
|
||||
font_content = await font_file.read()
|
||||
f.write(font_content)
|
||||
|
||||
|
||||
# Install font (copy to system fonts directory)
|
||||
try:
|
||||
subprocess.run([
|
||||
"cp", font_path, "/usr/share/fonts/truetype/"
|
||||
], check=True, capture_output=True)
|
||||
subprocess.run(
|
||||
["cp", font_path, "/usr/share/fonts/truetype/"],
|
||||
check=True,
|
||||
capture_output=True,
|
||||
)
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"Warning: Failed to install font {font_file.filename}: {e}")
|
||||
|
||||
|
||||
# Refresh font cache
|
||||
try:
|
||||
subprocess.run(["fc-cache", "-f", "-v"], check=True, capture_output=True)
|
||||
|
|
@ -445,44 +511,48 @@ def _extract_slide_xmls(pptx_path: str, temp_dir: str) -> List[str]:
|
|||
"""Extract slide XML content from PPTX file."""
|
||||
slide_xmls = []
|
||||
extract_dir = os.path.join(temp_dir, "pptx_extract")
|
||||
|
||||
|
||||
try:
|
||||
# Unzip PPTX file
|
||||
with zipfile.ZipFile(pptx_path, 'r') as zip_ref:
|
||||
with zipfile.ZipFile(pptx_path, "r") as zip_ref:
|
||||
zip_ref.extractall(extract_dir)
|
||||
|
||||
|
||||
# Look for slides in ppt/slides/ directory
|
||||
slides_dir = os.path.join(extract_dir, "ppt", "slides")
|
||||
|
||||
|
||||
if not os.path.exists(slides_dir):
|
||||
raise Exception("No slides directory found in PPTX file")
|
||||
|
||||
|
||||
# Get all slide XML files and sort them numerically
|
||||
slide_files = [f for f in os.listdir(slides_dir) if f.startswith("slide") and f.endswith(".xml")]
|
||||
slide_files = [
|
||||
f
|
||||
for f in os.listdir(slides_dir)
|
||||
if f.startswith("slide") and f.endswith(".xml")
|
||||
]
|
||||
slide_files.sort(key=lambda x: int(x.replace("slide", "").replace(".xml", "")))
|
||||
|
||||
|
||||
# Read XML content from each slide
|
||||
for slide_file in slide_files:
|
||||
slide_path = os.path.join(slides_dir, slide_file)
|
||||
with open(slide_path, 'r', encoding='utf-8') as f:
|
||||
with open(slide_path, "r", encoding="utf-8") as f:
|
||||
slide_xmls.append(f.read())
|
||||
|
||||
|
||||
return slide_xmls
|
||||
|
||||
|
||||
except Exception as e:
|
||||
raise Exception(f"Failed to extract slide XMLs: {str(e)}")
|
||||
|
||||
|
||||
async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]:
|
||||
async def _convert_pptx_to_pdf(pptx_path: str, temp_dir: str) -> str:
|
||||
"""Generate PNG screenshots of PPTX slides using LibreOffice + ImageMagick."""
|
||||
screenshots_dir = os.path.join(temp_dir, "screenshots")
|
||||
os.makedirs(screenshots_dir, exist_ok=True)
|
||||
|
||||
|
||||
try:
|
||||
# First, get the number of slides by extracting XMLs
|
||||
slide_xmls = _extract_slide_xmls(pptx_path, temp_dir)
|
||||
slide_count = len(slide_xmls)
|
||||
|
||||
|
||||
# Build font alias config to force variant families to resolve to normalized root families
|
||||
raw_fonts: List[str] = []
|
||||
for xml in slide_xmls:
|
||||
|
|
@ -491,23 +561,32 @@ async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]:
|
|||
fonts_conf_path = _create_font_alias_config(raw_fonts)
|
||||
env = os.environ.copy()
|
||||
env["FONTCONFIG_FILE"] = fonts_conf_path
|
||||
|
||||
|
||||
print(f"Found {slide_count} slides in presentation")
|
||||
|
||||
|
||||
# Step 1: Convert PPTX to PDF using LibreOffice
|
||||
print("Starting LibreOffice PDF conversion...")
|
||||
pdf_filename = "temp_presentation.pdf"
|
||||
pdf_path = os.path.join(screenshots_dir, pdf_filename)
|
||||
|
||||
|
||||
try:
|
||||
result = subprocess.run([
|
||||
"libreoffice",
|
||||
"--headless",
|
||||
"--convert-to", "pdf",
|
||||
"--outdir", screenshots_dir,
|
||||
pptx_path
|
||||
], check=True, capture_output=True, text=True, timeout=500, env=env)
|
||||
|
||||
result = subprocess.run(
|
||||
[
|
||||
"libreoffice",
|
||||
"--headless",
|
||||
"--convert-to",
|
||||
"pdf",
|
||||
"--outdir",
|
||||
screenshots_dir,
|
||||
pptx_path,
|
||||
],
|
||||
check=True,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=500,
|
||||
env=env,
|
||||
)
|
||||
|
||||
print(f"LibreOffice PDF conversion output: {result.stdout}")
|
||||
if result.stderr:
|
||||
print(f"LibreOffice PDF conversion warnings: {result.stderr}")
|
||||
|
|
@ -516,74 +595,19 @@ async def _generate_screenshots(pptx_path: str, temp_dir: str) -> List[str]:
|
|||
except subprocess.CalledProcessError as e:
|
||||
error_msg = e.stderr if e.stderr else str(e)
|
||||
raise Exception(f"LibreOffice PDF conversion failed: {error_msg}")
|
||||
|
||||
|
||||
# Find the generated PDF file (LibreOffice uses original filename)
|
||||
pdf_files = [f for f in os.listdir(screenshots_dir) if f.endswith('.pdf')]
|
||||
pdf_files = [f for f in os.listdir(screenshots_dir) if f.endswith(".pdf")]
|
||||
if not pdf_files:
|
||||
raise Exception("LibreOffice failed to generate PDF file")
|
||||
|
||||
|
||||
actual_pdf_path = os.path.join(screenshots_dir, pdf_files[0])
|
||||
print(f"Generated PDF: {actual_pdf_path}")
|
||||
|
||||
# Step 2: Convert PDF to individual PNG images using ImageMagick
|
||||
print("Starting ImageMagick PNG conversion...")
|
||||
try:
|
||||
result = subprocess.run([
|
||||
"convert",
|
||||
"-density", "150",
|
||||
actual_pdf_path,
|
||||
os.path.join(screenshots_dir, "slide_%03d.png")
|
||||
], check=True, capture_output=True, text=True, timeout=500, env=env)
|
||||
|
||||
print(f"ImageMagick conversion output: {result.stdout}")
|
||||
if result.stderr:
|
||||
print(f"ImageMagick conversion warnings: {result.stderr}")
|
||||
except subprocess.TimeoutExpired:
|
||||
raise Exception("ImageMagick PNG conversion timed out after 120 seconds")
|
||||
except subprocess.CalledProcessError as e:
|
||||
error_msg = e.stderr if e.stderr else str(e)
|
||||
raise Exception(f"ImageMagick PNG conversion failed: {error_msg}")
|
||||
|
||||
# Find generated PNG files (ImageMagick creates slide_000.png, slide_001.png, etc.)
|
||||
print("Checking for generated PNG files...")
|
||||
png_files = sorted([f for f in os.listdir(screenshots_dir) if f.startswith("slide_") and f.endswith('.png')])
|
||||
print(f"Generated PNG files: {png_files}")
|
||||
|
||||
if not png_files:
|
||||
raise Exception("ImageMagick failed to generate any PNG files")
|
||||
|
||||
# Rename files from slide_000.png format to slide_1.png format expected by the API
|
||||
print("Renaming PNG files to expected format...")
|
||||
screenshot_paths = []
|
||||
for i in range(slide_count):
|
||||
# ImageMagick generates slide_000.png, slide_001.png, etc.
|
||||
source_file = f"slide_{i:03d}.png"
|
||||
source_path = os.path.join(screenshots_dir, source_file)
|
||||
|
||||
# We need slide_1.png, slide_2.png, etc.
|
||||
target_file = f"slide_{i+1}.png"
|
||||
target_path = os.path.join(screenshots_dir, target_file)
|
||||
|
||||
if os.path.exists(source_path):
|
||||
# Rename to expected format
|
||||
shutil.move(source_path, target_path)
|
||||
screenshot_paths.append(target_path)
|
||||
print(f"✓ Renamed {source_file} to {target_file}")
|
||||
else:
|
||||
print(f"⚠ Warning: Expected file {source_file} not found, creating placeholder")
|
||||
# Create empty placeholder
|
||||
with open(target_path, 'w') as f:
|
||||
f.write("")
|
||||
screenshot_paths.append(target_path)
|
||||
|
||||
print(f"Successfully generated {len(screenshot_paths)} slide screenshots")
|
||||
return screenshot_paths
|
||||
|
||||
return actual_pdf_path
|
||||
|
||||
except Exception as e:
|
||||
# Re-raise the specific exceptions we've already handled
|
||||
if "timed out" in str(e) or "failed:" in str(e):
|
||||
raise
|
||||
# Handle any other unexpected exceptions
|
||||
raise Exception(f"Screenshot generation failed: {str(e)}")
|
||||
|
||||
|
||||
|
|
@ -95,13 +95,15 @@ class DocumentsLoader:
|
|||
def load_powerpoint(self, file_path: str) -> str:
|
||||
return self.docling_service.parse_to_markdown(file_path)
|
||||
|
||||
def get_page_images_from_pdf(self, file_path: str, temp_dir: str):
|
||||
@classmethod
|
||||
def get_page_images_from_pdf(cls, file_path: str, temp_dir: str):
|
||||
with pdfplumber.open(file_path) as pdf:
|
||||
for page in pdf.pages:
|
||||
img = page.to_image(resolution=300)
|
||||
img = page.to_image(resolution=150)
|
||||
img.save(os.path.join(temp_dir, f"page_{page.page_number}.png"))
|
||||
|
||||
async def get_page_images_from_pdf_async(self, file_path: str, temp_dir: str):
|
||||
@classmethod
|
||||
async def get_page_images_from_pdf_async(cls, file_path: str, temp_dir: str):
|
||||
return await asyncio.to_thread(
|
||||
self.get_page_images_from_pdf, file_path, temp_dir
|
||||
cls.get_page_images_from_pdf, file_path, temp_dir
|
||||
)
|
||||
|
|
|
|||
|
|
@ -182,5 +182,5 @@ def process_slide_add_placeholder_assets(slide: SlideModel):
|
|||
|
||||
for icon_path in icon_paths:
|
||||
icon_dict = get_dict_at_path(slide.content, icon_path)
|
||||
icon_dict["__icon_url__"] = "/static/icons/placeholder.png"
|
||||
icon_dict["__icon_url__"] = "/static/icons/placeholder.svg"
|
||||
set_dict_at_path(slide.content, icon_path, icon_dict)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue