presenton/servers/fastapi/image_processor/image_from_pptx.py
2025-05-13 05:28:01 +05:45

39 lines
1.1 KiB
Python

import io
import os
import shlex
import shutil
import subprocess
from typing import List

from image_processor.utils import get_page_images_from_pdf
def _libreoffice_command() -> List[str]:
    """Return the launcher configured in ``$LIBREOFFICE`` as an argv prefix."""
    configured = (os.getenv("LIBREOFFICE") or "").strip()
    if not configured:
        # Without this check the literal word "None" would be executed.
        raise RuntimeError("LIBREOFFICE environment variable is not set")
    # A value that resolves to a runnable program is used verbatim so paths
    # containing spaces or backslashes survive; anything else is treated as a
    # command line (e.g. a quoted path followed by extra flags).
    if shutil.which(configured):
        return [configured]
    return shlex.split(configured)


def get_pdf_from_pptx(pptx_path: str, temp_dir: str) -> str:
    """Convert a PPTX file to PDF with LibreOffice and return the PDF path.

    The PDF is expected at ``<temp_dir>/<pptx base name>.pdf``. If a file
    already exists at that location it is returned as-is and LibreOffice is
    not started.

    Args:
        pptx_path: Path of the presentation to convert.
        temp_dir: Directory passed to LibreOffice as ``--outdir``.

    Returns:
        Path of the PDF inside ``temp_dir``.

    Raises:
        RuntimeError: If the ``LIBREOFFICE`` environment variable is unset
            or empty.
        Exception: If LibreOffice cannot be started or no PDF exists after
            all attempts.
    """
    base_name = os.path.splitext(os.path.basename(pptx_path))[0]
    pdf_path = os.path.join(temp_dir, f"{base_name}.pdf")
    if os.path.exists(pdf_path):
        return pdf_path

    # An argument list (no shell) keeps quotes, spaces, ``$`` and backticks in
    # the paths from being interpreted by a shell.
    command = [
        *_libreoffice_command(),
        "--convert-to",
        "pdf",
        pptx_path,
        "--outdir",
        temp_dir,
    ]

    max_attempts = 4  # same number of LibreOffice runs as before
    for _ in range(max_attempts):
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except OSError as err:
            # The executable is missing or not runnable; retrying cannot help.
            raise Exception("Failed to convert PPTX to PDF") from err
        print("LibreOffice stdout:", result.stdout)
        print("LibreOffice stderr:", result.stderr)
        # Check after every run, including the last one, so a late success
        # is not reported as a failure.
        if os.path.exists(pdf_path):
            return pdf_path

    raise Exception("Failed to convert PPTX to PDF")
def get_images_from_pptx(pptx_path: str) -> List[str]:
    """Render a PPTX file to page images via an intermediate PDF.

    The directory containing ``pptx_path`` is used as the working directory:
    it is handed to ``get_pdf_from_pptx`` as the PDF output location and to
    ``get_page_images_from_pdf`` together with the resulting PDF path.

    Args:
        pptx_path: Path of the presentation to process.

    Returns:
        The result of ``get_page_images_from_pdf`` (annotated here as a list
        of strings; presumably image file paths, confirm against that helper).
    """
    working_dir = os.path.dirname(pptx_path)
    return get_page_images_from_pdf(
        get_pdf_from_pptx(pptx_path, working_dir),
        working_dir,
    )