presenton/servers/fastapi/services/export_task_service.py

289 lines
9.9 KiB
Python

import asyncio
import json
import os
import shutil
import subprocess
import tempfile
from typing import Literal, Mapping
from fastapi import HTTPException
from pydantic import BaseModel
from services.liteparse_service import _snippet, _subprocess_text_kwargs
from utils.asset_directory_utils import resolve_app_path_to_filesystem
from utils.get_env import get_app_data_directory_env, get_temp_directory_env
class PptxToHtmlDocument(BaseModel):
slides: list[str]
font_css: str = ""
width: float
height: float
images_dir: str
fonts_dir: str
class PresentationExportTaskResult(BaseModel):
path: str
class ExportTaskService:
def __init__(self, timeout_seconds: int = 300):
self.timeout_seconds = timeout_seconds
self.node_binary = os.getenv("LITEPARSE_NODE_BINARY", "node")
self.export_dir = self._resolve_export_dir()
self.entrypoint_path = self._resolve_entrypoint_path(self.export_dir)
self.converter_path = self._resolve_converter_path(self.export_dir)
@staticmethod
def _resolve_export_dir() -> str:
configured = (os.getenv("EXPORT_RUNTIME_DIR") or "").strip()
if configured:
return configured
package_root = (os.getenv("EXPORT_PACKAGE_ROOT") or "").strip()
if package_root:
return package_root
cwd = os.path.abspath(".")
service_dir = os.path.dirname(__file__)
candidates = [
os.path.abspath(os.path.join(cwd, "..", "..", "presentation-export")),
os.path.abspath(os.path.join(cwd, "..", "presentation-export")),
os.path.abspath(os.path.join(service_dir, "..", "..", "..", "presentation-export")),
os.path.abspath(os.path.join(service_dir, "..", "..", "..", "..", "presentation-export")),
]
for candidate in candidates:
if os.path.isfile(os.path.join(candidate, "index.cjs")) or os.path.isfile(
os.path.join(candidate, "index.js")
):
return candidate
return candidates[0]
@staticmethod
def _resolve_entrypoint_path(export_dir: str) -> str:
index_cjs = os.path.join(export_dir, "index.cjs")
if os.path.isfile(index_cjs):
return index_cjs
index_js = os.path.join(export_dir, "index.js")
if os.path.isfile(index_js):
shutil.copyfile(index_js, index_cjs)
return index_cjs
return index_cjs
@staticmethod
def _resolve_converter_path(export_dir: str) -> str:
py_dir = os.path.join(export_dir, "py")
extension = ".exe" if os.name == "nt" else ""
platform_name = sys_platform()
arch_name = sys_arch()
candidates = [
os.path.join(py_dir, f"convert-{platform_name}-{arch_name}{extension}"),
os.path.join(py_dir, f"convert-{platform_name}{extension}"),
os.path.join(py_dir, f"convert{extension}"),
os.path.join(py_dir, "convert"),
]
for candidate in candidates:
if candidate and os.path.isfile(candidate):
return candidate
return candidates[1]
def _build_node_env(self) -> Mapping[str, str]:
env = os.environ.copy()
app_data_directory = get_app_data_directory_env()
if not app_data_directory:
raise HTTPException(
status_code=500,
detail="APP_DATA_DIRECTORY must be set for PPTX-to-HTML export",
)
env["APP_DATA_DIRECTORY"] = app_data_directory
temp_directory = get_temp_directory_env() or os.path.join(
tempfile.gettempdir(), "presenton"
)
os.makedirs(temp_directory, exist_ok=True)
env["TEMP_DIRECTORY"] = temp_directory
fastapi_public_url = (os.getenv("FASTAPI_PUBLIC_URL") or "").strip()
if not fastapi_public_url:
raise HTTPException(
status_code=500,
detail="FASTAPI_PUBLIC_URL must be set for PPTX-to-HTML export",
)
env["ASSETS_BASE_URL"] = f"{fastapi_public_url.rstrip('/')}/app_data"
env["BUILT_PYTHON_MODULE_PATH"] = self.converter_path
return env
def _ensure_runtime_ready(self) -> None:
if not os.path.isfile(self.entrypoint_path):
raise HTTPException(
status_code=500,
detail=f"Export runtime not found at {self.entrypoint_path}",
)
if not os.path.isfile(self.converter_path):
raise HTTPException(
status_code=500,
detail=f"Export converter binary not found at {self.converter_path}",
)
@staticmethod
def _resolve_output_path(response_data: dict) -> str:
path_value = response_data.get("path")
if isinstance(path_value, str):
resolved = resolve_app_path_to_filesystem(path_value) or path_value
if os.path.isfile(resolved):
return resolved
url_value = response_data.get("url")
if isinstance(url_value, str):
resolved = resolve_app_path_to_filesystem(url_value)
if resolved and os.path.isfile(resolved):
return resolved
raise HTTPException(
status_code=500,
detail="PPTX-to-HTML task completed without a valid output path",
)
@staticmethod
def _create_task_paths() -> tuple[str, str, str]:
temp_root = get_temp_directory_env() or os.path.join(
tempfile.gettempdir(), "presenton"
)
os.makedirs(temp_root, exist_ok=True)
temp_dir = tempfile.mkdtemp(prefix="export-task-", dir=temp_root)
task_path = os.path.join(temp_dir, "export_task.json")
response_path = os.path.join(temp_dir, "export_task.response.json")
return temp_dir, task_path, response_path
async def _run_task(self, task_payload: dict, response_error_detail: str) -> dict:
self._ensure_runtime_ready()
temp_dir, task_path, response_path = self._create_task_paths()
try:
with open(task_path, "w", encoding="utf-8") as task_file:
json.dump(task_payload, task_file)
result = await asyncio.to_thread(
subprocess.run,
[self.node_binary, self.entrypoint_path, task_path],
cwd=self.export_dir,
capture_output=True,
timeout=self.timeout_seconds,
env=dict(self._build_node_env()),
**_subprocess_text_kwargs(),
)
if result.returncode != 0:
raise HTTPException(
status_code=500,
detail=(
"Export task failed. "
f"stderr={_snippet(result.stderr)} stdout={_snippet(result.stdout)}"
),
)
if not os.path.isfile(response_path):
raise HTTPException(
status_code=500,
detail=response_error_detail,
)
with open(response_path, "r", encoding="utf-8") as response_file:
return json.load(response_file)
except subprocess.TimeoutExpired as exc:
raise HTTPException(
status_code=500,
detail=f"Export task timed out after {self.timeout_seconds} seconds",
) from exc
except json.JSONDecodeError as exc:
raise HTTPException(
status_code=500,
detail="Export task produced invalid JSON output",
) from exc
except OSError as exc:
raise HTTPException(
status_code=500,
detail=f"Failed to run export task: {exc}",
) from exc
finally:
shutil.rmtree(temp_dir, ignore_errors=True)
async def export_from_url(
self,
url: str,
title: str,
export_as: Literal["pdf", "pptx"],
fastapi_url: str | None = None,
) -> PresentationExportTaskResult:
response_data = await self._run_task(
{
"type": "export",
"url": url,
"format": export_as,
"title": title,
"fastapiUrl": fastapi_url or None,
},
"Export task did not produce a response file",
)
return PresentationExportTaskResult(
path=self._resolve_output_path(response_data),
)
async def convert_pptx_to_html(
self, pptx_path: str, get_fonts: bool = False
) -> PptxToHtmlDocument:
if not os.path.isfile(pptx_path):
raise HTTPException(status_code=400, detail=f"PPTX not found: {pptx_path}")
try:
response_data = await self._run_task(
{
"type": "pptx-to-html",
"pptx_path": pptx_path,
"get_fonts": get_fonts,
},
"PPTX-to-HTML export task did not produce a response file",
)
output_path = self._resolve_output_path(response_data)
with open(output_path, "r", encoding="utf-8") as output_file:
output_data = json.load(output_file)
return PptxToHtmlDocument(**output_data)
except json.JSONDecodeError as exc:
raise HTTPException(
status_code=500,
detail="PPTX-to-HTML export produced invalid JSON output",
) from exc
def sys_platform() -> str:
if os.name == "nt":
return "win32"
return os.sys.platform
def sys_arch() -> str:
machine = (os.environ.get("PROCESSOR_ARCHITECTURE") or "").lower()
if not machine and hasattr(os, "uname"):
machine = os.uname().machine.lower()
arch_map = {
"x86_64": "x64",
"amd64": "x64",
"x64": "x64",
"aarch64": "arm64",
"arm64": "arm64",
}
return arch_map.get(machine, machine or "x64")
EXPORT_TASK_SERVICE = ExportTaskService()