Merge pull request #481 from presenton/feat/liteparseVsDocling

Feat/liteparse vs docling
This commit is contained in:
Sudip Parajuli 2026-03-30 20:23:27 +05:45 committed by GitHub
commit 28ff86c19b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 1982 additions and 1352 deletions

9
electron/.gitignore vendored
View file

@ -21,6 +21,13 @@ app_dist
resources/fastapi
resources/nextjs
dist
eng.traineddata
servers/fastapi/build/
servers/fastapi/dist/
servers/fastapi/fastembed_cache/
electron/.cache/
electron/.cache/export-runtime/
electron/.cache/export-runtime/
*.pkg
*.toc
*.zip
*.pyc

View file

@ -1,13 +1,14 @@
/**
* IPC handlers for the unified setup installer (LibreOffice + Chromium).
* IPC handlers for the unified setup installer (LibreOffice + Chromium + ImageMagick).
* - setup:get-status which dependencies are missing
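* - setup:install-chrome download Chromium (browser-snapshots) with progress
* - setup:install-imagemagick install ImageMagick via apt-get / Homebrew / Chocolatey
* - setup:check-imagemagick re-check whether ImageMagick is detected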
*/
import { ipcMain, WebContents } from "electron";
import { ipcMain, WebContents, shell } from "electron";
import fs from "fs";
import path from "path";
import os from "os";
import { spawn, spawnSync } from "child_process";
import puppeteer from "puppeteer";
import {
Browser,
@ -17,6 +18,11 @@ import {
resolveBuildId,
} from "@puppeteer/browsers";
import { getSetupStatus } from "../utils/setup-dependencies";
import {
getImageMagickDownloadUrl,
getImageMagickManualInstallCommands,
isImageMagickInstalled,
} from "../utils/imagemagick-check";
function getPuppeteerCacheDir(): string {
const configCache =
@ -42,9 +48,105 @@ function sendChromeLog(wc: WebContents, level: string, text: string) {
}
}
/**
 * Pushes an ImageMagick install progress update to the renderer on the
 * "setup:imagemagick-progress" channel.
 *
 * Nothing is sent once the target WebContents has been destroyed.
 *
 * @param wc - WebContents that receives the update.
 * @param phase - Stage of the install flow being reported.
 * @param percent - Optional completion percentage.
 * @param message - Optional status text to show alongside the phase.
 */
function sendImageMagickProgress(
  wc: WebContents,
  phase: "installing" | "done" | "error",
  percent?: number,
  message?: string
) {
  if (wc.isDestroyed()) {
    return;
  }
  const payload = { phase, percent, message };
  wc.send("setup:imagemagick-progress", payload);
}
/**
 * Forwards one log line to the renderer on the "setup:imagemagick-log"
 * channel, unless the target WebContents has already been destroyed.
 *
 * @param wc - WebContents that receives the log entry.
 * @param level - Severity/category tag for the entry (callers in this file
 *   pass "info", "error", "cmd" and "ok").
 * @param text - The log line itself.
 */
function sendImageMagickLog(wc: WebContents, level: string, text: string) {
  if (wc.isDestroyed()) {
    return;
  }
  const entry = { level, text };
  wc.send("setup:imagemagick-log", entry);
}
/**
 * Probes for an executable by running it synchronously with a harmless
 * "print version" style argument list.
 *
 * @param command - Executable name or path to launch.
 * @param versionArgs - Arguments used for the probe; defaults to `--version`.
 * @returns `true` only when the process ran and exited with status 0. A
 *   command that cannot be spawned yields a non-zero/null status and
 *   therefore `false`.
 */
function commandExists(command: string, versionArgs: string[] = ["--version"]): boolean {
  const { status } = spawnSync(command, versionArgs, {
    windowsHide: true,
    stdio: "pipe",
  });
  return status === 0;
}
/**
 * Locates a usable Homebrew executable.
 *
 * A `brew` that runs successfully from PATH wins; otherwise the two
 * well-known install locations are checked on disk, in order.
 *
 * @returns `"brew"`, an absolute path to an existing brew binary, or `null`
 *   when none is found.
 */
function resolveBrewCommand(): string | null {
  if (commandExists("brew")) {
    return "brew";
  }
  const knownLocations = ["/opt/homebrew/bin/brew", "/usr/local/bin/brew"];
  const existing = knownLocations.find((location) => fs.existsSync(location));
  return existing ?? null;
}
/**
 * Chooses the command used to run package-manager operations with elevated
 * privileges on Linux.
 *
 * `pkexec` is probed first, then `sudo`; each is probed with its own
 * version flag.
 *
 * @returns `"pkexec"`, `"sudo"`, or `null` when neither probe succeeds.
 */
function resolveLinuxEscalationCommand(): string | null {
  const probes: Array<[string, string[]]> = [
    ["pkexec", ["--version"]],
    ["sudo", ["-V"]],
  ];
  for (const [tool, versionFlag] of probes) {
    if (commandExists(tool, versionFlag)) {
      return tool;
    }
  }
  return null;
}
/**
 * Streams the manual ImageMagick install instructions to the renderer log.
 *
 * Lines ending in ":" are treated as headings and logged at "info" level;
 * every other line is logged at "cmd" level.
 *
 * @param wc - WebContents that receives the log entries.
 */
function logManualImageMagickCommands(wc: WebContents) {
  getImageMagickManualInstallCommands().forEach((instruction) => {
    const isHeading = instruction.endsWith(":");
    sendImageMagickLog(wc, isHeading ? "info" : "cmd", instruction);
  });
}
/**
 * Runs an install command as a child process and mirrors its output into the
 * ImageMagick setup log.
 *
 * stdin is ignored, so the child cannot read interactive input. stdout is
 * logged at "info"; stderr chunks are logged at "error" only when they
 * contain the word "error" (case-insensitive), otherwise at "info".
 *
 * @param wc - WebContents that receives the log lines.
 * @param command - Executable to launch.
 * @param args - Arguments passed to the executable.
 * @returns A promise that resolves when the process exits with code 0 and
 *   rejects when it cannot be spawned, exits with a non-zero code, or is
 *   terminated by a signal.
 */
function runInstallCommand(
  wc: WebContents,
  command: string,
  args: string[]
): Promise<void> {
  sendImageMagickLog(wc, "info", `Running: ${command} ${args.join(" ")}`);
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      stdio: ["ignore", "pipe", "pipe"],
      windowsHide: process.platform === "win32",
    });

    // Decode at the stream level so a multi-byte UTF-8 character that is
    // split across two chunks is not turned into replacement characters.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");

    child.stdout.on("data", (data) => {
      const text = String(data).trim();
      if (text) sendImageMagickLog(wc, "info", text);
    });
    child.stderr.on("data", (data) => {
      const text = String(data).trim();
      if (text) {
        // Many installers write ordinary progress to stderr, so only chunks
        // that mention an error are escalated.
        sendImageMagickLog(
          wc,
          text.toLowerCase().includes("error") ? "error" : "info",
          text
        );
      }
    });
    child.on("error", reject);
    child.on("close", (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }
      if (code === null && signal) {
        // The exit code is null when the process was killed by a signal;
        // report the signal instead of "exited with code null".
        reject(new Error(`${command} was terminated by signal ${signal}`));
        return;
      }
      reject(new Error(`${command} exited with code ${code}`));
    });
  });
}
export function setupSetupInstallHandlers() {
ipcMain.handle("setup:get-status", () => {
return getSetupStatus() ?? { needsLibreOffice: false, needsChrome: false };
return (
getSetupStatus() ?? {
needsLibreOffice: false,
needsChrome: false,
needsImageMagick: false,
}
);
});
ipcMain.handle(
@ -121,4 +223,122 @@ export function setupSetupInstallHandlers() {
return { ok: true };
}
);
// IPC handler: attempts an automatic ImageMagick install for the current
// platform and reports progress/log lines back to the calling renderer.
//   - linux:  apt-get update + apt-get install -y imagemagick, run through
//             pkexec or sudo
//   - darwin: brew install imagemagick (installing Homebrew first if missing)
//   - win32:  choco install imagemagick.app -y
// Resolves with { ok: true } on success. On any failure it logs the error and
// the manual install commands, tries to open the download page, emits an
// "error" progress event, and resolves with { ok: false, error }.
ipcMain.handle(
  "setup:install-imagemagick",
  async (event): Promise<{ ok: boolean; error?: string }> => {
    const wc = event.sender;
    try {
      sendImageMagickProgress(
        wc,
        "installing",
        undefined,
        "Installing ImageMagick..."
      );
      if (process.platform === "linux") {
        if (commandExists("apt-get")) {
          const escalator = resolveLinuxEscalationCommand();
          if (!escalator) {
            throw new Error(
              "Neither pkexec nor sudo is available to run apt-get install."
            );
          }
          await runInstallCommand(wc, escalator, [
            "apt-get",
            "update",
          ]);
          await runInstallCommand(wc, escalator, [
            "apt-get",
            "install",
            "-y",
            "imagemagick",
          ]);
        } else {
          throw new Error(
            "apt-get is unavailable. Install ImageMagick manually using your package manager."
          );
        }
      } else if (process.platform === "darwin") {
        let brewCommand = resolveBrewCommand();
        if (!brewCommand) {
          sendImageMagickLog(
            wc,
            "info",
            "Homebrew not found. Installing Homebrew first..."
          );
          const installHomebrewCommand =
            'NONINTERACTIVE=1 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"';
          await runInstallCommand(wc, "/bin/bash", ["-c", installHomebrewCommand]);
          // Look again: the fresh install may only be reachable via its
          // absolute path rather than PATH.
          brewCommand = resolveBrewCommand();
        }
        if (!brewCommand) {
          throw new Error(
            "Homebrew installation completed, but brew was not found on PATH."
          );
        }
        await runInstallCommand(wc, brewCommand, ["install", "imagemagick"]);
      } else if (process.platform === "win32") {
        if (commandExists("choco", ["-v"])) {
          await runInstallCommand(wc, "choco", [
            "install",
            "imagemagick.app",
            "-y",
          ]);
        } else {
          throw new Error(
            "Chocolatey is not installed. Falling back to direct installer download."
          );
        }
      } else {
        throw new Error(
          "Unsupported platform for automatic install. Use manual install from the official download page."
        );
      }
      sendImageMagickProgress(wc, "done", 100, "ImageMagick install finished");
      return { ok: true };
    } catch (error) {
      const message =
        error instanceof Error ? error.message : "ImageMagick install failed";
      sendImageMagickLog(wc, "error", message);
      logManualImageMagickCommands(wc);
      const downloadUrl = getImageMagickDownloadUrl();
      sendImageMagickLog(
        wc,
        "info",
        `Opening manual install link: ${downloadUrl}`
      );
      try {
        await shell.openExternal(downloadUrl);
      } catch (openError) {
        // Opening the browser is best-effort. If it fails, the handler must
        // still report the original install failure and emit the final
        // "error" progress event instead of rejecting the IPC call.
        const reason =
          openError instanceof Error ? openError.message : String(openError);
        sendImageMagickLog(
          wc,
          "error",
          `Could not open the link automatically (${reason}). Open it manually: ${downloadUrl}`
        );
      }
      sendImageMagickProgress(
        wc,
        "error",
        undefined,
        "Finish manual installation, then click Retry."
      );
      return { ok: false, error: message };
    }
  }
);
// IPC handler: re-checks whether ImageMagick is detectable and mirrors the
// outcome to the calling renderer as a progress event plus a log line.
// Resolves with { ok: true } when detected, otherwise { ok: false, error }.
ipcMain.handle(
  "setup:check-imagemagick",
  async (event): Promise<{ ok: boolean; error?: string }> => {
    const target = event.sender;
    if (!isImageMagickInstalled()) {
      const notDetected =
        "ImageMagick is not detected yet. Install it, then click Retry.";
      sendImageMagickProgress(target, "error", undefined, notDetected);
      sendImageMagickLog(target, "error", notDetected);
      return { ok: false, error: notDetected };
    }
    sendImageMagickProgress(target, "done", 100, "ImageMagick detected");
    sendImageMagickLog(target, "ok", "ImageMagick is installed and ready.");
    return { ok: true };
  }
);
}

View file

@ -13,6 +13,8 @@ import { setupSetupInstallHandlers } from "./ipc/setup_install_handlers";
import { checkDependenciesBeforeWindow } from "./utils/setup-dependencies";
import { getSofficePath, isLibreOfficeInstalled } from "./utils/libreoffice-check";
import { getPuppeteerExecutablePath, isChromeInstalled } from "./utils/puppeteer-check";
import { getLiteParseRunnerPath } from "./utils/liteparse-check";
import { isImageMagickInstalled } from "./utils/imagemagick-check";
import { startUpdateChecker, stopUpdateChecker } from "./utils/update-checker";
@ -23,6 +25,7 @@ let isStopping = false;
const startupStatus: Record<string, string> = {
libreoffice: "checking",
puppeteer: "checking",
imagemagick: "checking",
};
// Allow renderer to query initial startup status as soon as it loads.
@ -122,6 +125,7 @@ async function startServers(fastApiPort: number, nextjsPort: number) {
// Resolved by libreoffice-check.ts at startup; lets Python invoke the
// exact binary path instead of relying on the system PATH.
SOFFICE_PATH: getSofficePath(),
LITEPARSE_RUNNER_PATH: getLiteParseRunnerPath(),
},
isDev,
);
@ -188,7 +192,7 @@ app.whenReady().then(async () => {
createWindow();
win?.loadFile(path.join(baseDir, "resources/ui/homepage/index.html"));
// Single installer: checks LibreOffice and Chrome; if either is missing, shows one
// Single installer: checks LibreOffice, Chrome, and ImageMagick; if any are missing, shows one
// window that installs them one after another. Resolves when the window closes.
const setupCompleted = await checkDependenciesBeforeWindow();
if (!setupCompleted) {
@ -199,12 +203,14 @@ app.whenReady().then(async () => {
}
// Update startup status after setup (user may have installed some or all of the dependencies)
const [loResult, chromeOk] = await Promise.all([
const [loResult, chromeOk, imageMagickOk] = await Promise.all([
isLibreOfficeInstalled(),
isChromeInstalled(),
Promise.resolve(isImageMagickInstalled()),
]);
startupStatus.libreoffice = loResult.installed ? "installed" : "missing";
startupStatus.puppeteer = chromeOk ? "installed" : "missing";
startupStatus.imagemagick = imageMagickOk ? "installed" : "missing";
// Show and focus main window
win?.show();
@ -218,6 +224,7 @@ app.whenReady().then(async () => {
win?.webContents.once("did-finish-load", () => {
sendStartupStatus("libreoffice", startupStatus.libreoffice);
sendStartupStatus("puppeteer", startupStatus.puppeteer);
sendStartupStatus("imagemagick", startupStatus.imagemagick);
});
setUserConfig({

View file

@ -5,6 +5,8 @@ contextBridge.exposeInMainWorld("setupInstaller", {
installLibreOffice: () => ipcRenderer.invoke("lo:start-install"),
installChrome: () => ipcRenderer.invoke("setup:install-chrome"),
installImageMagick: () => ipcRenderer.invoke("setup:install-imagemagick"),
checkImageMagick: () => ipcRenderer.invoke("setup:check-imagemagick"),
done: () => ipcRenderer.send("setup:done"),
@ -25,4 +27,13 @@ contextBridge.exposeInMainWorld("setupInstaller", {
onChromeLog: (cb: (data: { level: string; text: string }) => void) => {
ipcRenderer.on("setup:chrome-log", (_event, data) => cb(data));
},
onImageMagickProgress: (
cb: (data: { phase: string; percent?: number; message?: string }) => void
) => {
ipcRenderer.on("setup:imagemagick-progress", (_event, data) => cb(data));
},
onImageMagickLog: (cb: (data: { level: string; text: string }) => void) => {
ipcRenderer.on("setup:imagemagick-log", (_event, data) => cb(data));
},
});

View file

@ -33,6 +33,8 @@ interface FastApiEnv {
MIGRATE_DATABASE_ON_STARTUP?: string,
/** Absolute path to the soffice binary resolved at startup by libreoffice-check.ts. */
SOFFICE_PATH?: string,
/** Absolute path to the bundled LiteParse runner script. */
LITEPARSE_RUNNER_PATH?: string,
}
interface NextJsEnv {

View file

@ -0,0 +1,51 @@
import { spawnSync } from "child_process";
/**
 * Runs a command synchronously and reports whether it exited cleanly.
 *
 * @param command - Executable name or path to launch.
 * @param args - Arguments passed to the executable.
 * @returns `true` only when the process exited with status 0; a command that
 *   cannot be spawned yields `false`.
 */
function canExecute(command: string, args: string[]): boolean {
  const { status } = spawnSync(command, args, {
    windowsHide: true,
    stdio: "pipe",
  });
  return status === 0;
}
/**
 * Detects ImageMagick by probing its command-line entry points with
 * `-version`.
 *
 * `magick` (the ImageMagick 7+ command) is tried first, then `convert`
 * (the legacy command shipped by Linux/macOS packages). Probing stops at the
 * first command that succeeds.
 *
 * @returns `true` when either probe exits successfully.
 */
export function isImageMagickInstalled(): boolean {
  const entryPoints = ["magick", "convert"];
  return entryPoints.some((binary) => canExecute(binary, ["-version"]));
}
/**
 * Returns the page a user should open to install ImageMagick manually on the
 * current platform.
 *
 * - win32: the Windows section of the official download page. A link to one
 *   exact installer build is deliberately avoided: it goes stale when that
 *   build is superseded, and it hard-codes a single CPU architecture.
 * - darwin: the Homebrew home page (ImageMagick is installed via `brew`).
 * - anything else: the Linux section of the official download page.
 *
 * @returns An absolute https URL.
 */
export function getImageMagickDownloadUrl(): string {
  if (process.platform === "win32") {
    return "https://imagemagick.org/script/download.php#windows";
  }
  if (process.platform === "darwin") {
    return "https://brew.sh/";
  }
  return "https://imagemagick.org/script/download.php#linux";
}
/**
 * Builds the manual install instructions for the current platform.
 *
 * The result alternates heading lines (ending in ":") with the command or
 * URL that belongs under each heading.
 *
 * @returns Instruction lines for Windows (installer download), macOS
 *   (Homebrew) or, for every other platform, apt-get.
 */
export function getImageMagickManualInstallCommands(): string[] {
  switch (process.platform) {
    case "win32":
      return ["Download and run the installer:", getImageMagickDownloadUrl()];
    case "darwin":
      return [
        "Install Homebrew:",
        '/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"',
        "Install ImageMagick:",
        "brew install imagemagick",
      ];
    default:
      return [
        "Install ImageMagick:",
        "sudo apt-get update",
        "sudo apt-get install -y imagemagick",
      ];
  }
}

View file

@ -0,0 +1,6 @@
import path from "path";
import { baseDir } from "./constants";
/**
 * Builds the path to the LiteParse runner script under the application's
 * base directory: `<baseDir>/resources/document-extraction/liteparse_runner.mjs`.
 *
 * The path is only composed here; nothing checks that the file exists.
 *
 * @returns The joined path to `liteparse_runner.mjs`.
 */
export function getLiteParseRunnerPath(): string {
  const runnerSegments = ["resources", "document-extraction", "liteparse_runner.mjs"];
  return path.join(baseDir, ...runnerSegments);
}

View file

@ -29,6 +29,7 @@ function shouldSkipDownload(): boolean {
/**
 * Which dependencies the setup installer still has to deal with.
 * Each flag is `true` when the corresponding dependency was NOT detected.
 */
export interface SetupStatus {
/** LibreOffice was not detected. */
needsLibreOffice: boolean;
/** Chrome (used by Puppeteer) was not detected. */
needsChrome: boolean;
/** ImageMagick was not detected. */
needsImageMagick: boolean;
}
/**

View file

@ -1,9 +1,10 @@
/**
* setup-dependencies.ts
*
* Single installer window that ensures LibreOffice and Chrome (Puppeteer) are
* Single installer window that ensures LibreOffice, Chrome (Puppeteer), and
* ImageMagick are
* available before the user starts creating presentations. Runs checks, then
* if either is missing shows one installer that runs LibreOffice then Chrome
* if any are missing shows one installer that runs dependency setup steps
* in sequence (each with Install / Skip).
*/
@ -15,6 +16,7 @@ import {
isChromeInstalled,
type SetupStatus,
} from "./puppeteer-check";
import { isImageMagickInstalled } from "./imagemagick-check";
export type { SetupStatus };
@ -26,40 +28,44 @@ export function getSetupStatus(): SetupStatus | null {
}
/**
* Checks LibreOffice and Chrome. If both are present, returns immediately.
* If either is missing, opens one installer window that runs LibreOffice
* then Chrome in sequence. Returns true only when all required dependencies
* Checks LibreOffice, Chrome and ImageMagick. If all are present, returns
* immediately. If any are missing, opens one installer window that runs each
* missing setup step in sequence. Returns true only when all required dependencies
* are installed; false when the installer is closed/skipped before completion.
*/
export async function checkDependenciesBeforeWindow(): Promise<boolean> {
const [loResult, chromeInstalled] = await Promise.all([
const [loResult, chromeInstalled, imageMagickInstalled] = await Promise.all([
isLibreOfficeInstalled(),
isChromeInstalled(),
Promise.resolve(isImageMagickInstalled()),
]);
const needsLibreOffice = !loResult.installed;
const needsChrome = !chromeInstalled;
const needsImageMagick = !imageMagickInstalled;
if (!needsLibreOffice && !needsChrome) {
if (!needsLibreOffice && !needsChrome && !needsImageMagick) {
return true;
}
currentSetupStatus = {
needsLibreOffice,
needsChrome,
needsImageMagick,
};
await showSetupInstallerWindow();
// Re-check after installer closes; setup can only proceed when all
// required dependencies are actually installed.
const [postLoResult, postChromeInstalled] = await Promise.all([
const [postLoResult, postChromeInstalled, postImageMagickInstalled] = await Promise.all([
isLibreOfficeInstalled(),
isChromeInstalled(),
Promise.resolve(isImageMagickInstalled()),
]);
currentSetupStatus = null;
return postLoResult.installed && postChromeInstalled;
return postLoResult.installed && postChromeInstalled && postImageMagickInstalled;
}
/**

View file

@ -9,6 +9,7 @@
"version": "0.6.3-beta",
"hasInstallScript": true,
"dependencies": {
"@llamaindex/liteparse": "^1.4.0",
"@puppeteer/browsers": "^1.9.1",
"@tailwindcss/cli": "^4.1.5",
"@types/uuid": "^10.0.0",
@ -54,6 +55,16 @@
"node": ">=6.9.0"
}
},
"node_modules/@borewit/text-codec": {
"version": "0.2.2",
"resolved": "https://registry.npmjs.org/@borewit/text-codec/-/text-codec-0.2.2.tgz",
"integrity": "sha512-DDaRehssg1aNrH4+2hnj1B7vnUGEjU6OIlyRdkMd0aUdIUvKXrJfXsy8LVtXAy7DRvYVluWbMspsRhz2lcW0mQ==",
"license": "MIT",
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/@develar/schema-utils": {
"version": "2.6.5",
"resolved": "https://registry.npmjs.org/@develar/schema-utils/-/schema-utils-2.6.5.tgz",
@ -507,6 +518,12 @@
"tslib": "^2.4.0"
}
},
"node_modules/@hyzyla/pdfium": {
"version": "2.1.12",
"resolved": "https://registry.npmjs.org/@hyzyla/pdfium/-/pdfium-2.1.12.tgz",
"integrity": "sha512-2ezbrJk9V4foB3+U+eQ7234spsHmrufPU+9EV2cVZCnhTLLfelPz7wWshO0HjUNtcECNBaAfEzrdaQZOigkW+A==",
"license": "MIT"
},
"node_modules/@img/colour": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.0.0.tgz",
@ -1156,6 +1173,67 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@llamaindex/liteparse": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/@llamaindex/liteparse/-/liteparse-1.4.0.tgz",
"integrity": "sha512-58Tr4vAutcaf0Cxe7GK4cknpzcpN3tTzUhIAwWioWuSDqVPS3jpNhVVfqE5tV5PE4za07l07QFhGscCoVm/hRw==",
"license": "Apache-2.0",
"dependencies": {
"@hyzyla/pdfium": "^2.1.9",
"axios": "^1.7.0",
"commander": "^12.0.0",
"file-type": "^21.3.3",
"form-data": "^4.0.0",
"p-limit": "^7.3.0",
"sharp": "^0.34.5",
"tesseract.js": "^7.0.0",
"unified": "^11.0.0",
"zod": "^3.23.0"
},
"bin": {
"lit": "dist/src/index.js",
"liteparse": "dist/src/index.js"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@llamaindex/liteparse/node_modules/commander": {
"version": "12.1.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz",
"integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==",
"license": "MIT",
"engines": {
"node": ">=18"
}
},
"node_modules/@llamaindex/liteparse/node_modules/p-limit": {
"version": "7.3.0",
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-7.3.0.tgz",
"integrity": "sha512-7cIXg/Z0M5WZRblrsOla88S4wAK+zOQQWeBYfV3qJuJXMr+LnbYjaadrFaS0JILfEDPVqHyKnZ1Z/1d6J9VVUw==",
"license": "MIT",
"dependencies": {
"yocto-queue": "^1.2.1"
},
"engines": {
"node": ">=20"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/@llamaindex/liteparse/node_modules/yocto-queue": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz",
"integrity": "sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==",
"license": "MIT",
"engines": {
"node": ">=12.20"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/@malept/cross-spawn-promise": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/@malept/cross-spawn-promise/-/cross-spawn-promise-2.0.0.tgz",
@ -1975,6 +2053,29 @@
"node": ">= 10"
}
},
"node_modules/@tokenizer/inflate": {
"version": "0.4.1",
"resolved": "https://registry.npmjs.org/@tokenizer/inflate/-/inflate-0.4.1.tgz",
"integrity": "sha512-2mAv+8pkG6GIZiF1kNg1jAjh27IDxEPKwdGul3snfztFerfPGI1LjDezZp3i7BElXompqEtPmoPx6c2wgtWsOA==",
"license": "MIT",
"dependencies": {
"debug": "^4.4.3",
"token-types": "^6.1.1"
},
"engines": {
"node": ">=18"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/@tokenizer/token": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz",
"integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==",
"license": "MIT"
},
"node_modules/@tootallnate/quickjs-emscripten": {
"version": "0.23.0",
"resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz",
@ -2070,6 +2171,12 @@
"@types/node": "*"
}
},
"node_modules/@types/unist": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz",
"integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==",
"license": "MIT"
},
"node_modules/@types/uuid": {
"version": "10.0.0",
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-10.0.0.tgz",
@ -2366,7 +2473,6 @@
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
"dev": true,
"license": "MIT"
},
"node_modules/at-least-node": {
@ -2379,6 +2485,26 @@
"node": ">= 4.0.0"
}
},
"node_modules/axios": {
"version": "1.14.0",
"resolved": "https://registry.npmjs.org/axios/-/axios-1.14.0.tgz",
"integrity": "sha512-3Y8yrqLSwjuzpXuZ0oIYZ/XGgLwUIBU3uLvbcpb0pidD9ctpShJd43KSlEEkVQg6DS0G9NKyzOvBfUtDKEyHvQ==",
"license": "MIT",
"dependencies": {
"follow-redirects": "^1.15.11",
"form-data": "^4.0.5",
"proxy-from-env": "^2.1.0"
}
},
"node_modules/axios/node_modules/proxy-from-env": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-2.1.0.tgz",
"integrity": "sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA==",
"license": "MIT",
"engines": {
"node": ">=10"
}
},
"node_modules/b4a": {
"version": "1.8.0",
"resolved": "https://registry.npmjs.org/b4a/-/b4a-1.8.0.tgz",
@ -2393,6 +2519,16 @@
}
}
},
"node_modules/bail": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/bail/-/bail-2.0.2.tgz",
"integrity": "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw==",
"license": "MIT",
"funding": {
"type": "github",
"url": "https://github.com/sponsors/wooorm"
}
},
"node_modules/balanced-match": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
@ -2527,6 +2663,12 @@
"readable-stream": "^3.4.0"
}
},
"node_modules/bmp-js": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/bmp-js/-/bmp-js-0.1.0.tgz",
"integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==",
"license": "MIT"
},
"node_modules/boolean": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz",
@ -2820,7 +2962,6 @@
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
@ -3005,7 +3146,6 @@
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"dev": true,
"license": "MIT",
"dependencies": {
"delayed-stream": "~1.0.0"
@ -3294,12 +3434,20 @@
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=0.4.0"
}
},
"node_modules/dequal": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz",
"integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==",
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/detect-libc": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz",
@ -3317,6 +3465,19 @@
"license": "MIT",
"optional": true
},
"node_modules/devlop": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz",
"integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==",
"license": "MIT",
"dependencies": {
"dequal": "^2.0.0"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/wooorm"
}
},
"node_modules/devtools-protocol": {
"version": "0.0.1581282",
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz",
@ -3461,7 +3622,6 @@
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
"dev": true,
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.1",
@ -3718,7 +3878,6 @@
"version": "0.1.13",
"resolved": "https://registry.npmjs.org/encoding/-/encoding-0.1.13.tgz",
"integrity": "sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==",
"dev": true,
"license": "MIT",
"optional": true,
"dependencies": {
@ -3776,7 +3935,6 @@
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
@ -3786,7 +3944,6 @@
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
@ -3796,7 +3953,6 @@
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
"dev": true,
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0"
@ -3809,7 +3965,6 @@
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
"dev": true,
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
@ -3920,6 +4075,12 @@
"dev": true,
"license": "Apache-2.0"
},
"node_modules/extend": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz",
"integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==",
"license": "MIT"
},
"node_modules/extract-zip": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz",
@ -3998,6 +4159,24 @@
}
}
},
"node_modules/file-type": {
"version": "21.3.4",
"resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.4.tgz",
"integrity": "sha512-Ievi/yy8DS3ygGvT47PjSfdFoX+2isQueoYP1cntFW1JLYAuS4GD7NUPGg4zv2iZfV52uDyk5w5Z0TdpRS6Q1g==",
"license": "MIT",
"dependencies": {
"@tokenizer/inflate": "^0.4.1",
"strtok3": "^10.3.4",
"token-types": "^6.1.1",
"uint8array-extras": "^1.4.0"
},
"engines": {
"node": ">=20"
},
"funding": {
"url": "https://github.com/sindresorhus/file-type?sponsor=1"
}
},
"node_modules/filelist": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.4.tgz",
@ -4031,6 +4210,26 @@
"node": ">=10"
}
},
"node_modules/follow-redirects": {
"version": "1.15.11",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz",
"integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==",
"funding": [
{
"type": "individual",
"url": "https://github.com/sponsors/RubenVerborgh"
}
],
"license": "MIT",
"engines": {
"node": ">=4.0"
},
"peerDependenciesMeta": {
"debug": {
"optional": true
}
}
},
"node_modules/foreground-child": {
"version": "3.3.1",
"resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz",
@ -4065,7 +4264,6 @@
"version": "4.0.5",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz",
"integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==",
"dev": true,
"license": "MIT",
"dependencies": {
"asynckit": "^0.4.0",
@ -4117,7 +4315,6 @@
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
@ -4136,7 +4333,6 @@
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.2",
@ -4161,7 +4357,6 @@
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
"dev": true,
"license": "MIT",
"dependencies": {
"dunder-proto": "^1.0.1",
@ -4290,7 +4485,6 @@
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
@ -4359,7 +4553,6 @@
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
@ -4372,7 +4565,6 @@
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
"integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
"dev": true,
"license": "MIT",
"dependencies": {
"has-symbols": "^1.0.3"
@ -4388,7 +4580,6 @@
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"function-bind": "^1.1.2"
@ -4487,7 +4678,7 @@
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
"integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
"dev": true,
"devOptional": true,
"license": "MIT",
"dependencies": {
"safer-buffer": ">= 2.1.2 < 3.0.0"
@ -4496,6 +4687,12 @@
"node": ">=0.10.0"
}
},
"node_modules/idb-keyval": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/idb-keyval/-/idb-keyval-6.2.2.tgz",
"integrity": "sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==",
"license": "Apache-2.0"
},
"node_modules/ieee754": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
@ -4616,6 +4813,18 @@
"node": ">=8"
}
},
"node_modules/is-plain-obj": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz",
"integrity": "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==",
"license": "MIT",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/is-unicode-supported": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz",
@ -4629,6 +4838,12 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/is-url": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/is-url/-/is-url-1.2.4.tgz",
"integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==",
"license": "MIT"
},
"node_modules/isbinaryfile": {
"version": "5.0.7",
"resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-5.0.7.tgz",
@ -5133,7 +5348,6 @@
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 0.4"
@ -5156,7 +5370,6 @@
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 0.6"
@ -5166,7 +5379,6 @@
"version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"dev": true,
"license": "MIT",
"dependencies": {
"mime-db": "1.52.0"
@ -5467,6 +5679,26 @@
"node": ">=10"
}
},
"node_modules/node-fetch": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
"license": "MIT",
"dependencies": {
"whatwg-url": "^5.0.0"
},
"engines": {
"node": "4.x || >=6.0.0"
},
"peerDependencies": {
"encoding": "^0.1.0"
},
"peerDependenciesMeta": {
"encoding": {
"optional": true
}
}
},
"node_modules/node-gyp": {
"version": "11.5.0",
"resolved": "https://registry.npmjs.org/node-gyp/-/node-gyp-11.5.0.tgz",
@ -5607,6 +5839,15 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/opencollective-postinstall": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz",
"integrity": "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==",
"license": "MIT",
"bin": {
"opencollective-postinstall": "index.js"
}
},
"node_modules/ora": {
"version": "5.4.1",
"resolved": "https://registry.npmjs.org/ora/-/ora-5.4.1.tgz",
@ -6205,6 +6446,12 @@
"node": ">= 6"
}
},
"node_modules/regenerator-runtime": {
"version": "0.13.11",
"resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz",
"integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==",
"license": "MIT"
},
"node_modules/require-directory": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
@ -6344,7 +6591,7 @@
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==",
"dev": true,
"devOptional": true,
"license": "MIT"
},
"node_modules/sanitize-filename": {
@ -6754,6 +7001,22 @@
"node": ">=8"
}
},
"node_modules/strtok3": {
"version": "10.3.5",
"resolved": "https://registry.npmjs.org/strtok3/-/strtok3-10.3.5.tgz",
"integrity": "sha512-ki4hZQfh5rX0QDLLkOCj+h+CVNkqmp/CMf8v8kZpkNVK6jGQooMytqzLZYUVYIZcFZ6yDB70EfD8POcFXiF5oA==",
"license": "MIT",
"dependencies": {
"@tokenizer/token": "^0.3.0"
},
"engines": {
"node": ">=18"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/sumchecker": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/sumchecker/-/sumchecker-3.0.1.tgz",
@ -6991,6 +7254,30 @@
"mkdirp": "bin/cmd.js"
}
},
"node_modules/tesseract.js": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/tesseract.js/-/tesseract.js-7.0.0.tgz",
"integrity": "sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==",
"hasInstallScript": true,
"license": "Apache-2.0",
"dependencies": {
"bmp-js": "^0.1.0",
"idb-keyval": "^6.2.0",
"is-url": "^1.2.4",
"node-fetch": "^2.6.9",
"opencollective-postinstall": "^2.0.3",
"regenerator-runtime": "^0.13.3",
"tesseract.js-core": "^7.0.0",
"wasm-feature-detect": "^1.8.0",
"zlibjs": "^0.3.1"
}
},
"node_modules/tesseract.js-core": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-7.0.0.tgz",
"integrity": "sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==",
"license": "Apache-2.0"
},
"node_modules/text-decoder": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz",
@ -7063,6 +7350,30 @@
"tmp": "^0.2.0"
}
},
"node_modules/token-types": {
"version": "6.1.2",
"resolved": "https://registry.npmjs.org/token-types/-/token-types-6.1.2.tgz",
"integrity": "sha512-dRXchy+C0IgK8WPC6xvCHFRIWYUbqqdEIKPaKo/AcTUNzwLTK6AH7RjdLWsEZcAN/TBdtfUw3PYEgPr5VPr6ww==",
"license": "MIT",
"dependencies": {
"@borewit/text-codec": "^0.2.1",
"@tokenizer/token": "^0.3.0",
"ieee754": "^1.2.1"
},
"engines": {
"node": ">=14.16"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
"license": "MIT"
},
"node_modules/tree-kill": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/tree-kill/-/tree-kill-1.2.2.tgz",
@ -7072,6 +7383,16 @@
"tree-kill": "cli.js"
}
},
"node_modules/trough": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/trough/-/trough-2.2.0.tgz",
"integrity": "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==",
"license": "MIT",
"funding": {
"type": "github",
"url": "https://github.com/sponsors/wooorm"
}
},
"node_modules/truncate-utf8-bytes": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz",
@ -7122,6 +7443,18 @@
"node": ">=14.17"
}
},
"node_modules/uint8array-extras": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/uint8array-extras/-/uint8array-extras-1.5.0.tgz",
"integrity": "sha512-rvKSBiC5zqCCiDZ9kAOszZcDvdAHwwIKJG33Ykj43OKcWsnmcBRL09YTU4nOeHZ8Y2a7l1MgTd08SBe9A8Qj6A==",
"license": "MIT",
"engines": {
"node": ">=18"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/unbzip2-stream": {
"version": "1.4.3",
"resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz",
@ -7139,6 +7472,25 @@
"devOptional": true,
"license": "MIT"
},
"node_modules/unified": {
"version": "11.0.5",
"resolved": "https://registry.npmjs.org/unified/-/unified-11.0.5.tgz",
"integrity": "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==",
"license": "MIT",
"dependencies": {
"@types/unist": "^3.0.0",
"bail": "^2.0.0",
"devlop": "^1.0.0",
"extend": "^3.0.0",
"is-plain-obj": "^4.0.0",
"trough": "^2.0.0",
"vfile": "^6.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/unique-filename": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/unique-filename/-/unique-filename-4.0.0.tgz",
@ -7165,6 +7517,19 @@
"node": "^18.17.0 || >=20.5.0"
}
},
"node_modules/unist-util-stringify-position": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-4.0.0.tgz",
"integrity": "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==",
"license": "MIT",
"dependencies": {
"@types/unist": "^3.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/universalify": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz",
@ -7228,6 +7593,40 @@
"node": ">=0.6.0"
}
},
"node_modules/vfile": {
"version": "6.0.3",
"resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz",
"integrity": "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==",
"license": "MIT",
"dependencies": {
"@types/unist": "^3.0.0",
"vfile-message": "^4.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/vfile-message": {
"version": "4.0.3",
"resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.3.tgz",
"integrity": "sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw==",
"license": "MIT",
"dependencies": {
"@types/unist": "^3.0.0",
"unist-util-stringify-position": "^4.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/wasm-feature-detect": {
"version": "1.8.0",
"resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.8.0.tgz",
"integrity": "sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==",
"license": "Apache-2.0"
},
"node_modules/wcwidth": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/wcwidth/-/wcwidth-1.0.1.tgz",
@ -7244,6 +7643,22 @@
"integrity": "sha512-ARrjNjtWRRs2w4Tk7nqrf2gBI0QXWuOmMCx2hU+1jUt6d00MjMxURrhxhGbrsoiZKJrhTSTzbIrc554iKI10qw==",
"license": "Apache-2.0"
},
"node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
"license": "BSD-2-Clause"
},
"node_modules/whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
"license": "MIT",
"dependencies": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
}
},
"node_modules/which": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/which/-/which-5.0.0.tgz",
@ -7399,6 +7814,15 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/zlibjs": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/zlibjs/-/zlibjs-0.3.1.tgz",
"integrity": "sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==",
"license": "MIT",
"engines": {
"node": "*"
}
},
"node_modules/zod": {
"version": "3.25.76",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",

View file

@ -26,10 +26,10 @@
"automation"
],
"scripts": {
"start": "electron .",
"start": "electron . --no-sandbox",
"dist": "electron-builder",
"postinstall": "electron-builder install-app-deps",
"dev": "rm -rf app_dist && tsc && electron .",
"dev": "rm -rf app_dist && tsc && electron . --no-sandbox",
"setup:env": "npm install && cd servers/fastapi && uv sync && cd ../../servers/nextjs && npm install && cd ../.. && npm run setup:export-runtime",
"install:pyinstaller": "cd servers/fastapi && echo 'pyinstaller already in dependencies'",
"build:ts": "rm -rf app_dist && tsc",
@ -51,6 +51,7 @@
"email": "suraj@presenton.ai"
},
"dependencies": {
"@llamaindex/liteparse": "^1.4.0",
"@puppeteer/browsers": "^1.9.1",
"@tailwindcss/cli": "^4.1.5",
"@types/uuid": "^10.0.0",

View file

@ -0,0 +1,147 @@
#!/usr/bin/env node
/**
* CLI bridge for Python: one JSON line on stdout for LiteParse extraction.
*
* OCR follows LlamaIndex LiteParse guidance (built-in Tesseract by default):
* https://developers.llamaindex.ai/liteparse/guides/ocr/
*
* - ISO 639-3 for Tesseract (eng, fra, deu, jpn, ...); multi-lang as "deu+eng" or "deu,eng".
* - Parallel workers default to CPU cores - 2, minimum 1 (override with --num-workers).
* - Optional HTTP OCR: --ocr-server-url or LITEPARSE_OCR_SERVER_URL.
* - Optional local models: --tessdata-path or LITEPARSE_TESSDATA_PATH (else TESSDATA_PREFIX / CDN).
*/
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { LiteParse } from "@llamaindex/liteparse";
/**
 * Look up the value that follows a command-line flag.
 *
 * Returns the token immediately after the first occurrence of `name` in
 * process.argv, or null when the flag is absent or is the last token.
 */
function readArg(name) {
  const position = process.argv.indexOf(name);
  if (position < 0) {
    return null;
  }
  const following = process.argv[position + 1];
  return following === undefined || following === null ? null : following;
}
/**
 * Interpret a flag value as a boolean.
 *
 * Accepts 1/true/yes/on and 0/false/no/off (case-insensitive, surrounding
 * whitespace ignored). Missing, empty or unrecognised values yield `fallback`.
 */
function parseBool(value, fallback) {
  const missing = value == null || value === "";
  if (missing) {
    return fallback;
  }
  switch (String(value).trim().toLowerCase()) {
    case "1":
    case "true":
    case "yes":
    case "on":
      return true;
    case "0":
    case "false":
    case "no":
    case "off":
      return false;
    default:
      return fallback;
  }
}
/**
 * Convert a flag value to a number clamped to [min, max].
 *
 * Missing, empty or non-numeric values yield `fallback` unchanged (the
 * fallback itself is not clamped).
 */
function toNumber(value, fallback, min, max) {
  const isMissing = value === null || value === undefined || value === "";
  const candidate = isMissing ? NaN : Number(value);
  if (Number.isNaN(candidate)) {
    return fallback;
  }
  const raised = Math.max(candidate, min);
  return Math.min(raised, max);
}
/**
 * Normalise an OCR language spec into Tesseract's "deu+eng" form.
 *
 * Accepts codes separated by "+" or "," (the comma form is a CLI/env
 * convenience), trims whitespace around each code and drops empty segments.
 * Falls back to "eng" when nothing usable remains, so inputs such as "",
 * "," or " + " never produce an empty or malformed language string.
 */
function normalizeOcrLanguage(raw) {
  const codes = String(raw ?? "")
    .split(/[,+]/)
    .map((part) => part.trim())
    .filter(Boolean);
  return codes.length > 0 ? codes.join("+") : "eng";
}
/**
 * Write `result` as a single JSON line on stdout, then terminate the process
 * with `exitCode`.
 *
 * Writes to process.stdout are sometimes asynchronous, so calling
 * process.exit() straight after write() can cut a large payload short before
 * the reader has received it. The exit is therefore deferred to the write
 * callback. Because of that, emit() returns to its caller: callers must
 * `return` right after calling it instead of relying on it to stop execution.
 */
function emit(result, exitCode = 0) {
  // Recorded up front so the status is right even if the process ends before
  // the write callback runs.
  process.exitCode = exitCode;
  process.stdout.write(`${JSON.stringify(result)}\n`, () => {
    // Still exit explicitly once the line has been handed off, as the original
    // script did, rather than waiting for the event loop to drain.
    process.exit(exitCode);
  });
}

/**
 * Read CLI flags and environment variables, run LiteParse on the requested
 * file and emit exactly one JSON result line.
 *
 * Exit codes: 0 on success, 1 when parsing throws, 2 when --file is missing or
 * does not exist.
 */
async function main() {
  const filePath = readArg("--file");
  if (!filePath) {
    emit({ ok: false, error: "Missing required --file argument" }, 2);
    return;
  }

  const resolvedPath = path.resolve(filePath);
  if (!fs.existsSync(resolvedPath)) {
    emit({ ok: false, error: `File not found: ${resolvedPath}` }, 2);
    return;
  }

  const ocrEnabled = parseBool(readArg("--ocr-enabled"), true);
  const dpi = toNumber(readArg("--dpi"), 150, 72, 600);
  // Default leaves two cores free but always uses at least one worker.
  const numWorkers = toNumber(
    readArg("--num-workers"),
    Math.max(os.cpus().length - 2, 1),
    1,
    64
  );

  // NOTE(review): for the OCR language the environment variable wins over the
  // CLI flag, whereas for the OCR server URL and tessdata path below the CLI
  // flag wins. Precedence is kept as-is; confirm this asymmetry is intended.
  const cliOcrLanguage = readArg("--ocr-language");
  const ocrLanguageRaw =
    (process.env.LITEPARSE_OCR_LANGUAGE && String(process.env.LITEPARSE_OCR_LANGUAGE).trim()) ||
    (cliOcrLanguage && String(cliOcrLanguage).trim()) ||
    "";
  const ocrLanguage = normalizeOcrLanguage(ocrLanguageRaw || "eng");

  // Anything other than "json" is treated as "text".
  const outputFormatRaw = (readArg("--output-format") || "text").trim().toLowerCase();
  const outputFormat = outputFormatRaw === "json" ? "json" : "text";

  const ocrServerUrlArg = readArg("--ocr-server-url");
  const ocrServerUrl =
    (ocrServerUrlArg && String(ocrServerUrlArg).trim()) ||
    (process.env.LITEPARSE_OCR_SERVER_URL && String(process.env.LITEPARSE_OCR_SERVER_URL).trim()) ||
    undefined;

  const tessdataArg = readArg("--tessdata-path");
  const tessdataPath =
    (tessdataArg && String(tessdataArg).trim()) ||
    (process.env.LITEPARSE_TESSDATA_PATH && String(process.env.LITEPARSE_TESSDATA_PATH).trim()) ||
    (process.env.TESSDATA_PREFIX && String(process.env.TESSDATA_PREFIX).trim()) ||
    undefined;

  try {
    const config = {
      ocrEnabled,
      ocrLanguage,
      outputFormat,
      dpi,
      numWorkers,
    };
    // Optional settings are only added when a value was actually supplied.
    if (ocrServerUrl) {
      config.ocrServerUrl = ocrServerUrl;
    }
    if (tessdataPath) {
      config.tessdataPath = tessdataPath;
    }

    const parser = new LiteParse(config);
    const result = await parser.parse(resolvedPath, true);
    const text = result?.text ?? "";

    emit({
      ok: true,
      filePath: resolvedPath,
      text,
      pageCount: Array.isArray(result?.pages) ? result.pages.length : 0,
      ocr: {
        engine: ocrServerUrl ? "http" : "tesseract",
        ocrLanguage,
        ocrEnabled,
        dpi,
        numWorkers,
      },
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    const stack = error instanceof Error ? error.stack : undefined;
    // The stack goes to stderr so stdout stays a single machine-readable line.
    if (stack) {
      process.stderr.write(`${stack}\n`);
    }
    emit(
      {
        ok: false,
        filePath: resolvedPath,
        error: message,
      },
      1
    );
  }
}

await main();

View file

@ -12,6 +12,7 @@ window.addEventListener("DOMContentLoaded", () => {
const labelMap = {
libreoffice: "LibreOffice",
puppeteer: "Chromium",
imagemagick: "ImageMagick",
};
const dependenciesEl = document.getElementById("status-dependencies");
@ -24,6 +25,7 @@ window.addEventListener("DOMContentLoaded", () => {
const currentStatus = {
libreoffice: "checking",
puppeteer: "checking",
imagemagick: "checking",
};
function setStatus(name, status) {
@ -83,6 +85,7 @@ window.addEventListener("DOMContentLoaded", () => {
if (!statusMap) return;
if (statusMap.libreoffice) setStatus("libreoffice", statusMap.libreoffice);
if (statusMap.puppeteer) setStatus("puppeteer", statusMap.puppeteer);
if (statusMap.imagemagick) setStatus("imagemagick", statusMap.imagemagick);
});
}
});

View file

@ -141,7 +141,7 @@
<div id="state-prompt" class="state active">
<div class="icon-wrap purple">📦</div>
<p class="heading" id="prompt-heading">Dependencies required</p>
<p class="sub" id="prompt-sub">Presenton needs LibreOffice and Chrome to create and export presentations. Install them now so everything works.</p>
<p class="sub" id="prompt-sub">Presenton needs LibreOffice, Chrome, and ImageMagick to create and export presentations reliably. Install them now so everything works.</p>
<div class="btn-row">
<button class="btn-primary" id="btn-install">Install</button>
<button class="btn-ghost" id="btn-skip">Skip for now</button>
@ -212,8 +212,9 @@
<script>
const STATES = ['prompt','downloading','installing','success','error'];
let logLines = 0;
let currentStep = null; // 'libreoffice' | 'chrome'
let status = { needsLibreOffice: false, needsChrome: false };
let currentStep = null; // 'libreoffice' | 'chrome' | 'imagemagick'
let status = { needsLibreOffice: false, needsChrome: false, needsImageMagick: false };
let steps = [];
let logOpen = false;
function showState(name) {
@ -264,15 +265,30 @@
document.getElementById('log-toggle-label').textContent = logOpen ? 'Hide details' : 'Show details';
}
function getStepsFromStatus() {
const queue = [];
if (status.needsLibreOffice) queue.push('libreoffice');
if (status.needsChrome) queue.push('chrome');
if (status.needsImageMagick) queue.push('imagemagick');
return queue;
}
function showPromptForStep(step) {
currentStep = step;
const total = (status.needsLibreOffice ? 1 : 0) + (status.needsChrome ? 1 : 0);
const stepNum = step === 'libreoffice' ? 1 : 2;
setStepBadge(stepNum, total, step === 'libreoffice' ? 'LibreOffice' : 'Chromium');
document.getElementById('prompt-heading').textContent = step === 'libreoffice' ? 'LibreOffice required' : 'Chromium required';
document.getElementById('prompt-sub').innerHTML = step === 'libreoffice'
? '<strong>Presenton</strong> uses LibreOffice to generate custom templates from PPTX files.'
: '<strong>Presenton</strong> uses Chromium for export and slide rendering. Download it now (~150 MB).';
const total = steps.length || 1;
const stepNum = Math.max(1, steps.indexOf(step) + 1);
const stepLabel = step === 'libreoffice' ? 'LibreOffice' : step === 'chrome' ? 'Chromium' : 'ImageMagick';
setStepBadge(stepNum, total, stepLabel);
document.getElementById('prompt-heading').textContent =
step === 'libreoffice' ? 'LibreOffice required' :
step === 'chrome' ? 'Chromium required' :
'ImageMagick required';
document.getElementById('prompt-sub').innerHTML =
step === 'libreoffice'
? '<strong>Presenton</strong> uses LibreOffice to generate custom templates from PPTX files.'
: step === 'chrome'
? '<strong>Presenton</strong> uses Chromium for export and slide rendering. Download it now (~150 MB).'
: '<strong>Presenton</strong> uses ImageMagick for OCR/document conversion support. Linux uses apt, macOS installs Homebrew first (if needed) and then runs brew install imagemagick, and Windows uses Chocolatey with a direct installer fallback.';
document.getElementById('btn-install').onclick = () => startInstall(step);
document.getElementById('btn-skip').onclick = () => handleSkip();
showState('prompt');
@ -286,7 +302,7 @@
document.getElementById('dl-heading').textContent = 'Downloading LibreOffice';
document.getElementById('dl-phase').textContent = 'This may take a few minutes (~300 MB)';
window.setupInstaller.installLibreOffice();
} else {
} else if (step === 'chrome') {
document.getElementById('dl-heading').textContent = 'Downloading Chromium';
document.getElementById('dl-phase').textContent = 'This may take a few minutes (~150 MB)';
window.setupInstaller.installChrome().then(res => {
@ -297,12 +313,36 @@
document.getElementById('btn-skip-error').onclick = () => nextOrDone();
}
});
} else {
document.getElementById('dl-heading').textContent = 'Installing ImageMagick';
document.getElementById('dl-phase').textContent = 'Linux: apt-get | macOS: Homebrew + brew install | Windows: choco or direct installer';
window.setupInstaller.installImageMagick().then((installResult) => {
if (!installResult || !installResult.ok) {
if (currentStep !== 'imagemagick') return;
document.getElementById('err-msg').textContent = installResult?.error || 'ImageMagick installation needs manual completion. Follow the shown commands and then click Retry.';
showState('error');
document.getElementById('btn-retry').onclick = () => startInstall('imagemagick');
document.getElementById('btn-skip-error').onclick = () => nextOrDone();
return;
}
window.setupInstaller.checkImageMagick().then(res => {
if (!res.ok && currentStep === 'imagemagick') {
document.getElementById('err-msg').textContent = res.error || 'ImageMagick is not installed yet.';
showState('error');
document.getElementById('btn-retry').onclick = () => startInstall('imagemagick');
document.getElementById('btn-skip-error').onclick = () => nextOrDone();
}
});
});
}
}
function nextOrDone() {
if (currentStep === 'libreoffice' && status.needsChrome) {
showPromptForStep('chrome');
const idx = steps.indexOf(currentStep);
const nextStep = idx >= 0 ? steps[idx + 1] : null;
if (nextStep) {
showPromptForStep(nextStep);
} else {
window.setupInstaller.done();
}
@ -338,13 +378,17 @@
}
if (phase === 'done') {
showState('success');
document.getElementById('success-heading').textContent = currentStep === 'libreoffice' ? 'LibreOffice installed' : 'Chromium installed';
document.getElementById('success-sub').textContent = status.needsChrome && currentStep === 'libreoffice' ? 'Next: Chrome.' : 'Continuing in a moment…';
document.getElementById('success-heading').textContent =
currentStep === 'libreoffice' ? 'LibreOffice installed' :
currentStep === 'chrome' ? 'Chromium installed' :
'ImageMagick ready';
const idx = steps.indexOf(currentStep);
const nextStep = idx >= 0 ? steps[idx + 1] : null;
document.getElementById('success-sub').textContent = nextStep ? 'Continuing with next step…' : 'Continuing in a moment…';
const bar = document.getElementById('success-bar');
if (bar) bar.style.width = '100%';
setTimeout(() => {
if (currentStep === 'libreoffice' && status.needsChrome) showPromptForStep('chrome');
else window.setupInstaller.done();
nextOrDone();
}, 2200);
return;
}
@ -366,18 +410,20 @@
window.setupInstaller.onLibreOfficeLog((data) => onLog('libreoffice', data));
window.setupInstaller.onChromeProgress((data) => onProgress('chrome', data));
window.setupInstaller.onChromeLog((data) => onLog('chrome', data));
window.setupInstaller.onImageMagickProgress((data) => onProgress('imagemagick', data));
window.setupInstaller.onImageMagickLog((data) => onLog('imagemagick', data));
document.getElementById('btn-retry').onclick = () => startInstall(currentStep);
document.getElementById('btn-skip-error').onclick = () => nextOrDone();
window.setupInstaller.getStatus().then(s => {
status = s;
if (!status.needsLibreOffice && !status.needsChrome) {
steps = getStepsFromStatus();
if (steps.length === 0) {
window.setupInstaller.done();
return;
}
if (status.needsLibreOffice) showPromptForStep('libreoffice');
else showPromptForStep('chrome');
showPromptForStep(steps[0]);
});
</script>
</body>

View file

@ -4,6 +4,7 @@ from typing import Annotated, List, Optional
from fastapi import APIRouter, Body, File, UploadFile
from constants.documents import UPLOAD_ACCEPTED_FILE_TYPES
from models.decompose_files_body import DecomposeFilesBody
from models.decomposed_file_info import DecomposedFileInfo
from services.temp_file_service import TEMP_FILE_SERVICE
from services.documents_loader import DocumentsLoader
@ -38,18 +39,21 @@ async def upload_files(files: Optional[List[UploadFile]]):
@FILES_ROUTER.post("/decompose", response_model=List[DecomposedFileInfo])
async def decompose_files(file_paths: Annotated[List[str], Body(embed=True)]):
async def decompose_files(body: DecomposeFilesBody):
temp_dir = TEMP_FILE_SERVICE.create_temp_dir(str(uuid.uuid4()))
txt_files = []
other_files = []
for file_path in file_paths:
for file_path in body.file_paths:
if file_path.endswith(".txt"):
txt_files.append(file_path)
else:
other_files.append(file_path)
documents_loader = DocumentsLoader(file_paths=other_files)
documents_loader = DocumentsLoader(
file_paths=other_files,
presentation_language=body.language,
)
await documents_loader.load_documents(temp_dir)
parsed_documents = documents_loader.documents

View file

@ -43,7 +43,10 @@ async def stream_outlines(
additional_context = ""
if presentation.file_paths:
documents_loader = DocumentsLoader(file_paths=presentation.file_paths)
documents_loader = DocumentsLoader(
file_paths=presentation.file_paths,
presentation_language=presentation.language,
)
await documents_loader.load_documents(temp_dir)
documents = documents_loader.documents
if documents:

View file

@ -15,7 +15,7 @@ import re
from services.documents_loader import DocumentsLoader
from utils.asset_directory_utils import get_images_directory
import uuid
from constants.documents import POWERPOINT_TYPES
from constants.documents import PPTX_MIME_TYPES
def _get_soffice_binary() -> str:
@ -330,7 +330,7 @@ async def process_pptx_slides(
"""
# Validate PPTX file
if pptx_file.content_type not in POWERPOINT_TYPES:
if pptx_file.content_type not in PPTX_MIME_TYPES:
raise HTTPException(
status_code=400,
detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}",
@ -441,7 +441,7 @@ async def process_pptx_fonts(
Uses the exact same font extraction and analysis utilities as the /pptx-slides endpoint.
"""
# Validate PPTX file
if pptx_file.content_type not in POWERPOINT_TYPES:
if pptx_file.content_type not in PPTX_MIME_TYPES:
raise HTTPException(
status_code=400,
detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}",

View file

@ -518,7 +518,10 @@ async def generate_presentation_handler(
await sql_session.commit()
if request.files:
documents_loader = DocumentsLoader(file_paths=request.files)
documents_loader = DocumentsLoader(
file_paths=request.files,
presentation_language=request.language,
)
await documents_loader.load_documents()
documents = documents_loader.documents
if documents:

View file

@ -1,20 +1,90 @@
PDF_EXTENSIONS = [".pdf"]
TEXT_EXTENSIONS = [".txt"]
WORD_EXTENSIONS = [".doc", ".docx", ".docm", ".odt", ".rtf"]
POWERPOINT_EXTENSIONS = [".ppt", ".pptx", ".pptm", ".odp"]
SPREADSHEET_EXTENSIONS = [".xls", ".xlsx", ".xlsm", ".ods", ".csv", ".tsv"]
JPEG_EXTENSIONS = [".jpg", ".jpeg"]
PNG_EXTENSIONS = [".png"]
GIF_EXTENSIONS = [".gif"]
BMP_EXTENSIONS = [".bmp"]
TIFF_EXTENSIONS = [".tiff", ".tif"]
WEBP_EXTENSIONS = [".webp"]
SVG_EXTENSIONS = [".svg"]
IMAGE_EXTENSIONS = (
JPEG_EXTENSIONS
+ PNG_EXTENSIONS
+ GIF_EXTENSIONS
+ BMP_EXTENSIONS
+ TIFF_EXTENSIONS
+ WEBP_EXTENSIONS
+ SVG_EXTENSIONS
)
OFFICE_EXTENSIONS = WORD_EXTENSIONS + POWERPOINT_EXTENSIONS + SPREADSHEET_EXTENSIONS
PDF_MIME_TYPES = ["application/pdf"]
TEXT_MIME_TYPES = ["text/plain"]
POWERPOINT_TYPES = [
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
]
WORD_TYPES = [
TEXT_MIME_TYPES = ["text/plain", "text/markdown"]
WORD_MIME_TYPES = [
"application/msword",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.ms-word.document.macroenabled.12",
"application/vnd.oasis.opendocument.text",
"application/rtf",
"text/rtf",
]
SPREADSHEET_TYPES = ["text/csv", "application/csv"]
POWERPOINT_MIME_TYPES = [
"application/vnd.ms-powerpoint",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/vnd.ms-powerpoint.presentation.macroenabled.12",
"application/vnd.oasis.opendocument.presentation",
]
PNG_MIME_TYPES = ["image/png"]
JPEG_MIME_TYPES = ["image/jpeg"]
WEBP_MIME_TYPES = ["image/webp"]
SPREADSHEET_MIME_TYPES = [
"application/vnd.ms-excel",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.ms-excel.sheet.macroenabled.12",
"application/vnd.oasis.opendocument.spreadsheet",
"text/csv",
"application/csv",
"text/tab-separated-values",
"text/tsv",
]
IMAGE_MIME_TYPES = [
"image/jpeg",
"image/png",
"image/gif",
"image/bmp",
"image/tiff",
"image/webp",
"image/svg+xml",
]
UPLOAD_ACCEPTED_FILE_TYPES = (
PDF_MIME_TYPES + TEXT_MIME_TYPES + POWERPOINT_TYPES + WORD_TYPES
UPLOAD_ACCEPTED_MIME_TYPES = (
PDF_MIME_TYPES
+ TEXT_MIME_TYPES
+ WORD_MIME_TYPES
+ POWERPOINT_MIME_TYPES
+ SPREADSHEET_MIME_TYPES
+ IMAGE_MIME_TYPES
)
UPLOAD_ACCEPTED_EXTENSIONS = (
PDF_EXTENSIONS + TEXT_EXTENSIONS + OFFICE_EXTENSIONS + IMAGE_EXTENSIONS
)
# Includes both MIME types and extensions because some clients upload legacy
# office files with generic content-type values.
UPLOAD_ACCEPTED_FILE_TYPES = UPLOAD_ACCEPTED_MIME_TYPES + UPLOAD_ACCEPTED_EXTENSIONS
# Kept for endpoints that strictly require modern .pptx files.
PPTX_MIME_TYPES = ["application/vnd.openxmlformats-officedocument.presentationml.presentation"]
# Backward compatibility aliases used across existing modules.
POWERPOINT_TYPES = PPTX_MIME_TYPES
WORD_TYPES = WORD_MIME_TYPES
SPREADSHEET_TYPES = SPREADSHEET_MIME_TYPES

View file

@ -0,0 +1,11 @@
from typing import List, Optional
from pydantic import BaseModel, Field
# Request body model for the files "decompose" endpoint: the file paths to
# process plus an optional language hint.
class DecomposeFilesBody(BaseModel):
# Paths of the files to decompose (required).
file_paths: List[str]
# Optional language hint; None when the client does not send one.
language: Optional[str] = Field(
default=None,
description="Presentation language from the UI; used as LiteParse/Tesseract OCR language hint.",
)

View file

@ -1,47 +1,45 @@
[project]
name = "presenton-backend"
version = "0.1.0"
description = "Add your description here"
requires-python = ">=3.11,<3.12"
dependencies = [
"alembic>=1.14.0",
"aiohttp>=3.12.15",
"aiomysql>=0.2.0",
"aiosqlite>=0.21.0",
"anthropic>=0.60.0",
"asyncpg>=0.30.0",
"dirtyjson>=1.0.8",
# Platform-specific: docling for Linux/macOS only
"docling>=2.43.0; sys_platform != 'win32'",
"fastapi[standard]>=0.116.1",
"fastembed-vectorstore>=0.5.2",
"fastmcp>=2.11.0",
"google-genai>=1.28.0",
# Platform-specific: greenlet for macOS only (critical for SQLAlchemy async)
"greenlet>=3.0.0; sys_platform == 'darwin'",
"nltk>=3.9.1",
"openai>=1.98.0",
"pathvalidate>=3.3.1",
"pdfplumber>=0.11.7",
# Platform-specific: docx2everything for DOCX/Markdown extraction on Windows
"docx2everything>=1.0.0; sys_platform == 'win32'",
"pyinstaller>=6.18.0",
"pytest>=8.4.1",
"python-pptx>=1.0.2; sys_platform == 'win32'",
"redis>=6.2.0",
"sqlmodel>=0.0.24",
]
[tool.uv]
index-strategy = "unsafe-best-match"
[[tool.uv.index]]
url = "https://download.pytorch.org/whl/cpu"
[dependency-groups]
dev = [
]
[tool.setuptools.packages.find]
where = ["."]
include = ["api*", "enums*", "models*", "services*", "constants*", "utils*"]
[project]
name = "presenton-backend"
version = "0.1.0"
description = "Add your description here"
requires-python = ">=3.11,<3.12"
dependencies = [
"alembic>=1.14.0",
"aiohttp>=3.12.15",
"aiomysql>=0.2.0",
"aiosqlite>=0.21.0",
"anthropic>=0.60.0",
"asyncpg>=0.30.0",
"dirtyjson>=1.0.8",
"fastapi[standard]>=0.116.1",
"fastembed-vectorstore>=0.5.2",
"fastmcp>=2.11.0",
"google-genai>=1.28.0",
# Platform-specific: greenlet for macOS only (critical for SQLAlchemy async)
"greenlet>=3.0.0; sys_platform == 'darwin'",
"nltk>=3.9.1",
"openai>=1.98.0",
"pathvalidate>=3.3.1",
"pdfplumber>=0.11.7",
# Platform-specific: docx2everything for DOCX/Markdown extraction on Windows
"docx2everything>=1.0.0; sys_platform == 'win32'",
"pyinstaller>=6.18.0",
"pytest>=8.4.1",
"python-pptx>=1.0.2",
"redis>=6.2.0",
"sqlmodel>=0.0.24",
]
[tool.uv]
index-strategy = "unsafe-best-match"
[[tool.uv.index]]
url = "https://download.pytorch.org/whl/cpu"
[dependency-groups]
dev = [
]
[tool.setuptools.packages.find]
where = ["."]
include = ["api*", "enums*", "models*", "services*", "constants*", "utils*"]

View file

@ -1,78 +0,0 @@
"""
Runtime hook to fix docling metadata lookup and python-docx template path resolution in PyInstaller builds.
PyInstaller doesn't always preserve package metadata (dist-info) in a way that
importlib.metadata can find it. This hook patches the version lookup to return
a default version if metadata isn't found, allowing docling to import successfully.
Additionally, python-docx uses __file__ to locate template files, which doesn't work
correctly in PyInstaller bundles. This hook patches the path resolution to use
sys._MEIPASS to find the templates.
"""
import sys
import os
# Everything below is skipped unless the interpreter is running from a
# PyInstaller bundle (sys.frozen is truthy and sys._MEIPASS is set).
if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
    try:
        import importlib.metadata

        # Keep a handle on the real lookup so the wrapper can delegate to it.
        _original_version = importlib.metadata.version

        def _patched_version(package_name):
            """Version lookup that tolerates missing docling metadata."""
            try:
                return _original_version(package_name)
            except importlib.metadata.PackageNotFoundError:
                # Any package outside the docling family keeps the normal failure.
                if package_name not in ('docling', 'docling-core', 'docling-parse', 'docling-ibm-models'):
                    raise
                # Placeholder version so that importing docling can proceed.
                return '2.43.0'

        importlib.metadata.version = _patched_version
    except Exception:
        # Best effort: a failed patch must not stop application start-up.
        pass

    # python-docx template lookup: read the XML templates from the bundle directory.
    # NOTE(review): python-docx appears to define these helpers as classmethods on
    # HeaderPart/FooterPart rather than as module-level functions; confirm that the
    # hasattr() checks below can ever succeed.
    try:
        import docx.parts.hdrftr as hdrftr_module

        def _bundle_template_reader(template_name, fallback):
            """Build a reader that prefers the bundled template and else calls ``fallback``."""

            def _read_template():
                template_path = os.path.join(sys._MEIPASS, 'docx', 'templates', template_name)
                if os.path.exists(template_path):
                    with open(template_path, 'rb') as f:
                        return f.read()
                return fallback()

            return _read_template

        for _attr_name, _template_name in (
            ('_default_header_xml', 'default-header.xml'),
            ('_default_footer_xml', 'default-footer.xml'),
        ):
            if hasattr(hdrftr_module, _attr_name):
                setattr(
                    hdrftr_module,
                    _attr_name,
                    _bundle_template_reader(_template_name, getattr(hdrftr_module, _attr_name)),
                )
    except Exception:
        # Best effort: a failed patch must not stop application start-up.
        pass

View file

@ -17,18 +17,6 @@ datas_docx2everything, binaries_docx2everything, hiddenimports_docx2everything =
# collect_all returns empty lists if package not installed, so safe to call always
datas_greenlet, binaries_greenlet, hiddenimports_greenlet = collect_all('greenlet')
# Collect docling - only installed on Linux/macOS (via pyproject.toml)
# collect_all returns empty lists if package not installed, so safe to call always
datas_docling, binaries_docling, hiddenimports_docling = collect_all('docling')
# Also collect docling dependencies which are needed for metadata lookup
datas_docling_core, binaries_docling_core, hiddenimports_docling_core = collect_all('docling-core')
datas_docling_parse, binaries_docling_parse, hiddenimports_docling_parse = collect_all('docling-parse')
datas_docling_ibm, binaries_docling_ibm, hiddenimports_docling_ibm = collect_all('docling-ibm-models')
# Collect python-docx (dependency of docling) - needed for Word document processing on Linux/macOS
# collect_all returns empty lists if package not installed, so safe to call conditionally
datas_docx, binaries_docx, hiddenimports_docx = collect_all('docx')
# fastembed_cache is created at runtime when models are first used; include only if present (e.g. local dev)
datas_fastembed_cache = [('fastembed_cache', 'fastembed_cache')] if os.path.isdir('fastembed_cache') else []
@ -37,12 +25,12 @@ excludes = []
a = Analysis(
['server.py'],
pathex=[],
binaries=binaries_fastembed + binaries_fastembed_vs + binaries_onnx + binaries_pptx + binaries_docx2everything + binaries_greenlet + binaries_docling + binaries_docling_core + binaries_docling_parse + binaries_docling_ibm + binaries_docx,
binaries=binaries_fastembed + binaries_fastembed_vs + binaries_onnx + binaries_pptx + binaries_docx2everything + binaries_greenlet,
datas=[
('assets', 'assets'),
('static', 'static'),
('alembic', 'alembic'),
] + datas_fastembed_cache + datas_fastembed + datas_fastembed_vs + datas_onnx + datas_pptx + datas_docx2everything + datas_greenlet + datas_docling + datas_docling_core + datas_docling_parse + datas_docling_ibm + datas_docx,
] + datas_fastembed_cache + datas_fastembed + datas_fastembed_vs + datas_onnx + datas_pptx + datas_docx2everything + datas_greenlet,
hiddenimports=[
'aiosqlite',
'alembic',
@ -52,10 +40,10 @@ a = Analysis(
'greenlet',
'greenlet._greenlet',
'importlib.metadata',
] + hiddenimports_fastembed + hiddenimports_fastembed_vs + hiddenimports_onnx + hiddenimports_pptx + hiddenimports_docx2everything + hiddenimports_greenlet + hiddenimports_docling + hiddenimports_docling_core + hiddenimports_docling_parse + hiddenimports_docling_ibm + hiddenimports_docx,
] + hiddenimports_fastembed + hiddenimports_fastembed_vs + hiddenimports_onnx + hiddenimports_pptx + hiddenimports_docx2everything + hiddenimports_greenlet,
hookspath=[],
hooksconfig={},
runtime_hooks=['runtime_hook_docling.py'],
runtime_hooks=[],
excludes=excludes,
noarchive=False,
optimize=0,

View file

@ -1,38 +0,0 @@
from docling.document_converter import (
DocumentConverter,
PdfFormatOption,
PowerpointFormatOption,
WordFormatOption,
)
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.datamodel.base_models import InputFormat
from utils.path_helpers import patch_python_docx_templates
class DoclingService:
    """Converts PPTX, PDF and DOCX files to Markdown through a docling ``DocumentConverter``."""

    def __init__(self):
        # Apply the python-docx template path patch before the converter is built.
        patch_python_docx_templates()

        pdf_options = PdfPipelineOptions()
        pdf_options.do_ocr = False
        self.pipeline_options = pdf_options

        # Each supported input format gets its own option wrapper around the
        # same pipeline options object.
        option_factories = {
            InputFormat.DOCX: WordFormatOption,
            InputFormat.PPTX: PowerpointFormatOption,
            InputFormat.PDF: PdfFormatOption,
        }
        self.converter = DocumentConverter(
            allowed_formats=[InputFormat.PPTX, InputFormat.PDF, InputFormat.DOCX],
            format_options={
                input_format: factory(pipeline_options=pdf_options)
                for input_format, factory in option_factories.items()
            },
        )

    def parse_to_markdown(self, file_path: str) -> str:
        """Convert the file at ``file_path`` and return the document exported as Markdown."""
        conversion = self.converter.convert(file_path)
        return conversion.document.export_to_markdown()

View file

@ -0,0 +1,165 @@
import os
import subprocess
from pathlib import Path
from typing import Dict, List
class DocumentConversionError(Exception):
    """Raised when an external converter (LibreOffice or ImageMagick) fails."""


def _windows_hidden_subprocess_kwargs() -> Dict[str, object]:
    """Build extra ``subprocess.run`` keyword arguments for the current platform.

    Returns:
        An empty dict unless ``os.name == "nt"``. On Windows, a dict carrying
        ``creationflags`` (``CREATE_NO_WINDOW``) and a ``startupinfo`` with
        ``STARTF_USESHOWWINDOW`` set.
    """
    if os.name != "nt":
        return {}

    startupinfo = subprocess.STARTUPINFO()
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    return {
        # Looked up defensively; falls back to 0 when the constant is absent.
        "creationflags": getattr(subprocess, "CREATE_NO_WINDOW", 0),
        "startupinfo": startupinfo,
    }


class DocumentConversionService:
    """Runs LibreOffice and ImageMagick as subprocesses to normalise uploads.

    Office documents are converted to PDF and images to PNG. Every failure is
    reported as ``DocumentConversionError``.
    """

    def __init__(self):
        self.soffice_binary = self._resolve_soffice_binary()
        self.imagemagick_binary = self._resolve_imagemagick_binary()

    @staticmethod
    def _resolve_soffice_binary() -> str:
        """Return ``SOFFICE_PATH`` when set, else the platform default executable name."""
        configured = (os.getenv("SOFFICE_PATH") or "").strip()
        if configured:
            return configured
        return "soffice.exe" if os.name == "nt" else "soffice"

    @staticmethod
    def _can_execute(command: str, args: List[str]) -> bool:
        """Return True when ``command args...`` runs and exits with status 0."""
        try:
            result = subprocess.run(
                [command, *args],
                capture_output=True,
                text=True,
                timeout=10,
                check=False,
                **_windows_hidden_subprocess_kwargs(),
            )
            return result.returncode == 0
        except Exception:
            # Missing binary, timeout, permission problem: all mean "not usable".
            return False

    def _resolve_imagemagick_binary(self) -> str:
        """Return ``IMAGEMAGICK_BINARY`` when set, else the first working candidate.

        ``magick`` is probed before ``convert``; when neither runs, a
        platform default name is returned so later errors name a concrete tool.
        """
        configured = (os.getenv("IMAGEMAGICK_BINARY") or "").strip()
        if configured:
            return configured

        for candidate in ["magick", "convert"]:
            if self._can_execute(candidate, ["-version"]):
                return candidate

        return "magick" if os.name == "nt" else "convert"

    @staticmethod
    def _run_converter(
        tool_name: str,
        command: List[str],
        file_path: str,
        timeout_seconds: int,
    ) -> None:
        """Run ``command`` and translate every failure into ``DocumentConversionError``."""
        display_name = os.path.basename(file_path)
        try:
            subprocess.run(
                command,
                check=True,
                capture_output=True,
                text=True,
                timeout=timeout_seconds,
                **_windows_hidden_subprocess_kwargs(),
            )
        except subprocess.TimeoutExpired as exc:
            raise DocumentConversionError(
                f"{tool_name} conversion timed out for {display_name}"
            ) from exc
        except subprocess.CalledProcessError as exc:
            stderr = (exc.stderr or "").strip()
            stdout = (exc.stdout or "").strip()
            details = stderr or stdout or str(exc)
            raise DocumentConversionError(
                f"{tool_name} conversion failed for {display_name}: {details}"
            ) from exc
        except Exception as exc:
            raise DocumentConversionError(
                f"{tool_name} conversion failed for {display_name}: {exc}"
            ) from exc

    @staticmethod
    def _snapshot_pdfs(directory: Path) -> Dict[str, tuple]:
        """Map each PDF file name in ``directory`` to a ``(mtime_ns, size)`` signature."""
        snapshot: Dict[str, tuple] = {}
        for candidate in directory.glob("*.pdf"):
            if candidate.is_file():
                info = candidate.stat()
                snapshot[candidate.name] = (info.st_mtime_ns, info.st_size)
        return snapshot

    @staticmethod
    def _first_frame_output(directory: Path, base_name: str) -> "Path | None":
        """Return the lowest-numbered ``<base_name>-<N>.png`` file in ``directory``, if any."""
        prefix = f"{base_name}-"
        suffix = ".png"
        frames = []
        for candidate in directory.iterdir():
            name = candidate.name
            if not (name.startswith(prefix) and name.endswith(suffix)):
                continue
            index = name[len(prefix):-len(suffix)]
            if index.isdigit() and candidate.is_file():
                frames.append((int(index), candidate))
        if not frames:
            return None
        return min(frames, key=lambda frame: frame[0])[1]

    def convert_office_to_pdf(
        self,
        file_path: str,
        output_dir: str,
        timeout_seconds: int = 180,
    ) -> str:
        """Convert an office document to PDF with LibreOffice.

        Args:
            file_path: Path of the document to convert.
            output_dir: Directory receiving the PDF; created when missing.
            timeout_seconds: Maximum run time of the LibreOffice process.

        Returns:
            Path of the PDF written by this call.

        Raises:
            DocumentConversionError: The process failed, timed out, or did not
                write a new or updated PDF into ``output_dir``.
        """
        out_dir = Path(output_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
        before = self._snapshot_pdfs(out_dir)

        self._run_converter(
            "LibreOffice",
            [
                self.soffice_binary,
                "--headless",
                "--convert-to",
                "pdf",
                "--outdir",
                output_dir,
                file_path,
            ],
            file_path,
            timeout_seconds,
        )

        # Only PDFs that are new or whose signature changed count as output of
        # this run. A leftover <stem>.pdf from an earlier conversion must not be
        # returned when LibreOffice exits 0 without writing anything.
        after = self._snapshot_pdfs(out_dir)
        fresh = {
            name: signature
            for name, signature in after.items()
            if before.get(name) != signature
        }

        expected_name = f"{Path(file_path).stem}.pdf"
        if expected_name in fresh:
            return str(out_dir / expected_name)

        if fresh:
            # Fall back to the most recently modified PDF produced by this run.
            newest = max(fresh, key=lambda name: fresh[name][0])
            return str(out_dir / newest)

        raise DocumentConversionError(
            f"LibreOffice did not create a PDF for {os.path.basename(file_path)}"
        )

    def convert_image_to_png(
        self,
        file_path: str,
        output_dir: str,
        timeout_seconds: int = 120,
    ) -> str:
        """Convert an image to PNG with ImageMagick.

        Args:
            file_path: Path of the image to convert.
            output_dir: Directory receiving the PNG; created when missing.
            timeout_seconds: Maximum run time of the ImageMagick process.

        Returns:
            Path of ``<stem>_converted.png``, or of the first numbered frame
            when the input produced several output files.

        Raises:
            DocumentConversionError: The process failed, timed out, or wrote no PNG.
        """
        out_dir = Path(output_dir)
        out_dir.mkdir(parents=True, exist_ok=True)

        base_name = f"{Path(file_path).stem}_converted"
        output_path = out_dir / f"{base_name}.png"

        self._run_converter(
            "ImageMagick",
            [self.imagemagick_binary, file_path, str(output_path)],
            file_path,
            timeout_seconds,
        )

        if output_path.is_file():
            return str(output_path)

        # Multi-frame inputs (animated GIF, multi-page TIFF, ...) are written as
        # <name>-0.png, <name>-1.png, ...; use the first frame in that case.
        first_frame = self._first_frame_output(out_dir, base_name)
        if first_frame is not None:
            return str(first_frame)

        raise DocumentConversionError(
            f"ImageMagick did not create a PNG for {os.path.basename(file_path)}"
        )

View file

@ -1,45 +1,44 @@
import mimetypes
import sys
from fastapi import HTTPException
import os, asyncio
import asyncio
import os
import tempfile
from pathlib import Path
from typing import List, Optional, Tuple
import pdfplumber
from constants.documents import (
PDF_MIME_TYPES,
POWERPOINT_TYPES,
TEXT_MIME_TYPES,
WORD_TYPES,
)
from fastapi import HTTPException
# Platform-specific document service imports
is_windows = sys.platform == 'win32'
if not is_windows:
from services.docling_service import DoclingService
DocumentService = None
else:
DoclingService = None
from constants.documents import (
IMAGE_EXTENSIONS,
OFFICE_EXTENSIONS,
PDF_EXTENSIONS,
TEXT_EXTENSIONS,
)
from services.document_conversion_service import (
DocumentConversionError,
DocumentConversionService,
)
from services.liteparse_service import LiteParseError, LiteParseService
from utils.ocr_language import presentation_language_to_ocr_code
# Optional fallback converter (primarily useful on Windows)
try:
from services.lightweight_document_service import DocumentService
except Exception:
DocumentService = None
class DocumentsLoader:
def __init__(self, file_paths: List[str]):
def __init__(
self,
file_paths: List[str],
presentation_language: Optional[str] = None,
):
self._file_paths = file_paths
# Initialize document service based on platform
if not is_windows and DoclingService is not None:
# Use DoclingService on Linux/macOS
self.docling_service = DoclingService()
self.document_service = None
elif is_windows and DocumentService is not None:
# Use lightweight DocumentService on Windows
self.docling_service = None
self.document_service = DocumentService()
else:
# Fallback if neither is available
self.docling_service = None
self.document_service = None
self._ocr_language = presentation_language_to_ocr_code(presentation_language)
self.liteparse_service = LiteParseService()
self.document_conversion_service = DocumentConversionService()
self.document_service = DocumentService() if DocumentService is not None else None
self._documents: List[str] = []
self._images: List[List[str]] = []
@ -61,7 +60,7 @@ class DocumentsLoader:
"""If load_images is True, temp_dir must be provided"""
documents: List[str] = []
images: List[str] = []
images: List[List[str]] = []
for file_path in self._file_paths:
if not os.path.exists(file_path):
@ -72,17 +71,28 @@ class DocumentsLoader:
document = ""
imgs = []
mime_type = mimetypes.guess_type(file_path)[0]
if mime_type in PDF_MIME_TYPES:
extension = Path(file_path).suffix.lower()
if extension in PDF_EXTENSIONS:
document, imgs = await self.load_pdf(
file_path, load_text, load_images, temp_dir
)
elif mime_type in TEXT_MIME_TYPES:
elif extension in TEXT_EXTENSIONS:
document = await self.load_text(file_path)
elif mime_type in POWERPOINT_TYPES:
document = self.load_powerpoint(file_path)
elif mime_type in WORD_TYPES:
document = self.load_msword(file_path)
elif extension in OFFICE_EXTENSIONS:
document = await asyncio.to_thread(
self.load_office_document,
file_path,
temp_dir,
)
elif extension in IMAGE_EXTENSIONS:
document = await asyncio.to_thread(
self.load_image,
file_path,
temp_dir,
)
else:
document = await asyncio.to_thread(self._parse_with_liteparse, file_path)
documents.append(document)
images.append(imgs)
@ -101,43 +111,64 @@ class DocumentsLoader:
document: str = ""
if load_text:
document = await self.load_text_from_pdf_locally(file_path)
document = await asyncio.to_thread(self._parse_with_liteparse, file_path)
if load_images:
image_paths = await self.get_page_images_from_pdf_async(file_path, temp_dir)
return document, image_paths
async def load_text_from_pdf_locally(self, file_path: str) -> str:
return await asyncio.to_thread(self._extract_text_from_pdf, file_path)
@staticmethod
def _extract_text_from_pdf(file_path: str) -> str:
texts: List[str] = []
with pdfplumber.open(file_path) as pdf:
for idx, page in enumerate(pdf.pages):
page_text = f"## Page {idx + 1}\n"
page_text += page.extract_text() or ""
texts.append(page_text)
return "\n\n".join(texts)
async def load_text(self, file_path: str) -> str:
    """Return the full contents of ``file_path`` decoded as UTF-8.

    The blocking read runs in a worker thread via ``asyncio.to_thread``.
    """
    with open(file_path, "r", encoding="utf-8") as text_handle:
        contents = await asyncio.to_thread(text_handle.read)
    return contents
def load_msword(self, file_path: str) -> str:
if self.docling_service is not None:
return self.docling_service.parse_to_markdown(file_path)
elif self.document_service is not None:
return self.document_service.parse_to_markdown(file_path)
return "" # Document service not available
def load_office_document(self, file_path: str, temp_dir: Optional[str] = None) -> str:
if temp_dir:
converted_path = self.document_conversion_service.convert_office_to_pdf(
file_path,
temp_dir,
)
return self._parse_with_liteparse(converted_path)
def load_powerpoint(self, file_path: str) -> str:
if self.docling_service is not None:
return self.docling_service.parse_to_markdown(file_path)
elif self.document_service is not None:
return self.document_service.parse_to_markdown(file_path)
return "" # Document service not available
with tempfile.TemporaryDirectory(prefix="office-convert-") as conversion_dir:
converted_path = self.document_conversion_service.convert_office_to_pdf(
file_path,
conversion_dir,
)
return self._parse_with_liteparse(converted_path)
def load_image(self, file_path: str, temp_dir: Optional[str] = None) -> str:
    """Convert an image to PNG and return the text LiteParse extracts from it.

    Args:
        file_path: Path of the source image.
        temp_dir: Directory for the converted PNG. When falsy, a temporary
            directory prefixed ``image-convert-`` is used and removed afterwards.

    Returns:
        The result of ``_parse_with_liteparse`` on the converted PNG.
    """

    def _convert_then_parse(target_dir: str) -> str:
        png_path = self.document_conversion_service.convert_image_to_png(
            file_path,
            target_dir,
        )
        return self._parse_with_liteparse(png_path)

    if not temp_dir:
        # Parsing happens inside the context so the PNG still exists.
        with tempfile.TemporaryDirectory(prefix="image-convert-") as conversion_dir:
            return _convert_then_parse(conversion_dir)

    return _convert_then_parse(temp_dir)
def _parse_with_liteparse(self, file_path: str) -> str:
try:
return self.liteparse_service.parse_to_markdown(
file_path,
ocr_enabled=True,
ocr_language=self._ocr_language,
)
except (LiteParseError, DocumentConversionError) as exc:
if self.document_service is not None:
try:
return self.document_service.parse_to_markdown(file_path)
except Exception:
pass
raise HTTPException(
status_code=500,
detail=f"Failed to parse document {os.path.basename(file_path)}: {exc}",
) from exc
@classmethod
def get_page_images_from_pdf(cls, file_path: str, temp_dir: str) -> List[str]:

View file

@ -1,177 +1,177 @@
"""
Lightweight document converter for Windows/MSIX compatibility.
Uses pure-Python libraries: pdfplumber for PDF, docx2everything for DOCX, python-pptx for PPTX.
No subprocess, no external runtimes, MSIX/Appx safe.
"""
import os
from typing import List, Optional
import docx2everything
import pdfplumber
from pptx import Presentation
class LightweightDocumentConverter:
    """Lightweight document converter supporting PDF, DOCX, and PPTX."""

    # python-pptx PP_PLACEHOLDER values treated as a slide title:
    # TITLE (1) and CENTER_TITLE (3).
    _TITLE_PLACEHOLDER_TYPES = (1, 3)
    # Heuristic: a text box longer than this is not used as a stand-in title.
    _MAX_FALLBACK_TITLE_LENGTH = 200

    def convert(self, file_path: str) -> str:
        """
        Convert document to markdown text.

        Args:
            file_path: Path to the document file

        Returns:
            Extracted text in markdown format

        Raises:
            ValueError: If file format is not supported
            FileNotFoundError: If file does not exist
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        file_ext = os.path.splitext(file_path)[1].lower()
        if file_ext == '.pdf':
            return self._convert_pdf(file_path)
        elif file_ext == '.docx':
            return self._convert_docx(file_path)
        elif file_ext == '.pptx':
            return self._convert_pptx(file_path)
        else:
            raise ValueError(f"Unsupported file format: {file_ext}")

    def _convert_pdf(self, path: str) -> str:
        """
        Convert PDF to markdown using pdfplumber.

        Args:
            path: Path to PDF file

        Returns:
            One "## Page N" section per page, separated by blank lines
        """
        texts: List[str] = []
        with pdfplumber.open(path) as pdf:
            for idx, page in enumerate(pdf.pages):
                page_text = f"## Page {idx + 1}\n"
                # extract_text() may yield nothing for a page; keep the heading anyway.
                page_text += page.extract_text() or ""
                texts.append(page_text)
        return "\n\n".join(texts)

    def _convert_docx(self, path: str) -> str:
        """
        Extract markdown from DOCX using docx2everything (no images).

        Args:
            path: Path to DOCX file

        Returns:
            Extracted markdown, or an empty string when nothing was extracted
        """
        markdown = docx2everything.process_to_markdown(path)
        return markdown if markdown else ""

    def _convert_pptx(self, path: str) -> str:
        """
        Convert PPTX to markdown using python-pptx.

        Args:
            path: Path to PPTX file

        Returns:
            One markdown section per slide, separated by "---" rules
        """
        prs = Presentation(path)
        rendered = (
            self._slide_to_markdown(slide, slide_num)
            for slide_num, slide in enumerate(prs.slides, start=1)
        )
        return "\n\n---\n\n".join(section for section in rendered if section)

    @classmethod
    def _is_title_placeholder(cls, shape) -> bool:
        """Return True when ``shape`` is a title or centered-title placeholder."""
        if not getattr(shape, "is_placeholder", False):
            return False
        try:
            return shape.placeholder_format.type in cls._TITLE_PLACEHOLDER_TYPES
        except (AttributeError, ValueError):
            # placeholder_format is unavailable on non-placeholder shapes.
            return False

    @classmethod
    def _find_slide_title(cls, slide) -> Optional[str]:
        """Pick the slide title: title placeholder first, else the first short text shape."""
        for shape in slide.shapes:
            if cls._is_title_placeholder(shape):
                if hasattr(shape, "text") and shape.text.strip():
                    return shape.text.strip()

        for shape in slide.shapes:
            if hasattr(shape, "text") and shape.text.strip():
                text = shape.text.strip()
                if len(text) < cls._MAX_FALLBACK_TITLE_LENGTH:
                    return text
        return None

    @classmethod
    def _slide_to_markdown(cls, slide, slide_num: int) -> str:
        """Render one slide as a "# title" heading followed by its text content."""
        title_text = cls._find_slide_title(slide)
        slide_parts = [f"# {title_text}" if title_text else f"# Slide {slide_num}"]

        for shape in slide.shapes:
            if not hasattr(shape, "text"):
                continue

            text = shape.text.strip()
            if not text:
                continue

            # Skip the shape whose text was already emitted as the heading.
            if title_text and text == title_text:
                continue

            if hasattr(shape, "text_frame"):
                paragraphs = shape.text_frame.paragraphs
                if len(paragraphs) > 1:
                    # Several paragraphs are rendered as a bullet list, one
                    # space of indent per paragraph level.
                    for para in paragraphs:
                        para_text = para.text.strip()
                        if para_text:
                            indent = " " * para.level
                            slide_parts.append(f"{indent}- {para_text}")
                    continue

            slide_parts.append(text)

        return "\n".join(slide_parts)
class DocumentService:
    """
    Thin facade over LightweightDocumentConverter.

    Exposes the single parse_to_markdown(file_path) entry point that callers
    use, so it can be swapped for another service with the same method.
    """

    def __init__(self):
        self.converter = LightweightDocumentConverter()

    def parse_to_markdown(self, file_path: str) -> str:
        """
        Convert the document at file_path and return its markdown text.

        Args:
            file_path: Path to the document file

        Returns:
            Extracted text in markdown format
        """
        markdown = self.converter.convert(file_path)
        return markdown
"""
Lightweight document converter for Windows/MSIX compatibility.
Uses pure-Python libraries: pdfplumber for PDF, docx2everything for DOCX, python-pptx for PPTX.
No subprocess, no external runtimes, MSIX/Appx safe.
"""
import os
from typing import List, Optional
import docx2everything
import pdfplumber
from pptx import Presentation
class LightweightDocumentConverter:
    """Lightweight document converter supporting PDF, DOCX, and PPTX."""

    # python-pptx PP_PLACEHOLDER values treated as a slide title:
    # TITLE (1) and CENTER_TITLE (3).
    _TITLE_PLACEHOLDER_TYPES = (1, 3)
    # Heuristic: a text box longer than this is not used as a stand-in title.
    _MAX_FALLBACK_TITLE_LENGTH = 200

    def convert(self, file_path: str) -> str:
        """
        Convert document to markdown text.

        Args:
            file_path: Path to the document file

        Returns:
            Extracted text in markdown format

        Raises:
            ValueError: If file format is not supported
            FileNotFoundError: If file does not exist
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        file_ext = os.path.splitext(file_path)[1].lower()
        if file_ext == '.pdf':
            return self._convert_pdf(file_path)
        elif file_ext == '.docx':
            return self._convert_docx(file_path)
        elif file_ext == '.pptx':
            return self._convert_pptx(file_path)
        else:
            raise ValueError(f"Unsupported file format: {file_ext}")

    def _convert_pdf(self, path: str) -> str:
        """
        Convert PDF to markdown using pdfplumber.

        Args:
            path: Path to PDF file

        Returns:
            One "## Page N" section per page, separated by blank lines
        """
        texts: List[str] = []
        with pdfplumber.open(path) as pdf:
            for idx, page in enumerate(pdf.pages):
                page_text = f"## Page {idx + 1}\n"
                # extract_text() may yield nothing for a page; keep the heading anyway.
                page_text += page.extract_text() or ""
                texts.append(page_text)
        return "\n\n".join(texts)

    def _convert_docx(self, path: str) -> str:
        """
        Extract markdown from DOCX using docx2everything (no images).

        Args:
            path: Path to DOCX file

        Returns:
            Extracted markdown, or an empty string when nothing was extracted
        """
        markdown = docx2everything.process_to_markdown(path)
        return markdown if markdown else ""

    def _convert_pptx(self, path: str) -> str:
        """
        Convert PPTX to markdown using python-pptx.

        Args:
            path: Path to PPTX file

        Returns:
            One markdown section per slide, separated by "---" rules
        """
        prs = Presentation(path)
        rendered = (
            self._slide_to_markdown(slide, slide_num)
            for slide_num, slide in enumerate(prs.slides, start=1)
        )
        return "\n\n---\n\n".join(section for section in rendered if section)

    @classmethod
    def _is_title_placeholder(cls, shape) -> bool:
        """Return True when ``shape`` is a title or centered-title placeholder."""
        if not getattr(shape, "is_placeholder", False):
            return False
        try:
            return shape.placeholder_format.type in cls._TITLE_PLACEHOLDER_TYPES
        except (AttributeError, ValueError):
            # placeholder_format is unavailable on non-placeholder shapes.
            return False

    @classmethod
    def _find_slide_title(cls, slide) -> Optional[str]:
        """Pick the slide title: title placeholder first, else the first short text shape."""
        for shape in slide.shapes:
            if cls._is_title_placeholder(shape):
                if hasattr(shape, "text") and shape.text.strip():
                    return shape.text.strip()

        for shape in slide.shapes:
            if hasattr(shape, "text") and shape.text.strip():
                text = shape.text.strip()
                if len(text) < cls._MAX_FALLBACK_TITLE_LENGTH:
                    return text
        return None

    @classmethod
    def _slide_to_markdown(cls, slide, slide_num: int) -> str:
        """Render one slide as a "# title" heading followed by its text content."""
        title_text = cls._find_slide_title(slide)
        slide_parts = [f"# {title_text}" if title_text else f"# Slide {slide_num}"]

        for shape in slide.shapes:
            if not hasattr(shape, "text"):
                continue

            text = shape.text.strip()
            if not text:
                continue

            # Skip the shape whose text was already emitted as the heading.
            if title_text and text == title_text:
                continue

            if hasattr(shape, "text_frame"):
                paragraphs = shape.text_frame.paragraphs
                if len(paragraphs) > 1:
                    # Several paragraphs are rendered as a bullet list, one
                    # space of indent per paragraph level.
                    for para in paragraphs:
                        para_text = para.text.strip()
                        if para_text:
                            indent = " " * para.level
                            slide_parts.append(f"{indent}- {para_text}")
                    continue

            slide_parts.append(text)

        return "\n".join(slide_parts)
class DocumentService:
    """
    Thin facade over LightweightDocumentConverter.

    Offers the same parse_to_markdown entry point as LiteParseService so it
    can serve as the optional fallback parser.
    """

    def __init__(self):
        self.converter = LightweightDocumentConverter()

    def parse_to_markdown(self, file_path: str) -> str:
        """
        Convert the document at file_path and return its markdown text.

        Args:
            file_path: Path to the document file

        Returns:
            Extracted text in markdown format
        """
        markdown = self.converter.convert(file_path)
        return markdown

View file

@ -0,0 +1,197 @@
import json
import os
import subprocess
from typing import Any, Dict, Tuple
class LiteParseError(Exception):
    """Raised when the LiteParse runtime is unavailable or a parse run fails."""


class LiteParseService:
    """Runs the Node.js LiteParse runner script and decodes its JSON result."""

    def __init__(self, timeout_seconds: int = 180):
        self.timeout_seconds = timeout_seconds
        # An empty or whitespace-only override is treated as "not set".
        self.node_binary = (os.getenv("LITEPARSE_NODE_BINARY") or "").strip() or "node"
        self.runner_path = (
            os.getenv("LITEPARSE_RUNNER_PATH") or ""
        ).strip() or self._resolve_runner_path()
        self.runner_dir = os.path.dirname(self.runner_path)
        self._npm_project_root = self._resolve_npm_project_root()

    def _resolve_npm_project_root(self) -> str:
        """Directory whose node_modules contains @llamaindex/liteparse (runner dir or Electron app root)."""
        local_nm = os.path.join(
            self.runner_dir, "node_modules", "@llamaindex", "liteparse"
        )
        if os.path.isdir(local_nm):
            return self.runner_dir

        # Otherwise assume the package lives two levels above the runner
        # directory, whether or not it is currently installed there.
        return os.path.abspath(os.path.join(self.runner_dir, "..", ".."))

    @staticmethod
    def _resolve_runner_path() -> str:
        """Return the first existing runner script among the known layouts.

        Falls back to the first candidate when none exists, so later error
        messages name a concrete path.
        """
        cwd = os.path.abspath(".")
        candidates = [
            # electron/servers/fastapi → electron/resources/...
            os.path.abspath(
                os.path.join(
                    cwd, "..", "..", "resources", "document-extraction", "liteparse_runner.mjs"
                )
            ),
            # servers/fastapi (repo root layout) → electron/resources/...
            os.path.abspath(
                os.path.join(
                    cwd,
                    "..",
                    "..",
                    "electron",
                    "resources",
                    "document-extraction",
                    "liteparse_runner.mjs",
                )
            ),
            # PyInstaller bundle layout
            os.path.abspath(
                os.path.join(
                    cwd, "..", "..", "app", "resources", "document-extraction", "liteparse_runner.mjs"
                )
            ),
            # Docker / explicit layout
            "/app/document-extraction-liteparse/liteparse_runner.mjs",
        ]

        for path in candidates:
            if os.path.isfile(path):
                return path
        return candidates[0]

    def check_runtime_ready(self) -> Tuple[bool, str]:
        """Check that the runner script, Node.js and the LiteParse package are usable.

        Returns:
            ``(True, "ok")`` when everything is available, otherwise
            ``(False, reason)`` with a human-readable explanation.
        """
        if not os.path.isfile(self.runner_path):
            return False, f"LiteParse runner not found at: {self.runner_path}"

        try:
            subprocess.run(
                [self.node_binary, "--version"],
                cwd=self.runner_dir,
                check=True,
                capture_output=True,
                text=True,
                timeout=10,
            )
        except Exception as exc:
            return False, f"Node.js runtime is unavailable: {exc}"

        liteparse_dir = os.path.join(
            self._npm_project_root, "node_modules", "@llamaindex", "liteparse"
        )
        if not os.path.isdir(liteparse_dir):
            return (
                False,
                f"LiteParse npm package missing at {liteparse_dir}. Run npm install in the Electron app directory.",
            )

        # @llamaindex/liteparse is ESM-only; require.resolve() fails. Use dynamic import.
        try:
            subprocess.run(
                [
                    self.node_binary,
                    "--input-type=module",
                    "-e",
                    "import '@llamaindex/liteparse'",
                ],
                cwd=self._npm_project_root,
                check=True,
                capture_output=True,
                text=True,
                timeout=20,
            )
        except Exception as exc:
            return False, f"LiteParse dependency is unavailable: {exc}"

        return True, "ok"

    def parse_to_markdown(
        self,
        file_path: str,
        ocr_enabled: bool = True,
        ocr_language: str = "eng",
    ) -> str:
        """Parse ``file_path`` and return the ``text`` field of the runner payload.

        Returns an empty string when the payload has no text.

        Raises:
            LiteParseError: See ``parse``.
        """
        result = self.parse(
            file_path=file_path,
            ocr_enabled=ocr_enabled,
            ocr_language=ocr_language,
        )
        return str(result.get("text") or "")

    def parse(
        self,
        file_path: str,
        ocr_enabled: bool = True,
        ocr_language: str = "eng",
    ) -> Dict[str, Any]:
        """Run the LiteParse runner on ``file_path`` and return its decoded JSON payload.

        Args:
            file_path: Document to parse.
            ocr_enabled: Passed to the runner as ``--ocr-enabled true|false``.
            ocr_language: Passed to the runner as ``--ocr-language``.

        Raises:
            LiteParseError: The runtime is not ready, the runner could not be
                started, timed out, exited non-zero, or returned a payload
                without a truthy ``ok`` field.
        """
        is_ready, reason = self.check_runtime_ready()
        if not is_ready:
            raise LiteParseError(reason)

        command = [
            self.node_binary,
            self.runner_path,
            "--file",
            file_path,
            "--ocr-enabled",
            "true" if ocr_enabled else "false",
            "--ocr-language",
            ocr_language,
        ]
        ocr_server = (os.getenv("LITEPARSE_OCR_SERVER_URL") or "").strip()
        if ocr_server:
            command.extend(["--ocr-server-url", ocr_server])
        tessdata = (os.getenv("LITEPARSE_TESSDATA_PATH") or "").strip()
        if tessdata:
            command.extend(["--tessdata-path", tessdata])

        # Timeouts and launch failures are reported as LiteParseError so callers
        # that only catch this type can still reach their fallback path.
        try:
            process = subprocess.run(
                command,
                cwd=self._npm_project_root,
                capture_output=True,
                text=True,
                timeout=self.timeout_seconds,
                env=os.environ.copy(),
            )
        except subprocess.TimeoutExpired as exc:
            raise LiteParseError(
                f"LiteParse timed out after {self.timeout_seconds} seconds "
                f"for {os.path.basename(file_path)}"
            ) from exc
        except OSError as exc:
            raise LiteParseError(f"Failed to launch LiteParse runner: {exc}") from exc

        stderr_text = (process.stderr or "").strip()
        try:
            payload = self._decode_runner_output(process.stdout)
        except LiteParseError:
            if process.returncode != 0:
                # A crashed runner usually explains itself on stderr; prefer
                # that over the generic "empty/invalid output" message.
                raise LiteParseError(
                    stderr_text
                    or f"LiteParse runner exited with code {process.returncode}"
                )
            raise

        if process.returncode != 0:
            message = payload.get("error") or stderr_text or "Unknown error"
            raise LiteParseError(message)

        if not payload.get("ok"):
            raise LiteParseError(payload.get("error") or "LiteParse parse failed")

        return payload

    @staticmethod
    def _decode_runner_output(stdout: str) -> Dict[str, Any]:
        """Extract the JSON object payload from the runner's stdout.

        Raises:
            LiteParseError: stdout is empty or contains no JSON object.
        """
        raw = (stdout or "").lstrip("\ufeff").strip()
        if not raw:
            raise LiteParseError("LiteParse runner returned empty output")

        # Prefer the last line that parses as JSON (handles stray log lines before our payload).
        lines = [line.strip() for line in raw.splitlines() if line.strip()]
        for line in reversed(lines):
            try:
                parsed = json.loads(line)
                if isinstance(parsed, dict):
                    return parsed
            except json.JSONDecodeError:
                continue

        # No single line decoded to an object: try the whole output as one JSON document.
        try:
            parsed = json.loads(raw)
            if isinstance(parsed, dict):
                return parsed
        except json.JSONDecodeError:
            pass

        raise LiteParseError("LiteParse runner returned invalid JSON output")

View file

@ -0,0 +1,126 @@
"""
Map presentation UI language strings (LanguageType enum values from Next.js) to
Tesseract / LiteParse OCR language codes (ISO 639-3 where applicable).
Keep keys in sync with:
electron/servers/nextjs/app/(presentation-generator)/upload/type.ts LanguageType
"""
from __future__ import annotations
import re
from typing import Optional
# Values must match `LanguageType` string literals in the upload UI.
PRESENTATION_LANGUAGE_TO_TESSERACT: dict[str, str] = {
"English": "eng",
"Spanish (Español)": "spa",
"French (Français)": "fra",
"German (Deutsch)": "deu",
"Portuguese (Português)": "por",
"Italian (Italiano)": "ita",
"Dutch (Nederlands)": "nld",
"Russian (Русский)": "rus",
"Chinese (Simplified - 中文, 汉语)": "chi_sim",
"Chinese (Traditional - 中文, 漢語)": "chi_tra",
"Japanese (日本語)": "jpn",
"Korean (한국어)": "kor",
"Arabic (العربية)": "ara",
"Hindi (हिन्दी)": "hin",
"Bengali (বাংলা)": "ben",
"Polish (Polski)": "pol",
"Czech (Čeština)": "ces",
"Slovak (Slovenčina)": "slk",
"Hungarian (Magyar)": "hun",
"Romanian (Română)": "ron",
"Bulgarian (Български)": "bul",
"Greek (Ελληνικά)": "ell",
"Serbian (Српски / Srpski)": "srp",
"Croatian (Hrvatski)": "hrv",
"Bosnian (Bosanski)": "bos",
"Slovenian (Slovenščina)": "slv",
"Finnish (Suomi)": "fin",
"Swedish (Svenska)": "swe",
"Danish (Dansk)": "dan",
"Norwegian (Norsk)": "nor",
"Icelandic (Íslenska)": "isl",
"Lithuanian (Lietuvių)": "lit",
"Latvian (Latviešu)": "lav",
"Estonian (Eesti)": "est",
"Maltese (Malti)": "mlt",
"Welsh (Cymraeg)": "cym",
"Irish (Gaeilge)": "gle",
"Scottish Gaelic (Gàidhlig)": "gla",
"Ukrainian (Українська)": "ukr",
"Hebrew (עברית)": "heb",
"Persian/Farsi (فارسی)": "fas",
"Turkish (Türkçe)": "tur",
"Kurdish (Kurdî / کوردی)": "kmr",
"Pashto (پښتو)": "pus",
"Dari (دری)": "prs",
"Uzbek (Oʻzbek)": "uzb",
"Kazakh (Қазақша)": "kaz",
"Tajik (Тоҷикӣ)": "tgk",
"Turkmen (Türkmençe)": "tuk",
"Azerbaijani (Azərbaycan dili)": "aze",
"Urdu (اردو)": "urd",
"Tamil (தமிழ்)": "tam",
"Telugu (తెలుగు)": "tel",
"Marathi (मराठी)": "mar",
"Punjabi (ਪੰਜਾਬੀ / پنجابی)": "pan",
"Gujarati (ગુજરાતી)": "guj",
"Malayalam (മലയാളം)": "mal",
"Kannada (ಕನ್ನಡ)": "kan",
"Odia (ଓଡ଼ିଆ)": "ori",
"Sinhala (සිංහල)": "sin",
"Nepali (नेपाली)": "nep",
"Thai (ไทย)": "tha",
"Vietnamese (Tiếng Việt)": "vie",
"Lao (ລາວ)": "lao",
"Khmer (ភាសាខ្មែរ)": "khm",
"Burmese (မြန်မာစာ)": "mya",
"Tagalog/Filipino (Tagalog/Filipino)": "tgl",
"Javanese (Basa Jawa)": "jav",
"Sundanese (Basa Sunda)": "sun",
"Malay (Bahasa Melayu)": "msa",
"Mongolian (Монгол)": "mon",
"Swahili (Kiswahili)": "swa",
"Hausa (Hausa)": "hau",
"Yoruba (Yorùbá)": "yor",
"Igbo (Igbo)": "ibo",
"Amharic (አማርኛ)": "amh",
"Zulu (isiZulu)": "zul",
"Xhosa (isiXhosa)": "xho",
"Shona (ChiShona)": "sna",
"Somali (Soomaaliga)": "som",
"Basque (Euskara)": "eus",
"Catalan (Català)": "cat",
"Galician (Galego)": "glg",
"Quechua (Runasimi)": "que",
"Nahuatl (Nāhuatl)": "nah",
"Hawaiian (ʻŌlelo Hawaiʻi)": "haw",
"Maori (Te Reo Māori)": "mri",
# No dedicated Tahitian traineddata in default Tesseract bundles.
"Tahitian (Reo Tahiti)": "eng",
"Samoan (Gagana Samoa)": "smo",
}
_LOWER_MAP = {k.lower(): v for k, v in PRESENTATION_LANGUAGE_TO_TESSERACT.items()}
_OCR_CODE_RE = re.compile(r"^[a-zA-Z0-9_,+]+$")
def presentation_language_to_ocr_code(language: Optional[str]) -> str:
    """Translate a presentation-language label into a Tesseract language code.

    The label is looked up exactly first and then case-insensitively.
    ``"eng"`` is returned when the label is ``None``, blank, unknown, or when
    the resolved code does not pass the ``_OCR_CODE_RE`` allow-list.

    Args:
        language: Language label, or ``None``. Non-string values are converted
            with ``str()`` before lookup.

    Returns:
        The mapped Tesseract language code, or ``"eng"`` as the fallback.
    """
    label = "" if language is None else str(language).strip()
    if not label:
        return "eng"

    try:
        # Exact-case label takes precedence over the case-insensitive lookup.
        resolved = PRESENTATION_LANGUAGE_TO_TESSERACT[label]
    except KeyError:
        resolved = _LOWER_MAP.get(label.lower(), "eng")

    return resolved if _OCR_CODE_RE.fullmatch(resolved) else "eng"

View file

@ -156,7 +156,7 @@ def patch_python_docx_templates():
- Docker/Development: Returns immediately without patching (no-op)
- PyInstaller: Patches the template loading functions
Note: This should be called before using docling service in PyInstaller bundles.
Note: Call before any code path that uses python-docx inside a PyInstaller bundle.
"""
# Only patch if running in PyInstaller bundle
# This check ensures Docker and development environments are unaffected

View file

@ -1,9 +1,25 @@
from pathlib import Path
from typing import List
from fastapi import HTTPException
from fastapi import UploadFile
def _is_accepted_file_type(file: UploadFile, accepted_types: List[str]) -> bool:
accepted_mime_types = {t.lower() for t in accepted_types if not t.startswith(".")}
accepted_extensions = {t.lower() for t in accepted_types if t.startswith(".")}
content_type = (file.content_type or "").strip().lower()
if content_type in accepted_mime_types:
return True
extension = Path(file.filename or "").suffix.lower()
if extension in accepted_extensions:
return True
return False
def validate_files(
field,
nullable: bool,
@ -15,12 +31,14 @@ def validate_files(
if field:
files: List[UploadFile] = field if multiple else [field]
for each_file in files:
if (max_size * 1024 * 1024) < each_file.size:
file_size = each_file.size or 0
if (max_size * 1024 * 1024) < file_size:
raise HTTPException(
400,
detail=f"File '{each_file.filename}' exceeded max upload size of {max_size} MB",
)
elif each_file.content_type not in accepted_types:
elif not _is_accepted_file_type(each_file, accepted_types):
raise HTTPException(
400,
detail=f"File '{each_file.filename}' not accepted. Accepted types: {accepted_types}",

File diff suppressed because it is too large Load diff

View file

@ -29,7 +29,10 @@ export class PresentationGenerationApi {
}
}
static async decomposeDocuments(documentKeys: string[]) {
static async decomposeDocuments(
documentKeys: string[],
language?: string | null
) {
try {
const response = await fetch(
getApiUrl(`/api/v1/ppt/files/decompose`),
@ -38,6 +41,7 @@ export class PresentationGenerationApi {
headers: getHeader(),
body: JSON.stringify({
file_paths: documentKeys,
language: language ?? null,
}),
cache: "no-cache",
}

View file

@ -13,37 +13,50 @@ interface SupportingDocProps {
const PDF_TYPES = ['.pdf']
const TEXT_TYPES = ['.txt']
const POWERPOINT_TYPES = ['.pptx']
const WORD_TYPES = ['.docx']
const WORD_TYPES = ['.doc', '.docx', '.docm', '.odt', '.rtf']
const POWERPOINT_TYPES = ['.ppt', '.pptx', '.pptm', '.odp']
const SPREADSHEET_TYPES = ['.xls', '.xlsx', '.xlsm', '.ods', '.csv', '.tsv']
const IMAGE_TYPES = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp', '.svg']
const ACCEPT_DEFAULT = [
'application/pdf',
'text/plain',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
...PDF_TYPES,
...TEXT_TYPES,
...POWERPOINT_TYPES,
...WORD_TYPES,
].join(',')
const ALLOWED_MIME_PREFIXES: string[] = []
const ALLOWED_MIME_PREFIXES: string[] = ['image/']
// Exact MIME type strings on the allow-list; spread together with
// ALLOWED_EXTENSIONS to build ACCEPT_DEFAULT.
// NOTE(review): 'applications/pdf', 'text/pdf' and 'application/vnd.pdf' look
// like non-standard PDF aliases — confirm they are intentional.
const ALLOWED_MIME_TYPES = [
'application/pdf',
'application/x-pdf',
'application/acrobat',
'applications/pdf',
'text/pdf',
'application/vnd.pdf',
'text/plain',
'text/csv',
'application/csv',
'text/tab-separated-values',
'text/tsv',
'application/msword',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.ms-word.document.macroenabled.12',
'application/vnd.oasis.opendocument.text',
'application/rtf',
'text/rtf',
'application/vnd.ms-powerpoint',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'application/vnd.ms-powerpoint.presentation.macroenabled.12',
'application/vnd.oasis.opendocument.presentation',
'application/vnd.ms-excel',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'application/vnd.ms-excel.sheet.macroenabled.12',
'application/vnd.oasis.opendocument.spreadsheet',
'image/jpeg',
'image/png',
'image/gif',
'image/bmp',
'image/tiff',
'image/webp',
'image/svg+xml',
]
const ALLOWED_EXTENSIONS = [
...PDF_TYPES,
...TEXT_TYPES,
...POWERPOINT_TYPES,
...WORD_TYPES,
...POWERPOINT_TYPES,
...SPREADSHEET_TYPES,
...IMAGE_TYPES,
]
const ACCEPT_DEFAULT = [...ALLOWED_MIME_TYPES, ...ALLOWED_EXTENSIONS].join(',')
const SupportingDoc = ({
files,
@ -75,7 +88,7 @@ const SupportingDoc = ({
const disallowed = filesToReview.filter((file) => !isAllowedFile(file))
if (disallowed.length > 0) {
toast.error('Some files are not supported', {
description: 'Only PDF, TXT, PPTX, and DOCX files are allowed.',
description: 'Supported: Word, PowerPoint, spreadsheets, PDF/TXT, and image files.',
})
}
}
@ -171,7 +184,7 @@ const SupportingDoc = ({
<div className="flex flex-col items-center gap-2">
<Paperclip className="h-6 w-6 text-[#5146E5]" />
<p className="text-sm font-medium text-gray-800 font-syne">
Drag and drop PDF, TXT, PPTX, DOCX, or <span className="text-[#5146E5]">click to browse</span>
Drag and drop Office docs, spreadsheets, images, PDF/TXT, or <span className="text-[#5146E5]">click to browse</span>
</p>
</div>
</label>
@ -214,7 +227,7 @@ const SupportingDoc = ({
</ul>
{filteredFiles.length !== files.length && (
<p className="mt-2 text-xs text-amber-600 font-syne">
Some files were skipped. Only PDF, TXT, PPTX, and DOCX files are supported.
Some files were skipped. Supported: Word, PowerPoint, spreadsheets, PDF/TXT, and image files.
</p>
)}
</div>

View file

@ -132,7 +132,12 @@ const UploadPage = () => {
if (documents.length > 0) {
trackEvent(MixpanelEvent.Upload_Decompose_Documents_API_Call);
promises.push(PresentationGenerationApi.decomposeDocuments(documents));
promises.push(
PresentationGenerationApi.decomposeDocuments(
documents,
config?.language ?? null
)
);
}
const responses = await Promise.all(promises);
dispatch(setPptGenUploadState({