From 3207422651f889644c26344e3f709370a8eaa852 Mon Sep 17 00:00:00 2001 From: sudipnext Date: Sat, 28 Mar 2026 15:34:53 +0545 Subject: [PATCH 1/3] feat: add language parameter to decomposeDocuments API call - Updated the decomposeDocuments method in PresentationGenerationApi to accept an optional language parameter. - Modified the UploadPage component to pass the selected language from the config when calling the decomposeDocuments method. --- electron/app/ipc/setup_install_handlers.ts | 167 +++- electron/app/main.ts | 11 +- electron/app/preloads/setup-installer.ts | 11 + electron/app/types/index.d.ts | 2 + electron/app/utils/imagemagick-check.ts | 27 + electron/app/utils/liteparse-check.ts | 28 + electron/app/utils/puppeteer-check.ts | 1 + electron/app/utils/setup-dependencies.ts | 24 +- electron/package-lock.json | 470 +++++++++- electron/package.json | 5 +- .../document-extraction/liteparse_runner.mjs | 147 ++++ .../document-extraction/package.json | 8 + electron/resources/ui/homepage/script.js | 3 + .../resources/ui/setup-installer/index.html | 77 +- .../fastapi/api/v1/ppt/endpoints/files.py | 10 +- .../fastapi/api/v1/ppt/endpoints/outlines.py | 5 +- .../api/v1/ppt/endpoints/presentation.py | 5 +- .../servers/fastapi/constants/documents.py | 3 +- .../fastapi/models/decompose_files_body.py | 11 + electron/servers/fastapi/pyproject.toml | 92 +- .../servers/fastapi/runtime_hook_docling.py | 78 -- electron/servers/fastapi/server.spec | 20 +- .../fastapi/services/docling_service.py | 38 - .../fastapi/services/documents_loader.py | 88 +- .../services/lightweight_document_service.py | 354 ++++---- .../fastapi/services/liteparse_service.py | 197 +++++ .../servers/fastapi/utils/ocr_language.py | 126 +++ .../servers/fastapi/utils/path_helpers.py | 2 +- electron/servers/fastapi/uv.lock | 826 +----------------- .../services/api/presentation-generation.ts | 6 +- .../upload/components/UploadPage.tsx | 7 +- 31 files changed, 1554 insertions(+), 1295 deletions(-) create mode 100644 electron/app/utils/imagemagick-check.ts create mode 100644 electron/app/utils/liteparse-check.ts create mode 100644 electron/resources/document-extraction/liteparse_runner.mjs create mode 100644 electron/resources/document-extraction/package.json create mode 100644 electron/servers/fastapi/models/decompose_files_body.py delete mode 100644 electron/servers/fastapi/runtime_hook_docling.py delete mode 100644 electron/servers/fastapi/services/docling_service.py create mode 100644 electron/servers/fastapi/services/liteparse_service.py create mode 100644 electron/servers/fastapi/utils/ocr_language.py diff --git a/electron/app/ipc/setup_install_handlers.ts b/electron/app/ipc/setup_install_handlers.ts index 7f840272..29ddb454 100644 --- a/electron/app/ipc/setup_install_handlers.ts +++ b/electron/app/ipc/setup_install_handlers.ts @@ -1,13 +1,14 @@ /** - * IPC handlers for the unified setup installer (LibreOffice + Chromium). + * IPC handlers for the unified setup installer (LibreOffice + Chromium + ImageMagick). * - setup:get-status — which dependencies are missing * - setup:install-chrome — download Chromium (browser-snapshots) with progress */ -import { ipcMain, WebContents } from "electron"; +import { ipcMain, WebContents, shell } from "electron"; import fs from "fs"; import path from "path"; import os from "os"; +import { spawn, spawnSync } from "child_process"; import puppeteer from "puppeteer"; import { Browser, @@ -17,6 +18,10 @@ import { resolveBuildId, } from "@puppeteer/browsers"; import { getSetupStatus } from "../utils/setup-dependencies"; +import { + getImageMagickDownloadUrl, + isImageMagickInstalled, +} from "../utils/imagemagick-check"; function getPuppeteerCacheDir(): string { const configCache = @@ -42,9 +47,78 @@ function sendChromeLog(wc: WebContents, level: string, text: string) { } } +function sendImageMagickProgress( + wc: WebContents, + phase: "installing" | "done" | "error", + percent?: number, + message?: string +) { + if (!wc.isDestroyed()) { + wc.send("setup:imagemagick-progress", { phase, percent, message }); + } +} + +function sendImageMagickLog(wc: WebContents, level: string, text: string) { + if (!wc.isDestroyed()) { + wc.send("setup:imagemagick-log", { level, text }); + } +} + +function commandExists(command: string, versionArgs: string[] = ["--version"]): boolean { + const result = spawnSync(command, versionArgs, { + stdio: "pipe", + windowsHide: true, + }); + return result.status === 0; +} + +function runInstallCommand( + wc: WebContents, + command: string, + args: string[] +): Promise { + sendImageMagickLog(wc, "info", `Running: ${command} ${args.join(" ")}`); + return new Promise((resolve, reject) => { + const child = spawn(command, args, { + stdio: ["ignore", "pipe", "pipe"], + windowsHide: process.platform === "win32", + }); + + child.stdout.on("data", (data) => { + const text = String(data).trim(); + if (text) sendImageMagickLog(wc, "info", text); + }); + child.stderr.on("data", (data) => { + const text = String(data).trim(); + if (text) { + sendImageMagickLog( + wc, + text.toLowerCase().includes("error") ? "error" : "info", + text + ); + } + }); + + child.on("error", reject); + child.on("close", (code) => { + if (code === 0) { + resolve(); + return; + } + reject(new Error(`${command} exited with code ${code}`)); + }); + }); +} + export function setupSetupInstallHandlers() { ipcMain.handle("setup:get-status", () => { - return getSetupStatus() ?? { needsLibreOffice: false, needsChrome: false }; + return ( + getSetupStatus() ?? { + needsLibreOffice: false, + needsChrome: false, + needsImageMagick: false, + } + ); }); ipcMain.handle( @@ -121,4 +195,91 @@ export function setupSetupInstallHandlers() { return { ok: true }; } ); + + ipcMain.handle( + "setup:install-imagemagick", + async (event): Promise<{ ok: boolean; error?: string }> => { + const wc = event.sender; + try { + sendImageMagickProgress( + wc, + "installing", + undefined, + "Installing ImageMagick..." + ); + + if (process.platform === "linux") { + if (commandExists("apt-get")) { + await runInstallCommand(wc, "pkexec", [ + "apt-get", + "install", + "-y", + "imagemagick", + ]); + } else { + throw new Error( + "apt-get is unavailable. Install ImageMagick manually from the official download page." + ); + } + } else if (process.platform === "darwin") { + if (commandExists("brew")) { + await runInstallCommand(wc, "brew", ["install", "imagemagick"]); + } else { + throw new Error( + "Homebrew is not installed. Install ImageMagick manually from the official download page." + ); + } + } else if (process.platform === "win32") { + if (commandExists("choco", ["-v"])) { + await runInstallCommand(wc, "choco", [ + "install", + "imagemagick.app", + "-y", + ]); + } else { + throw new Error( + "Chocolatey is not installed. Install ImageMagick manually from the official download page." + ); + } + } else { + throw new Error( + "Unsupported platform for automatic install. Use manual install from the official download page." + ); + } + + sendImageMagickProgress(wc, "done", 100, "ImageMagick install finished"); + return { ok: true }; + } catch (error) { + const message = + error instanceof Error ? error.message : "ImageMagick install failed"; + sendImageMagickLog(wc, "error", message); + const downloadUrl = getImageMagickDownloadUrl(); + sendImageMagickLog( + wc, + "info", + `Falling back to manual install page: ${downloadUrl}` + ); + await shell.openExternal(downloadUrl); + return { ok: true }; + } + } + ); + + ipcMain.handle( + "setup:check-imagemagick", + async (event): Promise<{ ok: boolean; error?: string }> => { + const wc = event.sender; + const installed = isImageMagickInstalled(); + if (installed) { + sendImageMagickProgress(wc, "done", 100, "ImageMagick detected"); + sendImageMagickLog(wc, "ok", "ImageMagick is installed and ready."); + return { ok: true }; + } + const message = + "ImageMagick is not detected yet. Install it, then click Retry."; + sendImageMagickProgress(wc, "error", undefined, message); + sendImageMagickLog(wc, "error", message); + return { ok: false, error: message }; + } + ); } diff --git a/electron/app/main.ts b/electron/app/main.ts index 0a526938..b478b7ca 100644 --- a/electron/app/main.ts +++ b/electron/app/main.ts @@ -13,6 +13,8 @@ import { setupSetupInstallHandlers } from "./ipc/setup_install_handlers"; import { checkDependenciesBeforeWindow } from "./utils/setup-dependencies"; import { getSofficePath, isLibreOfficeInstalled } from "./utils/libreoffice-check"; import { getPuppeteerExecutablePath, isChromeInstalled } from "./utils/puppeteer-check"; +import { getLiteParseRunnerPath } from "./utils/liteparse-check"; +import { isImageMagickInstalled } from "./utils/imagemagick-check"; import { startUpdateChecker, stopUpdateChecker } from "./utils/update-checker"; @@ -23,6 +25,7 @@ let isStopping = false; const startupStatus: Record = { libreoffice: "checking", puppeteer: "checking", + imagemagick: "checking", }; // Allow renderer to query initial startup status as soon as it loads. @@ -122,6 +125,7 @@ async function startServers(fastApiPort: number, nextjsPort: number) { // Resolved by libreoffice-check.ts at startup; lets Python invoke the // exact binary path instead of relying on the system PATH. SOFFICE_PATH: getSofficePath(), + LITEPARSE_RUNNER_PATH: getLiteParseRunnerPath(), }, isDev, ); @@ -188,7 +192,7 @@ app.whenReady().then(async () => { createWindow(); win?.loadFile(path.join(baseDir, "resources/ui/homepage/index.html")); - // Single installer: checks LibreOffice and Chrome; if either is missing, shows one + // Single installer: checks LibreOffice, Chrome, and ImageMagick; if any are missing, shows one // window that installs them one after another. Resolves when the window closes. const setupCompleted = await checkDependenciesBeforeWindow(); if (!setupCompleted) { @@ -199,12 +203,14 @@ app.whenReady().then(async () => { } // Update startup status after setup (user may have installed one or both) - const [loResult, chromeOk] = await Promise.all([ + const [loResult, chromeOk, imageMagickOk] = await Promise.all([ isLibreOfficeInstalled(), isChromeInstalled(), + Promise.resolve(isImageMagickInstalled()), ]); startupStatus.libreoffice = loResult.installed ? "installed" : "missing"; startupStatus.puppeteer = chromeOk ? "installed" : "missing"; + startupStatus.imagemagick = imageMagickOk ? "installed" : "missing"; // Show and focus main window win?.show(); @@ -218,6 +224,7 @@ app.whenReady().then(async () => { win?.webContents.once("did-finish-load", () => { sendStartupStatus("libreoffice", startupStatus.libreoffice); sendStartupStatus("puppeteer", startupStatus.puppeteer); + sendStartupStatus("imagemagick", startupStatus.imagemagick); }); setUserConfig({ diff --git a/electron/app/preloads/setup-installer.ts b/electron/app/preloads/setup-installer.ts index 57c30dc6..3b887a3f 100644 --- a/electron/app/preloads/setup-installer.ts +++ b/electron/app/preloads/setup-installer.ts @@ -5,6 +5,8 @@ contextBridge.exposeInMainWorld("setupInstaller", { installLibreOffice: () => ipcRenderer.invoke("lo:start-install"), installChrome: () => ipcRenderer.invoke("setup:install-chrome"), + installImageMagick: () => ipcRenderer.invoke("setup:install-imagemagick"), + checkImageMagick: () => ipcRenderer.invoke("setup:check-imagemagick"), done: () => ipcRenderer.send("setup:done"), @@ -25,4 +27,13 @@ contextBridge.exposeInMainWorld("setupInstaller", { onChromeLog: (cb: (data: { level: string; text: string }) => void) => { ipcRenderer.on("setup:chrome-log", (_event, data) => cb(data)); }, + + onImageMagickProgress: ( + cb: (data: { phase: string; percent?: number; message?: string }) => void + ) => { + ipcRenderer.on("setup:imagemagick-progress", (_event, data) => cb(data)); + }, + onImageMagickLog: (cb: (data: { level: string; text: string }) => void) => { + ipcRenderer.on("setup:imagemagick-log", (_event, data) => cb(data)); + }, }); diff --git a/electron/app/types/index.d.ts b/electron/app/types/index.d.ts index e82c05b1..10807ecf 100644 --- a/electron/app/types/index.d.ts +++ b/electron/app/types/index.d.ts @@ -33,6 +33,8 @@ interface FastApiEnv { MIGRATE_DATABASE_ON_STARTUP?: string, /** Absolute path to the soffice binary resolved at startup by libreoffice-check.ts. */ SOFFICE_PATH?: string, + /** Absolute path to the bundled LiteParse runner script. */ + LITEPARSE_RUNNER_PATH?: string, } interface NextJsEnv { diff --git a/electron/app/utils/imagemagick-check.ts b/electron/app/utils/imagemagick-check.ts new file mode 100644 index 00000000..6495c7f8 --- /dev/null +++ b/electron/app/utils/imagemagick-check.ts @@ -0,0 +1,27 @@ +import { spawnSync } from "child_process"; + +function canExecute(command: string, args: string[]): boolean { + const result = spawnSync(command, args, { + stdio: "pipe", + windowsHide: true, + }); + return result.status === 0; +} + +export function isImageMagickInstalled(): boolean { + // ImageMagick 7+ command + if (canExecute("magick", ["-version"])) return true; + // Legacy command on Linux/macOS packages + if (canExecute("convert", ["-version"])) return true; + return false; +} + +export function getImageMagickDownloadUrl(): string { + if (process.platform === "win32") { + return "https://imagemagick.org/script/download.php#windows"; + } + if (process.platform === "darwin") { + return "https://imagemagick.org/script/download.php#macosx"; + } + return "https://imagemagick.org/script/download.php#linux"; +} diff --git a/electron/app/utils/liteparse-check.ts b/electron/app/utils/liteparse-check.ts new file mode 100644 index 00000000..8384dd9c --- /dev/null +++ b/electron/app/utils/liteparse-check.ts @@ -0,0 +1,28 @@ +import fs from "fs"; +import path from "path"; +import { spawnSync } from "child_process"; +import { baseDir, isDev } from "./constants"; + +export function getLiteParseRunnerPath(): string { + return isDev + ? path.join(baseDir, "resources", "document-extraction", "liteparse_runner.mjs") + : path.join(baseDir, "resources", "document-extraction", "liteparse_runner.mjs"); +} + +export function getLiteParseDependencyPath(): string { + return path.join(baseDir, "node_modules", "@llamaindex", "liteparse"); +} + +export function isLiteParseInstalled(): boolean { + const runnerPath = getLiteParseRunnerPath(); + const liteparsePackagePath = getLiteParseDependencyPath(); + + if (!fs.existsSync(runnerPath)) return false; + if (!fs.existsSync(liteparsePackagePath)) return false; + + const nodeCheck = spawnSync("node", ["--version"], { + stdio: "pipe", + windowsHide: true, + }); + return nodeCheck.status === 0; +} diff --git a/electron/app/utils/puppeteer-check.ts b/electron/app/utils/puppeteer-check.ts index cabdd296..3664f90d 100644 --- a/electron/app/utils/puppeteer-check.ts +++ b/electron/app/utils/puppeteer-check.ts @@ -29,6 +29,7 @@ function shouldSkipDownload(): boolean { export interface SetupStatus { needsLibreOffice: boolean; needsChrome: boolean; + needsImageMagick: boolean; } /** diff --git a/electron/app/utils/setup-dependencies.ts b/electron/app/utils/setup-dependencies.ts index 5f99f13b..f014c095 100644 --- a/electron/app/utils/setup-dependencies.ts +++ b/electron/app/utils/setup-dependencies.ts @@ -1,9 +1,10 @@ /** * setup-dependencies.ts * - * Single installer window that ensures LibreOffice and Chrome (Puppeteer) are + * Single installer window that ensures LibreOffice, Chrome (Puppeteer), and + * ImageMagick are * available before the user starts creating presentations. Runs checks, then - * if either is missing shows one installer that runs LibreOffice then Chrome + * if any are missing shows one installer that runs dependency setup steps * in sequence (each with Install / Skip). */ @@ -15,6 +16,7 @@ import { isChromeInstalled, type SetupStatus, } from "./puppeteer-check"; +import { isImageMagickInstalled } from "./imagemagick-check"; export type { SetupStatus }; @@ -26,40 +28,44 @@ export function getSetupStatus(): SetupStatus | null { } /** - * Checks LibreOffice and Chrome. If both are present, returns immediately. - * If either is missing, opens one installer window that runs LibreOffice - * then Chrome in sequence. Returns true only when all required dependencies + * Checks LibreOffice, Chrome and ImageMagick. If all are present, returns + * immediately. If any are missing, opens one installer window that runs each + * missing setup step in sequence. Returns true only when all required dependencies * are installed; false when the installer is closed/skipped before completion. */ export async function checkDependenciesBeforeWindow(): Promise { - const [loResult, chromeInstalled] = await Promise.all([ + const [loResult, chromeInstalled, imageMagickInstalled] = await Promise.all([ isLibreOfficeInstalled(), isChromeInstalled(), + Promise.resolve(isImageMagickInstalled()), ]); const needsLibreOffice = !loResult.installed; const needsChrome = !chromeInstalled; + const needsImageMagick = !imageMagickInstalled; - if (!needsLibreOffice && !needsChrome) { + if (!needsLibreOffice && !needsChrome && !needsImageMagick) { return true; } currentSetupStatus = { needsLibreOffice, needsChrome, + needsImageMagick, }; await showSetupInstallerWindow(); // Re-check after installer closes; setup can only proceed when all // required dependencies are actually installed. - const [postLoResult, postChromeInstalled] = await Promise.all([ + const [postLoResult, postChromeInstalled, postImageMagickInstalled] = await Promise.all([ isLibreOfficeInstalled(), isChromeInstalled(), + Promise.resolve(isImageMagickInstalled()), ]); currentSetupStatus = null; - return postLoResult.installed && postChromeInstalled; + return postLoResult.installed && postChromeInstalled && postImageMagickInstalled; } /** diff --git a/electron/package-lock.json b/electron/package-lock.json index 06313aa0..61d3b1b2 100644 --- a/electron/package-lock.json +++ b/electron/package-lock.json @@ -9,6 +9,7 @@ "version": "0.6.3-beta", "hasInstallScript": true, "dependencies": { + "@llamaindex/liteparse": "^1.4.0", "@puppeteer/browsers": "^1.9.1", "@tailwindcss/cli": "^4.1.5", "@types/uuid": "^10.0.0", @@ -54,6 +55,16 @@ "node": ">=6.9.0" } }, + "node_modules/@borewit/text-codec": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/@borewit/text-codec/-/text-codec-0.2.2.tgz", + "integrity": "sha512-DDaRehssg1aNrH4+2hnj1B7vnUGEjU6OIlyRdkMd0aUdIUvKXrJfXsy8LVtXAy7DRvYVluWbMspsRhz2lcW0mQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, "node_modules/@develar/schema-utils": { "version": "2.6.5", "resolved": "https://registry.npmjs.org/@develar/schema-utils/-/schema-utils-2.6.5.tgz", @@ -507,6 +518,12 @@ "tslib": "^2.4.0" } }, + "node_modules/@hyzyla/pdfium": { + "version": "2.1.12", + "resolved": "https://registry.npmjs.org/@hyzyla/pdfium/-/pdfium-2.1.12.tgz", + "integrity": "sha512-2ezbrJk9V4foB3+U+eQ7234spsHmrufPU+9EV2cVZCnhTLLfelPz7wWshO0HjUNtcECNBaAfEzrdaQZOigkW+A==", + "license": "MIT" + }, "node_modules/@img/colour": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.0.0.tgz", @@ -1156,6 +1173,67 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@llamaindex/liteparse": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/@llamaindex/liteparse/-/liteparse-1.4.0.tgz", + "integrity": "sha512-58Tr4vAutcaf0Cxe7GK4cknpzcpN3tTzUhIAwWioWuSDqVPS3jpNhVVfqE5tV5PE4za07l07QFhGscCoVm/hRw==", + "license": "Apache-2.0", + "dependencies": { + "@hyzyla/pdfium": "^2.1.9", + "axios": "^1.7.0", + "commander": "^12.0.0", + "file-type": "^21.3.3", + "form-data": "^4.0.0", + "p-limit": "^7.3.0", + "sharp": "^0.34.5", + "tesseract.js": "^7.0.0", + "unified": "^11.0.0", + "zod": "^3.23.0" + }, + "bin": { + "lit": "dist/src/index.js", + "liteparse": "dist/src/index.js" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@llamaindex/liteparse/node_modules/commander": { + "version": "12.1.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz", + "integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/@llamaindex/liteparse/node_modules/p-limit": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-7.3.0.tgz", + "integrity": "sha512-7cIXg/Z0M5WZRblrsOla88S4wAK+zOQQWeBYfV3qJuJXMr+LnbYjaadrFaS0JILfEDPVqHyKnZ1Z/1d6J9VVUw==", + "license": "MIT", + "dependencies": { + "yocto-queue": "^1.2.1" + }, + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@llamaindex/liteparse/node_modules/yocto-queue": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz", + "integrity": "sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==", + "license": "MIT", + "engines": { + "node": ">=12.20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/@malept/cross-spawn-promise": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/@malept/cross-spawn-promise/-/cross-spawn-promise-2.0.0.tgz", @@ -1975,6 +2053,29 @@ "node": ">= 10" } }, + "node_modules/@tokenizer/inflate": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@tokenizer/inflate/-/inflate-0.4.1.tgz", + "integrity": "sha512-2mAv+8pkG6GIZiF1kNg1jAjh27IDxEPKwdGul3snfztFerfPGI1LjDezZp3i7BElXompqEtPmoPx6c2wgtWsOA==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.3", + "token-types": "^6.1.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, + "node_modules/@tokenizer/token": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz", + "integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==", + "license": "MIT" + }, "node_modules/@tootallnate/quickjs-emscripten": { "version": "0.23.0", "resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz", @@ -2070,6 +2171,12 @@ "@types/node": "*" } }, + "node_modules/@types/unist": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", + "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==", + "license": "MIT" + }, "node_modules/@types/uuid": { "version": "10.0.0", "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-10.0.0.tgz", @@ -2366,7 +2473,6 @@ "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", - "dev": true, "license": "MIT" }, "node_modules/at-least-node": { @@ -2379,6 +2485,26 @@ "node": ">= 4.0.0" } }, + "node_modules/axios": { + "version": "1.14.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.14.0.tgz", + "integrity": "sha512-3Y8yrqLSwjuzpXuZ0oIYZ/XGgLwUIBU3uLvbcpb0pidD9ctpShJd43KSlEEkVQg6DS0G9NKyzOvBfUtDKEyHvQ==", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.11", + "form-data": "^4.0.5", + "proxy-from-env": "^2.1.0" + } + }, + "node_modules/axios/node_modules/proxy-from-env": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-2.1.0.tgz", + "integrity": "sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA==", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, "node_modules/b4a": { "version": "1.8.0", "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.8.0.tgz", @@ -2393,6 +2519,16 @@ } } }, + "node_modules/bail": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/bail/-/bail-2.0.2.tgz", + "integrity": "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", @@ -2527,6 +2663,12 @@ "readable-stream": "^3.4.0" } }, + "node_modules/bmp-js": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/bmp-js/-/bmp-js-0.1.0.tgz", + "integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==", + "license": "MIT" + }, "node_modules/boolean": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz", @@ -2820,7 +2962,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", @@ -3005,7 +3146,6 @@ "version": "1.0.8", "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", - "dev": true, "license": "MIT", "dependencies": { "delayed-stream": "~1.0.0" @@ -3294,12 +3434,20 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", - "dev": true, "license": "MIT", "engines": { "node": ">=0.4.0" } }, + "node_modules/dequal": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", + "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/detect-libc": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", @@ -3317,6 +3465,19 @@ "license": "MIT", "optional": true }, + "node_modules/devlop": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", + "integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==", + "license": "MIT", + "dependencies": { + "dequal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/devtools-protocol": { "version": "0.0.1581282", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz", @@ -3461,7 +3622,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.1", @@ -3718,7 +3878,6 @@ "version": "0.1.13", "resolved": "https://registry.npmjs.org/encoding/-/encoding-0.1.13.tgz", "integrity": "sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==", - "dev": true, "license": "MIT", "optional": true, "dependencies": { @@ -3776,7 +3935,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -3786,7 +3944,6 @@ "version": "1.3.0", "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -3796,7 +3953,6 @@ "version": "1.1.1", "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0" @@ -3809,7 +3965,6 @@ "version": "2.1.0", "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", @@ -3920,6 +4075,12 @@ "dev": true, "license": "Apache-2.0" }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "license": "MIT" + }, "node_modules/extract-zip": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", @@ -3998,6 +4159,24 @@ } } }, + "node_modules/file-type": { + "version": "21.3.4", + "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.4.tgz", + "integrity": "sha512-Ievi/yy8DS3ygGvT47PjSfdFoX+2isQueoYP1cntFW1JLYAuS4GD7NUPGg4zv2iZfV52uDyk5w5Z0TdpRS6Q1g==", + "license": "MIT", + "dependencies": { + "@tokenizer/inflate": "^0.4.1", + "strtok3": "^10.3.4", + "token-types": "^6.1.1", + "uint8array-extras": "^1.4.0" + }, + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sindresorhus/file-type?sponsor=1" + } + }, "node_modules/filelist": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.4.tgz", @@ -4031,6 +4210,26 @@ "node": ">=10" } }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, "node_modules/foreground-child": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", @@ -4065,7 +4264,6 @@ "version": "4.0.5", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", - "dev": true, "license": "MIT", "dependencies": { "asynckit": "^0.4.0", @@ -4117,7 +4315,6 @@ "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", - "dev": true, "license": "MIT", "funding": { "url": "https://github.com/sponsors/ljharb" @@ -4136,7 +4333,6 @@ "version": "1.3.0", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.2", @@ -4161,7 +4357,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", - "dev": true, "license": "MIT", "dependencies": { "dunder-proto": "^1.0.1", @@ -4290,7 +4485,6 @@ "version": "1.2.0", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -4359,7 +4553,6 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -4372,7 +4565,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", - "dev": true, "license": "MIT", "dependencies": { "has-symbols": "^1.0.3" @@ -4388,7 +4580,6 @@ "version": "2.0.2", "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "dev": true, "license": "MIT", "dependencies": { "function-bind": "^1.1.2" @@ -4487,7 +4678,7 @@ "version": "0.6.3", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", - "dev": true, + "devOptional": true, "license": "MIT", "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" @@ -4496,6 +4687,12 @@ "node": ">=0.10.0" } }, + "node_modules/idb-keyval": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/idb-keyval/-/idb-keyval-6.2.2.tgz", + "integrity": "sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==", + "license": "Apache-2.0" + }, "node_modules/ieee754": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", @@ -4616,6 +4813,18 @@ "node": ">=8" } }, + "node_modules/is-plain-obj": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz", + "integrity": "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/is-unicode-supported": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz", @@ -4629,6 +4838,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/is-url": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/is-url/-/is-url-1.2.4.tgz", + "integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==", + "license": "MIT" + }, "node_modules/isbinaryfile": { "version": "5.0.7", "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-5.0.7.tgz", @@ -5133,7 +5348,6 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -5156,7 +5370,6 @@ "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.6" @@ -5166,7 +5379,6 @@ "version": "2.1.35", "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", - "dev": true, "license": "MIT", "dependencies": { "mime-db": "1.52.0" @@ -5467,6 +5679,26 @@ "node": ">=10" } }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, "node_modules/node-gyp": { "version": "11.5.0", "resolved": "https://registry.npmjs.org/node-gyp/-/node-gyp-11.5.0.tgz", @@ -5607,6 +5839,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/opencollective-postinstall": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz", + "integrity": "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==", + "license": "MIT", + "bin": { + "opencollective-postinstall": "index.js" + } + }, "node_modules/ora": { "version": "5.4.1", "resolved": "https://registry.npmjs.org/ora/-/ora-5.4.1.tgz", @@ -6205,6 +6446,12 @@ "node": ">= 6" } }, + "node_modules/regenerator-runtime": { + "version": "0.13.11", + "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz", + "integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==", + "license": "MIT" + }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -6344,7 +6591,7 @@ "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "dev": true, + "devOptional": true, "license": "MIT" }, "node_modules/sanitize-filename": { @@ -6754,6 +7001,22 @@ "node": ">=8" } }, + "node_modules/strtok3": { + "version": "10.3.5", + "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-10.3.5.tgz", + "integrity": "sha512-ki4hZQfh5rX0QDLLkOCj+h+CVNkqmp/CMf8v8kZpkNVK6jGQooMytqzLZYUVYIZcFZ6yDB70EfD8POcFXiF5oA==", + "license": "MIT", + "dependencies": { + "@tokenizer/token": "^0.3.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, "node_modules/sumchecker": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/sumchecker/-/sumchecker-3.0.1.tgz", @@ -6991,6 +7254,30 @@ "mkdirp": "bin/cmd.js" } }, + "node_modules/tesseract.js": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/tesseract.js/-/tesseract.js-7.0.0.tgz", + "integrity": "sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==", + "hasInstallScript": true, + "license": "Apache-2.0", + "dependencies": { + "bmp-js": "^0.1.0", + "idb-keyval": "^6.2.0", + "is-url": "^1.2.4", + "node-fetch": "^2.6.9", + "opencollective-postinstall": "^2.0.3", + "regenerator-runtime": "^0.13.3", + "tesseract.js-core": "^7.0.0", + "wasm-feature-detect": "^1.8.0", + "zlibjs": "^0.3.1" + } + }, + "node_modules/tesseract.js-core": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-7.0.0.tgz", + "integrity": "sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==", + "license": "Apache-2.0" + }, "node_modules/text-decoder": { "version": "1.2.7", "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz", @@ -7063,6 +7350,30 @@ "tmp": "^0.2.0" } }, + "node_modules/token-types": { + "version": "6.1.2", + "resolved": "https://registry.npmjs.org/token-types/-/token-types-6.1.2.tgz", + "integrity": "sha512-dRXchy+C0IgK8WPC6xvCHFRIWYUbqqdEIKPaKo/AcTUNzwLTK6AH7RjdLWsEZcAN/TBdtfUw3PYEgPr5VPr6ww==", + "license": "MIT", + "dependencies": { + "@borewit/text-codec": "^0.2.1", + "@tokenizer/token": "^0.3.0", + "ieee754": "^1.2.1" + }, + "engines": { + "node": ">=14.16" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, "node_modules/tree-kill": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/tree-kill/-/tree-kill-1.2.2.tgz", @@ -7072,6 +7383,16 @@ "tree-kill": "cli.js" } }, + "node_modules/trough": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/trough/-/trough-2.2.0.tgz", + "integrity": "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/truncate-utf8-bytes": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz", @@ -7122,6 +7443,18 @@ "node": ">=14.17" } }, + "node_modules/uint8array-extras": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/uint8array-extras/-/uint8array-extras-1.5.0.tgz", + "integrity": "sha512-rvKSBiC5zqCCiDZ9kAOszZcDvdAHwwIKJG33Ykj43OKcWsnmcBRL09YTU4nOeHZ8Y2a7l1MgTd08SBe9A8Qj6A==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/unbzip2-stream": { "version": "1.4.3", "resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz", @@ -7139,6 +7472,25 @@ "devOptional": true, "license": "MIT" }, + "node_modules/unified": { + "version": "11.0.5", + "resolved": "https://registry.npmjs.org/unified/-/unified-11.0.5.tgz", + "integrity": "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "bail": "^2.0.0", + "devlop": "^1.0.0", + "extend": "^3.0.0", + "is-plain-obj": "^4.0.0", + "trough": "^2.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/unique-filename": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/unique-filename/-/unique-filename-4.0.0.tgz", @@ -7165,6 +7517,19 @@ "node": "^18.17.0 || >=20.5.0" } }, + "node_modules/unist-util-stringify-position": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-4.0.0.tgz", + "integrity": "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/universalify": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz", @@ -7228,6 +7593,40 @@ "node": ">=0.6.0" } }, + "node_modules/vfile": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz", + "integrity": "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/vfile-message": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.3.tgz", + "integrity": "sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-stringify-position": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/wasm-feature-detect": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.8.0.tgz", + "integrity": "sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==", + "license": "Apache-2.0" + }, "node_modules/wcwidth": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/wcwidth/-/wcwidth-1.0.1.tgz", @@ -7244,6 +7643,22 @@ "integrity": "sha512-ARrjNjtWRRs2w4Tk7nqrf2gBI0QXWuOmMCx2hU+1jUt6d00MjMxURrhxhGbrsoiZKJrhTSTzbIrc554iKI10qw==", "license": "Apache-2.0" }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/which": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/which/-/which-5.0.0.tgz", @@ -7399,6 +7814,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/zlibjs": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/zlibjs/-/zlibjs-0.3.1.tgz", + "integrity": "sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==", + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/zod": { "version": "3.25.76", "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", diff --git a/electron/package.json b/electron/package.json index 10dd69d2..ecbf96c8 100644 --- a/electron/package.json +++ b/electron/package.json @@ -26,10 +26,10 @@ "automation" ], "scripts": { - "start": "electron .", + "start": "electron . --no-sandbox", "dist": "electron-builder", "postinstall": "electron-builder install-app-deps", - "dev": "rm -rf app_dist && tsc && electron .", + "dev": "rm -rf app_dist && tsc && electron . --no-sandbox", "setup:env": "npm install && cd servers/fastapi && uv sync && cd ../../servers/nextjs && npm install && cd ../.. && npm run setup:export-runtime", "install:pyinstaller": "cd servers/fastapi && echo 'pyinstaller already in dependencies'", "build:ts": "rm -rf app_dist && tsc", @@ -51,6 +51,7 @@ "email": "suraj@presenton.ai" }, "dependencies": { + "@llamaindex/liteparse": "^1.4.0", "@puppeteer/browsers": "^1.9.1", "@tailwindcss/cli": "^4.1.5", "@types/uuid": "^10.0.0", diff --git a/electron/resources/document-extraction/liteparse_runner.mjs b/electron/resources/document-extraction/liteparse_runner.mjs new file mode 100644 index 00000000..d8250744 --- /dev/null +++ b/electron/resources/document-extraction/liteparse_runner.mjs @@ -0,0 +1,147 @@ +#!/usr/bin/env node +/** + * CLI bridge for Python: one JSON line on stdout for LiteParse extraction. + * + * OCR follows LlamaIndex LiteParse guidance (built-in Tesseract by default): + * https://developers.llamaindex.ai/liteparse/guides/ocr/ + * + * - ISO 639-3 for Tesseract (eng, fra, deu, jpn, …); multi-lang as "deu+eng" or "deu,eng". + * - Parallel workers ≈ CPU cores − 1 (override --num-workers). + * - Optional HTTP OCR: --ocr-server-url or LITEPARSE_OCR_SERVER_URL. + * - Optional local models: --tessdata-path or LITEPARSE_TESSDATA_PATH (else TESSDATA_PREFIX / CDN). + */ + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { LiteParse } from "@llamaindex/liteparse"; + +function readArg(name) { + const idx = process.argv.indexOf(name); + if (idx === -1) return null; + return process.argv[idx + 1] ?? null; +} + +function parseBool(value, fallback) { + if (value == null || value === "") return fallback; + const s = String(value).trim().toLowerCase(); + if (["1", "true", "yes", "on"].includes(s)) return true; + if (["0", "false", "no", "off"].includes(s)) return false; + return fallback; +} + +function toNumber(value, fallback, min, max) { + if (value == null || value === "") return fallback; + const parsed = Number(value); + if (Number.isNaN(parsed)) return fallback; + return Math.min(Math.max(parsed, min), max); +} + +/** Tesseract accepts "deu+eng"; allow comma-separated CLI/env for convenience. */ +function normalizeOcrLanguage(raw) { + const s = String(raw ?? "").trim(); + if (!s) return "eng"; + if (s.includes(",")) { + return s + .split(",") + .map((p) => p.trim()) + .filter(Boolean) + .join("+"); + } + return s; +} + +function emit(result, exitCode = 0) { + process.stdout.write(`${JSON.stringify(result)}\n`); + process.exit(exitCode); +} + +const filePath = readArg("--file"); +if (!filePath) { + emit({ ok: false, error: "Missing required --file argument" }, 2); +} + +const resolvedPath = path.resolve(filePath); +if (!fs.existsSync(resolvedPath)) { + emit({ ok: false, error: `File not found: ${resolvedPath}` }, 2); +} + +const ocrEnabled = parseBool(readArg("--ocr-enabled"), true); +const dpi = toNumber(readArg("--dpi"), 150, 72, 600); +const numWorkers = toNumber( + readArg("--num-workers"), + Math.max(os.cpus().length - 1, 1), + 1, + 64 +); + +const cliOcrLanguage = readArg("--ocr-language"); +const ocrLanguageRaw = + (process.env.LITEPARSE_OCR_LANGUAGE && String(process.env.LITEPARSE_OCR_LANGUAGE).trim()) || + (cliOcrLanguage && String(cliOcrLanguage).trim()) || + ""; +const ocrLanguage = normalizeOcrLanguage(ocrLanguageRaw || "eng"); + +const outputFormatRaw = (readArg("--output-format") || "text").trim().toLowerCase(); +const outputFormat = outputFormatRaw === "json" ? "json" : "text"; + +const ocrServerUrlArg = readArg("--ocr-server-url"); +const ocrServerUrl = + (ocrServerUrlArg && String(ocrServerUrlArg).trim()) || + (process.env.LITEPARSE_OCR_SERVER_URL && String(process.env.LITEPARSE_OCR_SERVER_URL).trim()) || + undefined; + +const tessdataArg = readArg("--tessdata-path"); +const tessdataPath = + (tessdataArg && String(tessdataArg).trim()) || + (process.env.LITEPARSE_TESSDATA_PATH && String(process.env.LITEPARSE_TESSDATA_PATH).trim()) || + (process.env.TESSDATA_PREFIX && String(process.env.TESSDATA_PREFIX).trim()) || + undefined; + +try { + const config = { + ocrEnabled, + ocrLanguage, + outputFormat, + dpi, + numWorkers, + }; + if (ocrServerUrl) { + config.ocrServerUrl = ocrServerUrl; + } + if (tessdataPath) { + config.tessdataPath = tessdataPath; + } + + const parser = new LiteParse(config); + + const result = await parser.parse(resolvedPath, true); + const text = result?.text ?? ""; + emit({ + ok: true, + filePath: resolvedPath, + text, + pageCount: Array.isArray(result?.pages) ? result.pages.length : 0, + ocr: { + engine: ocrServerUrl ? "http" : "tesseract", + ocrLanguage, + ocrEnabled, + dpi, + numWorkers, + }, + }); +} catch (error) { + const message = error instanceof Error ? error.message : String(error); + const stack = error instanceof Error ? error.stack : undefined; + if (stack) { + process.stderr.write(`${stack}\n`); + } + emit( + { + ok: false, + filePath: resolvedPath, + error: message, + }, + 1 + ); +} diff --git a/electron/resources/document-extraction/package.json b/electron/resources/document-extraction/package.json new file mode 100644 index 00000000..89e4c182 --- /dev/null +++ b/electron/resources/document-extraction/package.json @@ -0,0 +1,8 @@ +{ + "name": "presenton-document-extraction", + "private": true, + "type": "module", + "dependencies": { + "@llamaindex/liteparse": "^1.4.0" + } +} diff --git a/electron/resources/ui/homepage/script.js b/electron/resources/ui/homepage/script.js index 04ba9044..a047db7e 100644 --- a/electron/resources/ui/homepage/script.js +++ b/electron/resources/ui/homepage/script.js @@ -12,6 +12,7 @@ window.addEventListener("DOMContentLoaded", () => { const labelMap = { libreoffice: "LibreOffice", puppeteer: "Chromium", + imagemagick: "ImageMagick", }; const dependenciesEl = document.getElementById("status-dependencies"); @@ -24,6 +25,7 @@ window.addEventListener("DOMContentLoaded", () => { const currentStatus = { libreoffice: "checking", puppeteer: "checking", + imagemagick: "checking", }; function setStatus(name, status) { @@ -83,6 +85,7 @@ window.addEventListener("DOMContentLoaded", () => { if (!statusMap) return; if (statusMap.libreoffice) setStatus("libreoffice", statusMap.libreoffice); if (statusMap.puppeteer) setStatus("puppeteer", statusMap.puppeteer); + if (statusMap.imagemagick) setStatus("imagemagick", statusMap.imagemagick); }); } }); \ No newline at end of file diff --git a/electron/resources/ui/setup-installer/index.html b/electron/resources/ui/setup-installer/index.html index 156311cb..5ff1397b 100644 --- a/electron/resources/ui/setup-installer/index.html +++ b/electron/resources/ui/setup-installer/index.html @@ -141,7 +141,7 @@
📦

Dependencies required

-

Presenton needs LibreOffice and Chrome to create and export presentations. Install them now so everything works.

+

Presenton needs LibreOffice, Chrome, and ImageMagick to create and export presentations reliably. Install them now so everything works.

@@ -212,8 +212,9 @@ diff --git a/electron/servers/fastapi/api/v1/ppt/endpoints/files.py b/electron/servers/fastapi/api/v1/ppt/endpoints/files.py index 5e517aa8..993d8049 100644 --- a/electron/servers/fastapi/api/v1/ppt/endpoints/files.py +++ b/electron/servers/fastapi/api/v1/ppt/endpoints/files.py @@ -4,6 +4,7 @@ from typing import Annotated, List, Optional from fastapi import APIRouter, Body, File, UploadFile from constants.documents import UPLOAD_ACCEPTED_FILE_TYPES +from models.decompose_files_body import DecomposeFilesBody from models.decomposed_file_info import DecomposedFileInfo from services.temp_file_service import TEMP_FILE_SERVICE from services.documents_loader import DocumentsLoader @@ -38,18 +39,21 @@ async def upload_files(files: Optional[List[UploadFile]]): @FILES_ROUTER.post("/decompose", response_model=List[DecomposedFileInfo]) -async def decompose_files(file_paths: Annotated[List[str], Body(embed=True)]): +async def decompose_files(body: DecomposeFilesBody): temp_dir = TEMP_FILE_SERVICE.create_temp_dir(str(uuid.uuid4())) txt_files = [] other_files = [] - for file_path in file_paths: + for file_path in body.file_paths: if file_path.endswith(".txt"): txt_files.append(file_path) else: other_files.append(file_path) - documents_loader = DocumentsLoader(file_paths=other_files) + documents_loader = DocumentsLoader( + file_paths=other_files, + presentation_language=body.language, + ) await documents_loader.load_documents(temp_dir) parsed_documents = documents_loader.documents diff --git a/electron/servers/fastapi/api/v1/ppt/endpoints/outlines.py b/electron/servers/fastapi/api/v1/ppt/endpoints/outlines.py index 764f5b27..e28d1f42 100644 --- a/electron/servers/fastapi/api/v1/ppt/endpoints/outlines.py +++ b/electron/servers/fastapi/api/v1/ppt/endpoints/outlines.py @@ -43,7 +43,10 @@ async def stream_outlines( additional_context = "" if presentation.file_paths: - documents_loader = DocumentsLoader(file_paths=presentation.file_paths) + documents_loader = DocumentsLoader( + file_paths=presentation.file_paths, + presentation_language=presentation.language, + ) await documents_loader.load_documents(temp_dir) documents = documents_loader.documents if documents: diff --git a/electron/servers/fastapi/api/v1/ppt/endpoints/presentation.py b/electron/servers/fastapi/api/v1/ppt/endpoints/presentation.py index cb616c15..fa52de74 100644 --- a/electron/servers/fastapi/api/v1/ppt/endpoints/presentation.py +++ b/electron/servers/fastapi/api/v1/ppt/endpoints/presentation.py @@ -518,7 +518,10 @@ async def generate_presentation_handler( await sql_session.commit() if request.files: - documents_loader = DocumentsLoader(file_paths=request.files) + documents_loader = DocumentsLoader( + file_paths=request.files, + presentation_language=request.language, + ) await documents_loader.load_documents() documents = documents_loader.documents if documents: diff --git a/electron/servers/fastapi/constants/documents.py b/electron/servers/fastapi/constants/documents.py index 9d5fef16..2bb4c3ed 100644 --- a/electron/servers/fastapi/constants/documents.py +++ b/electron/servers/fastapi/constants/documents.py @@ -13,8 +13,9 @@ SPREADSHEET_TYPES = ["text/csv", "application/csv"] PNG_MIME_TYPES = ["image/png"] JPEG_MIME_TYPES = ["image/jpeg"] WEBP_MIME_TYPES = ["image/webp"] +IMAGE_MIME_TYPES = PNG_MIME_TYPES + JPEG_MIME_TYPES + WEBP_MIME_TYPES UPLOAD_ACCEPTED_FILE_TYPES = ( - PDF_MIME_TYPES + TEXT_MIME_TYPES + POWERPOINT_TYPES + WORD_TYPES + PDF_MIME_TYPES + TEXT_MIME_TYPES + POWERPOINT_TYPES + WORD_TYPES + IMAGE_MIME_TYPES ) diff --git a/electron/servers/fastapi/models/decompose_files_body.py b/electron/servers/fastapi/models/decompose_files_body.py new file mode 100644 index 00000000..9caeb88c --- /dev/null +++ b/electron/servers/fastapi/models/decompose_files_body.py @@ -0,0 +1,11 @@ +from typing import List, Optional + +from pydantic import BaseModel, Field + + +class DecomposeFilesBody(BaseModel): + file_paths: List[str] + language: Optional[str] = Field( + default=None, + description="Presentation language from the UI; used as LiteParse/Tesseract OCR language hint.", + ) diff --git a/electron/servers/fastapi/pyproject.toml b/electron/servers/fastapi/pyproject.toml index 18efd476..460739d7 100644 --- a/electron/servers/fastapi/pyproject.toml +++ b/electron/servers/fastapi/pyproject.toml @@ -1,47 +1,45 @@ -[project] -name = "presenton-backend" -version = "0.1.0" -description = "Add your description here" -requires-python = ">=3.11,<3.12" -dependencies = [ - "alembic>=1.14.0", - "aiohttp>=3.12.15", - "aiomysql>=0.2.0", - "aiosqlite>=0.21.0", - "anthropic>=0.60.0", - "asyncpg>=0.30.0", - "dirtyjson>=1.0.8", - # Platform-specific: docling for Linux/macOS only - "docling>=2.43.0; sys_platform != 'win32'", - "fastapi[standard]>=0.116.1", - "fastembed-vectorstore>=0.5.2", - "fastmcp>=2.11.0", - "google-genai>=1.28.0", - # Platform-specific: greenlet for macOS only (critical for SQLAlchemy async) - "greenlet>=3.0.0; sys_platform == 'darwin'", - "nltk>=3.9.1", - "openai>=1.98.0", - "pathvalidate>=3.3.1", - "pdfplumber>=0.11.7", - # Platform-specific: docx2everything for DOCX/Markdown extraction on Windows - "docx2everything>=1.0.0; sys_platform == 'win32'", - "pyinstaller>=6.18.0", - "pytest>=8.4.1", - "python-pptx>=1.0.2; sys_platform == 'win32'", - "redis>=6.2.0", - "sqlmodel>=0.0.24", -] - -[tool.uv] -index-strategy = "unsafe-best-match" - -[[tool.uv.index]] -url = "https://download.pytorch.org/whl/cpu" - -[dependency-groups] -dev = [ -] - -[tool.setuptools.packages.find] -where = ["."] -include = ["api*", "enums*", "models*", "services*", "constants*", "utils*"] +[project] +name = "presenton-backend" +version = "0.1.0" +description = "Add your description here" +requires-python = ">=3.11,<3.12" +dependencies = [ + "alembic>=1.14.0", + "aiohttp>=3.12.15", + "aiomysql>=0.2.0", + "aiosqlite>=0.21.0", + "anthropic>=0.60.0", + "asyncpg>=0.30.0", + "dirtyjson>=1.0.8", + "fastapi[standard]>=0.116.1", + "fastembed-vectorstore>=0.5.2", + "fastmcp>=2.11.0", + "google-genai>=1.28.0", + # Platform-specific: greenlet for macOS only (critical for SQLAlchemy async) + "greenlet>=3.0.0; sys_platform == 'darwin'", + "nltk>=3.9.1", + "openai>=1.98.0", + "pathvalidate>=3.3.1", + "pdfplumber>=0.11.7", + # Platform-specific: docx2everything for DOCX/Markdown extraction on Windows + "docx2everything>=1.0.0; sys_platform == 'win32'", + "pyinstaller>=6.18.0", + "pytest>=8.4.1", + "python-pptx>=1.0.2", + "redis>=6.2.0", + "sqlmodel>=0.0.24", +] + +[tool.uv] +index-strategy = "unsafe-best-match" + +[[tool.uv.index]] +url = "https://download.pytorch.org/whl/cpu" + +[dependency-groups] +dev = [ +] + +[tool.setuptools.packages.find] +where = ["."] +include = ["api*", "enums*", "models*", "services*", "constants*", "utils*"] diff --git a/electron/servers/fastapi/runtime_hook_docling.py b/electron/servers/fastapi/runtime_hook_docling.py deleted file mode 100644 index b13aed51..00000000 --- a/electron/servers/fastapi/runtime_hook_docling.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -Runtime hook to fix docling metadata lookup and python-docx template path resolution in PyInstaller builds. - -PyInstaller doesn't always preserve package metadata (dist-info) in a way that -importlib.metadata can find it. This hook patches the version lookup to return -a default version if metadata isn't found, allowing docling to import successfully. - -Additionally, python-docx uses __file__ to locate template files, which doesn't work -correctly in PyInstaller bundles. This hook patches the path resolution to use -sys._MEIPASS to find the templates. -""" -import sys -import os - -# Only apply this fix when running in PyInstaller bundle -if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'): - try: - import importlib.metadata - - # Store original version function - _original_version = importlib.metadata.version - - def _patched_version(package_name): - """Patched version that handles missing metadata gracefully.""" - try: - return _original_version(package_name) - except importlib.metadata.PackageNotFoundError: - # For docling packages, return a default version if metadata not found - if package_name in ('docling', 'docling-core', 'docling-parse', 'docling-ibm-models'): - # Return a reasonable default version to allow import to proceed - return '2.43.0' - raise - - # Patch the version function - importlib.metadata.version = _patched_version - - except Exception: - # If patching fails, continue anyway - pass - - # Fix python-docx template path resolution - try: - import docx.parts.hdrftr as hdrftr_module - - # Store the original _default_header_xml function - if hasattr(hdrftr_module, '_default_header_xml'): - _original_default_header_xml = hdrftr_module._default_header_xml - - def _patched_default_header_xml(): - """Patched function that resolves template path correctly in PyInstaller bundle.""" - # Try to find the template file in the bundle - template_path = os.path.join(sys._MEIPASS, 'docx', 'templates', 'default-header.xml') - if os.path.exists(template_path): - with open(template_path, 'rb') as f: - return f.read() - # Fallback to original implementation - return _original_default_header_xml() - - # Patch the function - hdrftr_module._default_header_xml = _patched_default_header_xml - - # Also patch _default_footer_xml if it exists - if hasattr(hdrftr_module, '_default_footer_xml'): - _original_default_footer_xml = hdrftr_module._default_footer_xml - - def _patched_default_footer_xml(): - """Patched function that resolves template path correctly in PyInstaller bundle.""" - template_path = os.path.join(sys._MEIPASS, 'docx', 'templates', 'default-footer.xml') - if os.path.exists(template_path): - with open(template_path, 'rb') as f: - return f.read() - return _original_default_footer_xml() - - hdrftr_module._default_footer_xml = _patched_default_footer_xml - - except Exception: - # If patching fails, continue anyway - pass \ No newline at end of file diff --git a/electron/servers/fastapi/server.spec b/electron/servers/fastapi/server.spec index b4220572..bb556d92 100644 --- a/electron/servers/fastapi/server.spec +++ b/electron/servers/fastapi/server.spec @@ -17,18 +17,6 @@ datas_docx2everything, binaries_docx2everything, hiddenimports_docx2everything = # collect_all returns empty lists if package not installed, so safe to call always datas_greenlet, binaries_greenlet, hiddenimports_greenlet = collect_all('greenlet') -# Collect docling - only installed on Linux/macOS (via pyproject.toml) -# collect_all returns empty lists if package not installed, so safe to call always -datas_docling, binaries_docling, hiddenimports_docling = collect_all('docling') -# Also collect docling dependencies which are needed for metadata lookup -datas_docling_core, binaries_docling_core, hiddenimports_docling_core = collect_all('docling-core') -datas_docling_parse, binaries_docling_parse, hiddenimports_docling_parse = collect_all('docling-parse') -datas_docling_ibm, binaries_docling_ibm, hiddenimports_docling_ibm = collect_all('docling-ibm-models') - -# Collect python-docx (dependency of docling) - needed for Word document processing on Linux/macOS -# collect_all returns empty lists if package not installed, so safe to call conditionally -datas_docx, binaries_docx, hiddenimports_docx = collect_all('docx') - # fastembed_cache is created at runtime when models are first used; include only if present (e.g. local dev) datas_fastembed_cache = [('fastembed_cache', 'fastembed_cache')] if os.path.isdir('fastembed_cache') else [] @@ -37,12 +25,12 @@ excludes = [] a = Analysis( ['server.py'], pathex=[], - binaries=binaries_fastembed + binaries_fastembed_vs + binaries_onnx + binaries_pptx + binaries_docx2everything + binaries_greenlet + binaries_docling + binaries_docling_core + binaries_docling_parse + binaries_docling_ibm + binaries_docx, + binaries=binaries_fastembed + binaries_fastembed_vs + binaries_onnx + binaries_pptx + binaries_docx2everything + binaries_greenlet, datas=[ ('assets', 'assets'), ('static', 'static'), ('alembic', 'alembic'), - ] + datas_fastembed_cache + datas_fastembed + datas_fastembed_vs + datas_onnx + datas_pptx + datas_docx2everything + datas_greenlet + datas_docling + datas_docling_core + datas_docling_parse + datas_docling_ibm + datas_docx, + ] + datas_fastembed_cache + datas_fastembed + datas_fastembed_vs + datas_onnx + datas_pptx + datas_docx2everything + datas_greenlet, hiddenimports=[ 'aiosqlite', 'alembic', @@ -52,10 +40,10 @@ a = Analysis( 'greenlet', 'greenlet._greenlet', 'importlib.metadata', - ] + hiddenimports_fastembed + hiddenimports_fastembed_vs + hiddenimports_onnx + hiddenimports_pptx + hiddenimports_docx2everything + hiddenimports_greenlet + hiddenimports_docling + hiddenimports_docling_core + hiddenimports_docling_parse + hiddenimports_docling_ibm + hiddenimports_docx, + ] + hiddenimports_fastembed + hiddenimports_fastembed_vs + hiddenimports_onnx + hiddenimports_pptx + hiddenimports_docx2everything + hiddenimports_greenlet, hookspath=[], hooksconfig={}, - runtime_hooks=['runtime_hook_docling.py'], + runtime_hooks=[], excludes=excludes, noarchive=False, optimize=0, diff --git a/electron/servers/fastapi/services/docling_service.py b/electron/servers/fastapi/services/docling_service.py deleted file mode 100644 index a04430f9..00000000 --- a/electron/servers/fastapi/services/docling_service.py +++ /dev/null @@ -1,38 +0,0 @@ -from docling.document_converter import ( - DocumentConverter, - PdfFormatOption, - PowerpointFormatOption, - WordFormatOption, -) -from docling.datamodel.pipeline_options import PdfPipelineOptions -from docling.datamodel.base_models import InputFormat -from utils.path_helpers import patch_python_docx_templates - - -class DoclingService: - def __init__(self): - # Patch python-docx template path resolution before initializing converter - # This is safe to call in any environment (Docker, development, PyInstaller) - patch_python_docx_templates() - - self.pipeline_options = PdfPipelineOptions() - self.pipeline_options.do_ocr = False - - self.converter = DocumentConverter( - allowed_formats=[InputFormat.PPTX, InputFormat.PDF, InputFormat.DOCX], - format_options={ - InputFormat.DOCX: WordFormatOption( - pipeline_options=self.pipeline_options, - ), - InputFormat.PPTX: PowerpointFormatOption( - pipeline_options=self.pipeline_options, - ), - InputFormat.PDF: PdfFormatOption( - pipeline_options=self.pipeline_options, - ), - }, - ) - - def parse_to_markdown(self, file_path: str) -> str: - result = self.converter.convert(file_path) - return result.document.export_to_markdown() diff --git a/electron/servers/fastapi/services/documents_loader.py b/electron/servers/fastapi/services/documents_loader.py index a84e393a..f9bc6b23 100644 --- a/electron/servers/fastapi/services/documents_loader.py +++ b/electron/servers/fastapi/services/documents_loader.py @@ -1,45 +1,37 @@ import mimetypes -import sys from fastapi import HTTPException import os, asyncio from typing import List, Optional, Tuple import pdfplumber from constants.documents import ( + IMAGE_MIME_TYPES, PDF_MIME_TYPES, POWERPOINT_TYPES, TEXT_MIME_TYPES, WORD_TYPES, ) +from services.liteparse_service import LiteParseError, LiteParseService +from utils.ocr_language import presentation_language_to_ocr_code -# Platform-specific document service imports -is_windows = sys.platform == 'win32' -if not is_windows: - from services.docling_service import DoclingService - DocumentService = None -else: - DoclingService = None +# Optional fallback converter (primarily useful on Windows) +try: from services.lightweight_document_service import DocumentService +except Exception: + DocumentService = None class DocumentsLoader: - def __init__(self, file_paths: List[str]): + def __init__( + self, + file_paths: List[str], + presentation_language: Optional[str] = None, + ): self._file_paths = file_paths - - # Initialize document service based on platform - if not is_windows and DoclingService is not None: - # Use DoclingService on Linux/macOS - self.docling_service = DoclingService() - self.document_service = None - elif is_windows and DocumentService is not None: - # Use lightweight DocumentService on Windows - self.docling_service = None - self.document_service = DocumentService() - else: - # Fallback if neither is available - self.docling_service = None - self.document_service = None + self._ocr_language = presentation_language_to_ocr_code(presentation_language) + self.liteparse_service = LiteParseService() + self.document_service = DocumentService() if DocumentService is not None else None self._documents: List[str] = [] self._images: List[List[str]] = [] @@ -83,6 +75,8 @@ class DocumentsLoader: document = self.load_powerpoint(file_path) elif mime_type in WORD_TYPES: document = self.load_msword(file_path) + elif mime_type in IMAGE_MIME_TYPES: + document = self.load_image(file_path) documents.append(document) images.append(imgs) @@ -101,43 +95,43 @@ class DocumentsLoader: document: str = "" if load_text: - document = await self.load_text_from_pdf_locally(file_path) + document = await asyncio.to_thread(self._parse_with_liteparse, file_path) if load_images: image_paths = await self.get_page_images_from_pdf_async(file_path, temp_dir) return document, image_paths - async def load_text_from_pdf_locally(self, file_path: str) -> str: - return await asyncio.to_thread(self._extract_text_from_pdf, file_path) - - @staticmethod - def _extract_text_from_pdf(file_path: str) -> str: - texts: List[str] = [] - with pdfplumber.open(file_path) as pdf: - for idx, page in enumerate(pdf.pages): - page_text = f"## Page {idx + 1}\n" - page_text += page.extract_text() or "" - texts.append(page_text) - return "\n\n".join(texts) - async def load_text(self, file_path: str) -> str: with open(file_path, "r", encoding="utf-8") as file: return await asyncio.to_thread(file.read) def load_msword(self, file_path: str) -> str: - if self.docling_service is not None: - return self.docling_service.parse_to_markdown(file_path) - elif self.document_service is not None: - return self.document_service.parse_to_markdown(file_path) - return "" # Document service not available + return self._parse_with_liteparse(file_path) def load_powerpoint(self, file_path: str) -> str: - if self.docling_service is not None: - return self.docling_service.parse_to_markdown(file_path) - elif self.document_service is not None: - return self.document_service.parse_to_markdown(file_path) - return "" # Document service not available + return self._parse_with_liteparse(file_path) + + def load_image(self, file_path: str) -> str: + return self._parse_with_liteparse(file_path) + + def _parse_with_liteparse(self, file_path: str) -> str: + try: + return self.liteparse_service.parse_to_markdown( + file_path, + ocr_enabled=True, + ocr_language=self._ocr_language, + ) + except LiteParseError as exc: + if self.document_service is not None: + try: + return self.document_service.parse_to_markdown(file_path) + except Exception: + pass + raise HTTPException( + status_code=500, + detail=f"Failed to parse document {os.path.basename(file_path)}: {exc}", + ) from exc @classmethod def get_page_images_from_pdf(cls, file_path: str, temp_dir: str) -> List[str]: diff --git a/electron/servers/fastapi/services/lightweight_document_service.py b/electron/servers/fastapi/services/lightweight_document_service.py index 8aaf24d0..b89e4325 100644 --- a/electron/servers/fastapi/services/lightweight_document_service.py +++ b/electron/servers/fastapi/services/lightweight_document_service.py @@ -1,177 +1,177 @@ -""" -Lightweight document converter for Windows/MSIX compatibility. -Uses pure-Python libraries: pdfplumber for PDF, docx2txt for DOCX, python-pptx for PPTX. -No subprocess, no external runtimes, MSIX/Appx safe. -""" -import os -from typing import List, Optional - -import docx2everything -import pdfplumber -from pptx import Presentation - - -class LightweightDocumentConverter: - """Lightweight document converter supporting PDF, DOCX, and PPTX.""" - - def convert(self, file_path: str) -> str: - """ - Convert document to markdown text. - - Args: - file_path: Path to the document file - - Returns: - Extracted text in markdown format - - Raises: - ValueError: If file format is not supported - FileNotFoundError: If file does not exist - """ - if not os.path.exists(file_path): - raise FileNotFoundError(f"File not found: {file_path}") - - file_ext = os.path.splitext(file_path)[1].lower() - - if file_ext == '.pdf': - return self._convert_pdf(file_path) - elif file_ext == '.docx': - return self._convert_docx(file_path) - elif file_ext == '.pptx': - return self._convert_pptx(file_path) - else: - raise ValueError(f"Unsupported file format: {file_ext}") - - def _convert_pdf(self, path: str) -> str: - """ - Convert PDF to markdown using pdfplumber. - - Args: - path: Path to PDF file - - Returns: - Extracted text in markdown format - """ - texts: List[str] = [] - with pdfplumber.open(path) as pdf: - for idx, page in enumerate(pdf.pages): - page_text = f"## Page {idx + 1}\n" - page_text += page.extract_text() or "" - texts.append(page_text) - return "\n\n".join(texts) - - def _convert_docx(self, path: str) -> str: - """ - Extract markdown from DOCX using docx2everything (no images). - - Args: - path: Path to DOCX file - - Returns: - Extracted markdown (no images) - """ - # Use the correct API: process_to_markdown(path) without img_dir extracts markdown without images - markdown = docx2everything.process_to_markdown(path) - return markdown if markdown else "" - - def _convert_pptx(self, path: str) -> str: - """ - Convert PPTX to markdown using python-pptx. - - Args: - path: Path to PPTX file - - Returns: - Extracted text in markdown format - """ - prs = Presentation(path) - markdown_parts = [] - - for slide_num, slide in enumerate(prs.slides, start=1): - slide_parts = [] - - # Extract slide title (usually first shape with title placeholder) - title_text = None - for shape in slide.shapes: - if hasattr(shape, "placeholder"): - if shape.placeholder.placeholder_format.type == 1: # Title placeholder - if hasattr(shape, "text") and shape.text.strip(): - title_text = shape.text.strip() - break - - # If no title placeholder found, try to find text box at top - if not title_text: - for shape in slide.shapes: - if hasattr(shape, "text") and shape.text.strip(): - # Check if it's likely a title (first text shape, short text) - text = shape.text.strip() - if len(text) < 200: # Heuristic: titles are usually short - title_text = text - break - - # Add slide title - if title_text: - slide_parts.append(f"# {title_text}") - else: - slide_parts.append(f"# Slide {slide_num}") - - # Extract content (bullet points and text) - for shape in slide.shapes: - if not hasattr(shape, "text"): - continue - - text = shape.text.strip() - if not text: - continue - - # Skip if this is the title we already added - if title_text and text == title_text: - continue - - # Check if it's a text frame with paragraphs (bullet points) - if hasattr(shape, "text_frame"): - paragraphs = shape.text_frame.paragraphs - if len(paragraphs) > 1: - # Multiple paragraphs - likely bullet points - for para in paragraphs: - para_text = para.text.strip() - if para_text: - # Check bullet level - level = para.level - indent = " " * level - slide_parts.append(f"{indent}- {para_text}") - else: - # Single paragraph - if text and text != title_text: - slide_parts.append(text) - else: - # Plain text shape - if text and text != title_text: - slide_parts.append(text) - - if slide_parts: - markdown_parts.append("\n".join(slide_parts)) - - return "\n\n---\n\n".join(markdown_parts) - - -class DocumentService: - """ - Document service wrapper providing parse_to_markdown interface. - Compatible with DoclingService interface for easy swapping. - """ - - def __init__(self): - self.converter = LightweightDocumentConverter() - - def parse_to_markdown(self, file_path: str) -> str: - """ - Parse document to markdown format. - - Args: - file_path: Path to the document file - - Returns: - Extracted text in markdown format - """ - return self.converter.convert(file_path) +""" +Lightweight document converter for Windows/MSIX compatibility. +Uses pure-Python libraries: pdfplumber for PDF, docx2txt for DOCX, python-pptx for PPTX. +No subprocess, no external runtimes, MSIX/Appx safe. +""" +import os +from typing import List, Optional + +import docx2everything +import pdfplumber +from pptx import Presentation + + +class LightweightDocumentConverter: + """Lightweight document converter supporting PDF, DOCX, and PPTX.""" + + def convert(self, file_path: str) -> str: + """ + Convert document to markdown text. + + Args: + file_path: Path to the document file + + Returns: + Extracted text in markdown format + + Raises: + ValueError: If file format is not supported + FileNotFoundError: If file does not exist + """ + if not os.path.exists(file_path): + raise FileNotFoundError(f"File not found: {file_path}") + + file_ext = os.path.splitext(file_path)[1].lower() + + if file_ext == '.pdf': + return self._convert_pdf(file_path) + elif file_ext == '.docx': + return self._convert_docx(file_path) + elif file_ext == '.pptx': + return self._convert_pptx(file_path) + else: + raise ValueError(f"Unsupported file format: {file_ext}") + + def _convert_pdf(self, path: str) -> str: + """ + Convert PDF to markdown using pdfplumber. + + Args: + path: Path to PDF file + + Returns: + Extracted text in markdown format + """ + texts: List[str] = [] + with pdfplumber.open(path) as pdf: + for idx, page in enumerate(pdf.pages): + page_text = f"## Page {idx + 1}\n" + page_text += page.extract_text() or "" + texts.append(page_text) + return "\n\n".join(texts) + + def _convert_docx(self, path: str) -> str: + """ + Extract markdown from DOCX using docx2everything (no images). + + Args: + path: Path to DOCX file + + Returns: + Extracted markdown (no images) + """ + # Use the correct API: process_to_markdown(path) without img_dir extracts markdown without images + markdown = docx2everything.process_to_markdown(path) + return markdown if markdown else "" + + def _convert_pptx(self, path: str) -> str: + """ + Convert PPTX to markdown using python-pptx. + + Args: + path: Path to PPTX file + + Returns: + Extracted text in markdown format + """ + prs = Presentation(path) + markdown_parts = [] + + for slide_num, slide in enumerate(prs.slides, start=1): + slide_parts = [] + + # Extract slide title (usually first shape with title placeholder) + title_text = None + for shape in slide.shapes: + if hasattr(shape, "placeholder"): + if shape.placeholder.placeholder_format.type == 1: # Title placeholder + if hasattr(shape, "text") and shape.text.strip(): + title_text = shape.text.strip() + break + + # If no title placeholder found, try to find text box at top + if not title_text: + for shape in slide.shapes: + if hasattr(shape, "text") and shape.text.strip(): + # Check if it's likely a title (first text shape, short text) + text = shape.text.strip() + if len(text) < 200: # Heuristic: titles are usually short + title_text = text + break + + # Add slide title + if title_text: + slide_parts.append(f"# {title_text}") + else: + slide_parts.append(f"# Slide {slide_num}") + + # Extract content (bullet points and text) + for shape in slide.shapes: + if not hasattr(shape, "text"): + continue + + text = shape.text.strip() + if not text: + continue + + # Skip if this is the title we already added + if title_text and text == title_text: + continue + + # Check if it's a text frame with paragraphs (bullet points) + if hasattr(shape, "text_frame"): + paragraphs = shape.text_frame.paragraphs + if len(paragraphs) > 1: + # Multiple paragraphs - likely bullet points + for para in paragraphs: + para_text = para.text.strip() + if para_text: + # Check bullet level + level = para.level + indent = " " * level + slide_parts.append(f"{indent}- {para_text}") + else: + # Single paragraph + if text and text != title_text: + slide_parts.append(text) + else: + # Plain text shape + if text and text != title_text: + slide_parts.append(text) + + if slide_parts: + markdown_parts.append("\n".join(slide_parts)) + + return "\n\n---\n\n".join(markdown_parts) + + +class DocumentService: + """ + Document service wrapper providing parse_to_markdown interface. + Same parse_to_markdown entry point as LiteParseService for optional Windows fallback. + """ + + def __init__(self): + self.converter = LightweightDocumentConverter() + + def parse_to_markdown(self, file_path: str) -> str: + """ + Parse document to markdown format. + + Args: + file_path: Path to the document file + + Returns: + Extracted text in markdown format + """ + return self.converter.convert(file_path) diff --git a/electron/servers/fastapi/services/liteparse_service.py b/electron/servers/fastapi/services/liteparse_service.py new file mode 100644 index 00000000..eacaaca6 --- /dev/null +++ b/electron/servers/fastapi/services/liteparse_service.py @@ -0,0 +1,197 @@ +import json +import os +import subprocess +from typing import Any, Dict, Tuple + + +class LiteParseError(Exception): + pass + + +class LiteParseService: + def __init__(self, timeout_seconds: int = 180): + self.timeout_seconds = timeout_seconds + self.node_binary = os.getenv("LITEPARSE_NODE_BINARY", "node") + self.runner_path = os.getenv("LITEPARSE_RUNNER_PATH", self._resolve_runner_path()) + self.runner_dir = os.path.dirname(self.runner_path) + self._npm_project_root = self._resolve_npm_project_root() + + def _resolve_npm_project_root(self) -> str: + """Directory whose node_modules contains @llamaindex/liteparse (runner dir or Electron app root).""" + local_nm = os.path.join( + self.runner_dir, "node_modules", "@llamaindex", "liteparse" + ) + if os.path.isdir(local_nm): + return self.runner_dir + electron_nm = os.path.abspath( + os.path.join(self.runner_dir, "..", "..", "node_modules", "@llamaindex", "liteparse") + ) + if os.path.isdir(electron_nm): + return os.path.abspath(os.path.join(self.runner_dir, "..", "..")) + return os.path.abspath(os.path.join(self.runner_dir, "..", "..")) + + @staticmethod + def _resolve_runner_path() -> str: + cwd = os.path.abspath(".") + candidates = [ + # electron/servers/fastapi → electron/resources/... + os.path.abspath( + os.path.join( + cwd, "..", "..", "resources", "document-extraction", "liteparse_runner.mjs" + ) + ), + # servers/fastapi (repo root layout) → electron/resources/... + os.path.abspath( + os.path.join( + cwd, + "..", + "..", + "electron", + "resources", + "document-extraction", + "liteparse_runner.mjs", + ) + ), + # PyInstaller bundle layout + os.path.abspath( + os.path.join( + cwd, "..", "..", "app", "resources", "document-extraction", "liteparse_runner.mjs" + ) + ), + # Docker / explicit layout + "/app/document-extraction-liteparse/liteparse_runner.mjs", + ] + for path in candidates: + if os.path.isfile(path): + return path + return candidates[0] + + def check_runtime_ready(self) -> Tuple[bool, str]: + if not os.path.isfile(self.runner_path): + return False, f"LiteParse runner not found at: {self.runner_path}" + + try: + subprocess.run( + [self.node_binary, "--version"], + cwd=self.runner_dir, + check=True, + capture_output=True, + text=True, + timeout=10, + ) + except Exception as exc: + return False, f"Node.js runtime is unavailable: {exc}" + + liteparse_dir = os.path.join( + self._npm_project_root, "node_modules", "@llamaindex", "liteparse" + ) + if not os.path.isdir(liteparse_dir): + return ( + False, + f"LiteParse npm package missing at {liteparse_dir}. Run npm install in the Electron app directory.", + ) + + # @llamaindex/liteparse is ESM-only; require.resolve() fails. Use dynamic import. + try: + subprocess.run( + [ + self.node_binary, + "--input-type=module", + "-e", + "import '@llamaindex/liteparse'", + ], + cwd=self._npm_project_root, + check=True, + capture_output=True, + text=True, + timeout=20, + ) + except Exception as exc: + return False, f"LiteParse dependency is unavailable: {exc}" + + return True, "ok" + + def parse_to_markdown( + self, + file_path: str, + ocr_enabled: bool = True, + ocr_language: str = "eng", + ) -> str: + result = self.parse( + file_path=file_path, + ocr_enabled=ocr_enabled, + ocr_language=ocr_language, + ) + return str(result.get("text") or "") + + def parse( + self, + file_path: str, + ocr_enabled: bool = True, + ocr_language: str = "eng", + ) -> Dict[str, Any]: + is_ready, reason = self.check_runtime_ready() + if not is_ready: + raise LiteParseError(reason) + + command = [ + self.node_binary, + self.runner_path, + "--file", + file_path, + "--ocr-enabled", + "true" if ocr_enabled else "false", + "--ocr-language", + ocr_language, + ] + ocr_server = (os.getenv("LITEPARSE_OCR_SERVER_URL") or "").strip() + if ocr_server: + command.extend(["--ocr-server-url", ocr_server]) + tessdata = (os.getenv("LITEPARSE_TESSDATA_PATH") or "").strip() + if tessdata: + command.extend(["--tessdata-path", tessdata]) + + process = subprocess.run( + command, + cwd=self._npm_project_root, + capture_output=True, + text=True, + timeout=self.timeout_seconds, + env=os.environ.copy(), + ) + payload = self._decode_runner_output(process.stdout) + + if process.returncode != 0: + message = payload.get("error") or process.stderr.strip() or "Unknown error" + raise LiteParseError(message) + + if not payload.get("ok"): + raise LiteParseError(payload.get("error") or "LiteParse parse failed") + + return payload + + @staticmethod + def _decode_runner_output(stdout: str) -> Dict[str, Any]: + raw = (stdout or "").lstrip("\ufeff").strip() + if not raw: + raise LiteParseError("LiteParse runner returned empty output") + + # Prefer the last line that parses as JSON (handles stray log lines before our payload). + lines = [line.strip() for line in raw.splitlines() if line.strip()] + for line in reversed(lines): + try: + parsed = json.loads(line) + if isinstance(parsed, dict): + return parsed + except json.JSONDecodeError: + continue + + # Single blob without newlines (entire stdout is one JSON object). + try: + parsed = json.loads(raw) + if isinstance(parsed, dict): + return parsed + except json.JSONDecodeError: + pass + + raise LiteParseError("LiteParse runner returned invalid JSON output") diff --git a/electron/servers/fastapi/utils/ocr_language.py b/electron/servers/fastapi/utils/ocr_language.py new file mode 100644 index 00000000..aa988f27 --- /dev/null +++ b/electron/servers/fastapi/utils/ocr_language.py @@ -0,0 +1,126 @@ +""" +Map presentation UI language strings (LanguageType enum values from Next.js) to +Tesseract / LiteParse OCR language codes (ISO 639-3 where applicable). + +Keep keys in sync with: +electron/servers/nextjs/app/(presentation-generator)/upload/type.ts → LanguageType +""" + +from __future__ import annotations + +import re +from typing import Optional + +# Values must match `LanguageType` string literals in the upload UI. +PRESENTATION_LANGUAGE_TO_TESSERACT: dict[str, str] = { + "English": "eng", + "Spanish (Español)": "spa", + "French (Français)": "fra", + "German (Deutsch)": "deu", + "Portuguese (Português)": "por", + "Italian (Italiano)": "ita", + "Dutch (Nederlands)": "nld", + "Russian (Русский)": "rus", + "Chinese (Simplified - 中文, 汉语)": "chi_sim", + "Chinese (Traditional - 中文, 漢語)": "chi_tra", + "Japanese (日本語)": "jpn", + "Korean (한국어)": "kor", + "Arabic (العربية)": "ara", + "Hindi (हिन्दी)": "hin", + "Bengali (বাংলা)": "ben", + "Polish (Polski)": "pol", + "Czech (Čeština)": "ces", + "Slovak (Slovenčina)": "slk", + "Hungarian (Magyar)": "hun", + "Romanian (Română)": "ron", + "Bulgarian (Български)": "bul", + "Greek (Ελληνικά)": "ell", + "Serbian (Српски / Srpski)": "srp", + "Croatian (Hrvatski)": "hrv", + "Bosnian (Bosanski)": "bos", + "Slovenian (Slovenščina)": "slv", + "Finnish (Suomi)": "fin", + "Swedish (Svenska)": "swe", + "Danish (Dansk)": "dan", + "Norwegian (Norsk)": "nor", + "Icelandic (Íslenska)": "isl", + "Lithuanian (Lietuvių)": "lit", + "Latvian (Latviešu)": "lav", + "Estonian (Eesti)": "est", + "Maltese (Malti)": "mlt", + "Welsh (Cymraeg)": "cym", + "Irish (Gaeilge)": "gle", + "Scottish Gaelic (Gàidhlig)": "gla", + "Ukrainian (Українська)": "ukr", + "Hebrew (עברית)": "heb", + "Persian/Farsi (فارسی)": "fas", + "Turkish (Türkçe)": "tur", + "Kurdish (Kurdî / کوردی)": "kmr", + "Pashto (پښتو)": "pus", + "Dari (دری)": "prs", + "Uzbek (Oʻzbek)": "uzb", + "Kazakh (Қазақша)": "kaz", + "Tajik (Тоҷикӣ)": "tgk", + "Turkmen (Türkmençe)": "tuk", + "Azerbaijani (Azərbaycan dili)": "aze", + "Urdu (اردو)": "urd", + "Tamil (தமிழ்)": "tam", + "Telugu (తెలుగు)": "tel", + "Marathi (मराठी)": "mar", + "Punjabi (ਪੰਜਾਬੀ / پنجابی)": "pan", + "Gujarati (ગુજરાતી)": "guj", + "Malayalam (മലയാളം)": "mal", + "Kannada (ಕನ್ನಡ)": "kan", + "Odia (ଓଡ଼ିଆ)": "ori", + "Sinhala (සිංහල)": "sin", + "Nepali (नेपाली)": "nep", + "Thai (ไทย)": "tha", + "Vietnamese (Tiếng Việt)": "vie", + "Lao (ລາວ)": "lao", + "Khmer (ភាសាខ្មែរ)": "khm", + "Burmese (မြန်မာစာ)": "mya", + "Tagalog/Filipino (Tagalog/Filipino)": "tgl", + "Javanese (Basa Jawa)": "jav", + "Sundanese (Basa Sunda)": "sun", + "Malay (Bahasa Melayu)": "msa", + "Mongolian (Монгол)": "mon", + "Swahili (Kiswahili)": "swa", + "Hausa (Hausa)": "hau", + "Yoruba (Yorùbá)": "yor", + "Igbo (Igbo)": "ibo", + "Amharic (አማርኛ)": "amh", + "Zulu (isiZulu)": "zul", + "Xhosa (isiXhosa)": "xho", + "Shona (ChiShona)": "sna", + "Somali (Soomaaliga)": "som", + "Basque (Euskara)": "eus", + "Catalan (Català)": "cat", + "Galician (Galego)": "glg", + "Quechua (Runasimi)": "que", + "Nahuatl (Nāhuatl)": "nah", + "Hawaiian (ʻŌlelo Hawaiʻi)": "haw", + "Maori (Te Reo Māori)": "mri", + # No dedicated Tahitian traineddata in default Tesseract bundles. + "Tahitian (Reo Tahiti)": "eng", + "Samoan (Gagana Samoa)": "smo", +} + +_LOWER_MAP = {k.lower(): v for k, v in PRESENTATION_LANGUAGE_TO_TESSERACT.items()} + +_OCR_CODE_RE = re.compile(r"^[a-zA-Z0-9_,+]+$") + + +def presentation_language_to_ocr_code(language: Optional[str]) -> str: + """Resolve UI language label to a Tesseract language code; default English.""" + if language is None: + return "eng" + s = str(language).strip() + if not s: + return "eng" + if s in PRESENTATION_LANGUAGE_TO_TESSERACT: + code = PRESENTATION_LANGUAGE_TO_TESSERACT[s] + else: + code = _LOWER_MAP.get(s.lower(), "eng") + if not _OCR_CODE_RE.fullmatch(code): + return "eng" + return code diff --git a/electron/servers/fastapi/utils/path_helpers.py b/electron/servers/fastapi/utils/path_helpers.py index 60bff599..5cfbfb79 100644 --- a/electron/servers/fastapi/utils/path_helpers.py +++ b/electron/servers/fastapi/utils/path_helpers.py @@ -156,7 +156,7 @@ def patch_python_docx_templates(): - Docker/Development: Returns immediately without patching (no-op) - PyInstaller: Patches the template loading functions - Note: This should be called before using docling service in PyInstaller bundles. + Note: Call before any code path that uses python-docx inside a PyInstaller bundle. """ # Only patch if running in PyInstaller bundle # This check ensures Docker and development environments are unaffected diff --git a/electron/servers/fastapi/uv.lock b/electron/servers/fastapi/uv.lock index 74a53a69..3a6c2973 100644 --- a/electron/servers/fastapi/uv.lock +++ b/electron/servers/fastapi/uv.lock @@ -7,25 +7,6 @@ resolution-markers = [ "sys_platform == 'darwin'", ] -[[package]] -name = "accelerate" -version = "1.12.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "huggingface-hub" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "psutil" }, - { name = "pyyaml" }, - { name = "safetensors" }, - { name = "torch", version = "2.10.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/4a/8e/ac2a9566747a93f8be36ee08532eb0160558b07630a081a6056a9f89bf1d/accelerate-1.12.0.tar.gz", hash = "sha256:70988c352feb481887077d2ab845125024b2a137a5090d6d7a32b57d03a45df6", size = 398399, upload-time = "2025-11-21T11:27:46.973Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9f/d2/c581486aa6c4fbd7394c23c47b83fa1a919d34194e16944241daf9e762dd/accelerate-1.12.0-py3-none-any.whl", hash = "sha256:3e2091cd341423207e2f084a6654b1efcd250dc326f2a37d6dde446e07cabb11", size = 380935, upload-time = "2025-11-21T11:27:44.522Z" }, -] - [[package]] name = "aiohappyeyeballs" version = "2.6.1" @@ -156,12 +137,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/bb/d84f287fb1c217b30c328af987cf8bbe3897edf0518dcc5fa39412f794ec/anthropic-0.60.0-py3-none-any.whl", hash = "sha256:65ad1f088a960217aaf82ba91ff743d6c89e9d811c6d64275b9a7c59ee9ac3c6", size = 293116, upload-time = "2025-07-28T19:53:45.944Z" }, ] -[[package]] -name = "antlr4-python3-runtime" -version = "4.9.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d19459005ca000b6e7012f2f1ca597746cbcd1fbfe5e/antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b", size = 117034, upload-time = "2021-11-06T17:52:23.524Z" } - [[package]] name = "anyio" version = "4.9.0" @@ -222,19 +197,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f9/58/cc6a08053f822f98f334d38a27687b69c6655fb05cd74a7a5e70a2aeed95/authlib-1.6.1-py2.py3-none-any.whl", hash = "sha256:e9d2031c34c6309373ab845afc24168fe9e93dc52d252631f52642f21f5ed06e", size = 239299, upload-time = "2025-07-20T07:38:39.259Z" }, ] -[[package]] -name = "beautifulsoup4" -version = "4.14.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "soupsieve" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload-time = "2025-11-30T15:08:26.084Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" }, -] - [[package]] name = "certifi" version = "2025.8.3" @@ -309,18 +271,6 @@ wheels = [ { url = "https://download.pytorch.org/whl/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" }, ] -[[package]] -name = "colorlog" -version = "6.10.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a2/61/f083b5ac52e505dfc1c624eafbf8c7589a0d7f32daa398d2e7590efa5fda/colorlog-6.10.1.tar.gz", hash = "sha256:eb4ae5cb65fe7fec7773c2306061a8e63e02efc2c72eba9d27b0fa23c94f1321", size = 17162, upload-time = "2025-10-16T16:14:11.978Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/c1/e419ef3723a074172b68aaa89c9f3de486ed4c2399e2dbd8113a4fdcaf9e/colorlog-6.10.1-py3-none-any.whl", hash = "sha256:2d7e8348291948af66122cff006c9f8da6255d224e7cf8e37d8de2df3bad8c9c", size = 11743, upload-time = "2025-10-16T16:14:10.512Z" }, -] - [[package]] name = "cryptography" version = "45.0.5" @@ -377,24 +327,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/df/e5/a7b6db64f08cfe065e531ec6b508fa7dac704fab70d05adb5bc0c2c1d1b6/cyclopts-3.22.5-py3-none-any.whl", hash = "sha256:92efb4a094d9812718d7efe0bffa319a19cb661f230dbf24406c18cd8809fb82", size = 84994, upload-time = "2025-07-31T18:18:35.939Z" }, ] -[[package]] -name = "defusedxml" -version = "0.7.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, -] - -[[package]] -name = "dill" -version = "0.4.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/81/e1/56027a71e31b02ddc53c7d65b01e68edf64dea2932122fe7746a516f75d5/dill-0.4.1.tar.gz", hash = "sha256:423092df4182177d4d8ba8290c8a5b640c66ab35ec7da59ccfa00f6fa3eea5fa", size = 187315, upload-time = "2026-01-19T02:36:56.85Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/77/dc8c558f7593132cf8fefec57c4f60c83b16941c574ac5f619abb3ae7933/dill-0.4.1-py3-none-any.whl", hash = "sha256:1e1ce33e978ae97fcfcff5638477032b801c46c7c65cf717f95fbc2248f79a9d", size = 120019, upload-time = "2026-01-19T02:36:55.663Z" }, -] - [[package]] name = "dirtyjson" version = "1.0.8" @@ -422,122 +354,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86", size = 313632, upload-time = "2024-10-05T20:14:57.687Z" }, ] -[[package]] -name = "docling" -version = "2.73.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "accelerate" }, - { name = "beautifulsoup4" }, - { name = "certifi" }, - { name = "docling-core", extra = ["chunking"] }, - { name = "docling-ibm-models" }, - { name = "docling-parse" }, - { name = "filetype" }, - { name = "huggingface-hub" }, - { name = "lxml" }, - { name = "marko" }, - { name = "ocrmac", marker = "sys_platform == 'darwin'" }, - { name = "openpyxl" }, - { name = "pandas" }, - { name = "pillow" }, - { name = "pluggy" }, - { name = "polyfactory" }, - { name = "pydantic" }, - { name = "pydantic-settings" }, - { name = "pylatexenc" }, - { name = "pypdfium2" }, - { name = "python-docx" }, - { name = "python-pptx" }, - { name = "rapidocr" }, - { name = "requests" }, - { name = "rtree" }, - { name = "scipy" }, - { name = "tqdm" }, - { name = "typer" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/bb/e2/1492d9078b716c29e6de41de03e3641f3b7741b180801a2e735542e163a0/docling-2.73.1.tar.gz", hash = "sha256:76d2e787cfdc1f2780214066ffbf841c65566be255b5a1e5fd68fb9611e4c051", size = 344997, upload-time = "2026-02-13T15:36:07.361Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/5e/0514dec786d055d8fa26d88ad29d80fee4264d7cb328180ffb8fd375c4d2/docling-2.73.1-py3-none-any.whl", hash = "sha256:31e762166be0c3c3e97e28b1727e3aad09703160e04443ed1c24866977e157c1", size = 371533, upload-time = "2026-02-13T15:36:05.482Z" }, -] - -[[package]] -name = "docling-core" -version = "2.65.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "defusedxml" }, - { name = "jsonref" }, - { name = "jsonschema" }, - { name = "latex2mathml" }, - { name = "pandas" }, - { name = "pillow" }, - { name = "pydantic" }, - { name = "pyyaml" }, - { name = "tabulate" }, - { name = "typer" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ca/f0/54e61a05728f6e44f45092ab115b11b24464b64274f8a5fe8fcfe90ac70d/docling_core-2.65.1.tar.gz", hash = "sha256:3a143adb9cc613c503380eff92f5895078fc5a00fc7264f327d0d85ff60176cd", size = 253164, upload-time = "2026-02-13T12:23:06.472Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/54/ee/2a450f8cf9a153dd080551b1ff09d45f200d629c30917b2047ecb8e1f6b6/docling_core-2.65.1-py3-none-any.whl", hash = "sha256:fcdb30254bc5046b52b8bec5919de3b6cdbeed915399cede5351ff328cdd020d", size = 240086, upload-time = "2026-02-13T12:23:04.289Z" }, -] - -[package.optional-dependencies] -chunking = [ - { name = "semchunk" }, - { name = "transformers" }, - { name = "tree-sitter" }, - { name = "tree-sitter-c" }, - { name = "tree-sitter-javascript" }, - { name = "tree-sitter-python" }, - { name = "tree-sitter-typescript" }, -] - -[[package]] -name = "docling-ibm-models" -version = "3.11.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "accelerate" }, - { name = "docling-core" }, - { name = "huggingface-hub" }, - { name = "jsonlines" }, - { name = "numpy" }, - { name = "pillow" }, - { name = "pydantic" }, - { name = "rtree" }, - { name = "safetensors", extra = ["torch"] }, - { name = "torch", version = "2.10.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, - { name = "torchvision", version = "0.25.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torchvision", version = "0.25.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, - { name = "tqdm" }, - { name = "transformers" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b6/91/f883e0a2b3466e1126dfd4463f386c70f5b90d271c27b6f5a97d2f8312e6/docling_ibm_models-3.11.0.tar.gz", hash = "sha256:454401563a8e79cb33b718bc559d9bacca8a0183583e48f8e616c9184c1f5eb1", size = 87721, upload-time = "2026-01-23T12:29:35.384Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/5d/97e9c2e10fbd3ee1723ac82c335f8211a9633c0397cc11ed057c3ba4006e/docling_ibm_models-3.11.0-py3-none-any.whl", hash = "sha256:68f7961069d643bfdab21b1c9ef24a979db293496f4c2283d95b1025a9ac5347", size = 87352, upload-time = "2026-01-23T12:29:34.045Z" }, -] - -[[package]] -name = "docling-parse" -version = "4.7.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "docling-core" }, - { name = "pillow" }, - { name = "pydantic" }, - { name = "pywin32", marker = "sys_platform == 'win32'" }, - { name = "tabulate" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/bb/7a/653c3b11920113217724fab9b4740f9f8964864f92a2a27590accecec5ac/docling_parse-4.7.3.tar.gz", hash = "sha256:5936e6bcb7969c2a13f38ecc75cada3b0919422dc845e96da4b0b7b3bbc394ce", size = 67646746, upload-time = "2026-01-14T14:18:19.376Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6c/81/dd317e0bce475153dc08a60a9a8615b1a04d4d3c9803175e6cb7b7e9b49b/docling_parse-4.7.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:66896bbe925073e4d48f18ec29dcd611a390d6b2378fae72125e77b020cd5664", size = 14615974, upload-time = "2026-01-14T14:17:30.246Z" }, - { url = "https://files.pythonhosted.org/packages/3a/b5/088590e0b32fd0a393ca419c644d1435a1c99fa6b2a87888eef4d0fdea33/docling_parse-4.7.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:281347b3e937c1a5ffa6f8774ee603b64a0899fe8a6885573dec7eb48a3421d8", size = 14981051, upload-time = "2026-01-14T14:17:32.426Z" }, - { url = "https://files.pythonhosted.org/packages/b7/63/2b6c9127924487573d5419d58ec77955f0b7c0a923c8232ad461d71039aa/docling_parse-4.7.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3d86c51f9ce35a1b40b2f410f7271d9bd5fc58e7240f4cae7fdd2cef757e671", size = 15092586, upload-time = "2026-01-14T14:17:34.634Z" }, -] - [[package]] name = "docstring-parser" version = "0.17.0" @@ -578,15 +394,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/ee/bf0adb559ad3c786f12bcbc9296b3f5675f529199bef03e2df281fa1fadb/email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631", size = 33521, upload-time = "2024-06-20T11:30:28.248Z" }, ] -[[package]] -name = "et-xmlfile" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" }, -] - [[package]] name = "exceptiongroup" version = "1.3.0" @@ -599,18 +406,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674, upload-time = "2025-05-10T17:42:49.33Z" }, ] -[[package]] -name = "faker" -version = "40.4.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "tzdata", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fc/7e/dccb7013c9f3d66f2e379383600629fec75e4da2698548bdbf2041ea4b51/faker-40.4.0.tar.gz", hash = "sha256:76f8e74a3df28c3e2ec2caafa956e19e37a132fdc7ea067bc41783affcfee364", size = 1952221, upload-time = "2026-02-06T23:30:15.515Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/63/58efa67c10fb27810d34351b7a10f85f109a7f7e2a07dc3773952459c47b/faker-40.4.0-py3-none-any.whl", hash = "sha256:486d43c67ebbb136bc932406418744f9a0bdf2c07f77703ea78b58b77e9aa443", size = 1987060, upload-time = "2026-02-06T23:30:13.44Z" }, -] - [[package]] name = "fastapi" version = "0.116.1" @@ -738,15 +533,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/04/a94ebfb4eaaa08db56725a40de2887e95de4e8641b9e902c311bfa00aa39/filelock-3.24.2-py3-none-any.whl", hash = "sha256:667d7dc0b7d1e1064dd5f8f8e80bdac157a6482e8d2e02cd16fd3b6b33bd6556" }, ] -[[package]] -name = "filetype" -version = "1.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bb/29/745f7d30d47fe0f251d3ad3dc2978a23141917661998763bebb6da007eb1/filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb", size = 998020, upload-time = "2022-11-02T17:34:04.141Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/18/79/1b8fa1bb3568781e84c9200f951c735f3f157429f44be0495da55894d620/filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25", size = 19970, upload-time = "2022-11-02T17:34:01.425Z" }, -] - [[package]] name = "flatbuffers" version = "25.12.19" @@ -1007,27 +793,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7d/4f/1195bbac8e0c2acc5f740661631d8d750dc38d4a32b23ee5df3cde6f4e0d/joblib-1.5.1-py3-none-any.whl", hash = "sha256:4719a31f054c7d766948dcd83e9613686b27114f190f717cec7eaa2084f8a74a", size = 307746, upload-time = "2025-05-23T12:04:35.124Z" }, ] -[[package]] -name = "jsonlines" -version = "4.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/35/87/bcda8e46c88d0e34cad2f09ee2d0c7f5957bccdb9791b0b934ec84d84be4/jsonlines-4.0.0.tar.gz", hash = "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74", size = 11359, upload-time = "2023-09-01T12:34:44.187Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/62/d9ba6323b9202dd2fe166beab8a86d29465c41a0288cbe229fac60c1ab8d/jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55", size = 8701, upload-time = "2023-09-01T12:34:42.563Z" }, -] - -[[package]] -name = "jsonref" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/aa/0d/c1f3277e90ccdb50d33ed5ba1ec5b3f0a242ed8c1b1a85d3afeb68464dca/jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552", size = 8814, upload-time = "2023-01-16T16:10:04.455Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/ec/e1db9922bceb168197a558a2b8c03a7963f1afe93517ddd3cf99f202f996/jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9", size = 9425, upload-time = "2023-01-16T16:10:02.255Z" }, -] - [[package]] name = "jsonschema" version = "4.25.0" @@ -1070,15 +835,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/01/0e/b27cdbaccf30b890c40ed1da9fd4a3593a5cf94dae54fb34f8a4b74fcd3f/jsonschema_specifications-2025.4.1-py3-none-any.whl", hash = "sha256:4653bffbd6584f7de83a67e0d620ef16900b390ddc7939d56684d6c81e33f1af", size = 18437, upload-time = "2025-04-23T12:34:05.422Z" }, ] -[[package]] -name = "latex2mathml" -version = "3.78.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1a/26/57b1034c08922d0aefea79430a5e0006ffaee4f0ec59d566613f667ab2f7/latex2mathml-3.78.1.tar.gz", hash = "sha256:f941db80bf41db33f31df87b304e8b588f8166b813b0257c11c98f7a9d0aac71", size = 74030, upload-time = "2025-08-29T23:34:23.178Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3e/76/d661ea2e529c3d464f9efd73f9ac31626b45279eb4306e684054ea20e3d4/latex2mathml-3.78.1-py3-none-any.whl", hash = "sha256:f089b6d75e85b937f99693c93e8c16c0804008672c3dd2a3d25affd36f238100", size = 73892, upload-time = "2025-08-29T23:34:21.98Z" }, -] - [[package]] name = "lazy-object-proxy" version = "1.11.0" @@ -1164,15 +920,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload-time = "2023-06-03T06:41:11.019Z" }, ] -[[package]] -name = "marko" -version = "2.2.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e3/2f/050b6d485f052ddf17d76a41f9334d6fb2a8a85df35347a12d97ed3bc5c1/marko-2.2.2.tar.gz", hash = "sha256:6940308e655f63733ca518c47a68ec9510279dbb916c83616e4c4b5829f052e8", size = 143641, upload-time = "2026-01-05T11:04:41.935Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/83/f8/36d79bac5701e6786f9880c61bbe57574760a13c1af84ab71e5ed21faecc/marko-2.2.2-py3-none-any.whl", hash = "sha256:f064ae8c10416285ad1d96048dc11e98ef04e662d3342ae416f662b70aa7959e", size = 42701, upload-time = "2026-01-05T11:04:40.75Z" }, -] - [[package]] name = "markupsafe" version = "3.0.1" @@ -1255,25 +1002,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2b/9f/7ba6f94fc1e9ac3d2b853fdff3035fb2fa5afbed898c4a72b8a020610594/more_itertools-10.7.0-py3-none-any.whl", hash = "sha256:d43980384673cb07d2f7d2d918c616b30c659c089ee23953f601d6609c67510e", size = 65278, upload-time = "2025-04-22T14:17:40.49Z" }, ] -[[package]] -name = "mpire" -version = "2.10.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pygments" }, - { name = "pywin32", marker = "sys_platform == 'win32'" }, - { name = "tqdm" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/3a/93/80ac75c20ce54c785648b4ed363c88f148bf22637e10c9863db4fbe73e74/mpire-2.10.2.tar.gz", hash = "sha256:f66a321e93fadff34585a4bfa05e95bd946cf714b442f51c529038eb45773d97", size = 271270, upload-time = "2024-05-07T14:00:31.815Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/14/1db1729ad6db4999c3a16c47937d601fcb909aaa4224f5eca5a2f145a605/mpire-2.10.2-py3-none-any.whl", hash = "sha256:d627707f7a8d02aa4c7f7d59de399dec5290945ddf7fbd36cbb1d6ebb37a51fb", size = 272756, upload-time = "2024-05-07T14:00:29.633Z" }, -] - -[package.optional-dependencies] -dill = [ - { name = "multiprocess" }, -] - [[package]] name = "mpmath" version = "1.3.0" @@ -1310,32 +1038,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d8/30/9aec301e9772b098c1f5c0ca0279237c9766d94b97802e9888010c64b0ed/multidict-6.6.3-py3-none-any.whl", hash = "sha256:8db10f29c7541fc5da4defd8cd697e1ca429db743fa716325f236079b96f775a", size = 12313, upload-time = "2025-06-30T15:53:45.437Z" }, ] -[[package]] -name = "multiprocess" -version = "0.70.19" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "dill" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a2/f2/e783ac7f2aeeed14e9e12801f22529cc7e6b7ab80928d6dcce4e9f00922d/multiprocess-0.70.19.tar.gz", hash = "sha256:952021e0e6c55a4a9fe4cd787895b86e239a40e76802a789d6305398d3975897", size = 2079989, upload-time = "2026-01-19T06:47:39.744Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/aa/714635c727dbfc251139226fa4eaf1b07f00dc12d9cd2eb25f931adaf873/multiprocess-0.70.19-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1bbf1b69af1cf64cd05f65337d9215b88079ec819cd0ea7bac4dab84e162efe7", size = 144743, upload-time = "2026-01-19T06:47:24.562Z" }, - { url = "https://files.pythonhosted.org/packages/0f/e1/155f6abf5e6b5d9cef29b6d0167c180846157a4aca9b9bee1a217f67c959/multiprocess-0.70.19-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:5be9ec7f0c1c49a4f4a6fd20d5dda4aeabc2d39a50f4ad53720f1cd02b3a7c2e", size = 144738, upload-time = "2026-01-19T06:47:26.636Z" }, - { url = "https://files.pythonhosted.org/packages/af/cb/f421c2869d75750a4f32301cc20c4b63fab6376e9a75c8e5e655bdeb3d9b/multiprocess-0.70.19-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1c3dce098845a0db43b32a0b76a228ca059a668071cfeaa0f40c36c0b1585d45", size = 144741, upload-time = "2026-01-19T06:47:27.985Z" }, - { url = "https://files.pythonhosted.org/packages/e3/45/8004d1e6b9185c1a444d6b55ac5682acf9d98035e54386d967366035a03a/multiprocess-0.70.19-py310-none-any.whl", hash = "sha256:97404393419dcb2a8385910864eedf47a3cadf82c66345b44f036420eb0b5d87", size = 134948, upload-time = "2026-01-19T06:47:32.325Z" }, - { url = "https://files.pythonhosted.org/packages/86/c2/dec9722dc3474c164a0b6bcd9a7ed7da542c98af8cabce05374abab35edd/multiprocess-0.70.19-py311-none-any.whl", hash = "sha256:928851ae7973aea4ce0eaf330bbdafb2e01398a91518d5c8818802845564f45c", size = 144457, upload-time = "2026-01-19T06:47:33.711Z" }, - { url = "https://files.pythonhosted.org/packages/7e/82/69e539c4c2027f1e1697e09aaa2449243085a0edf81ae2c6341e84d769b6/multiprocess-0.70.19-py39-none-any.whl", hash = "sha256:0d4b4397ed669d371c81dcd1ef33fd384a44d6c3de1bd0ca7ac06d837720d3c5", size = 133477, upload-time = "2026-01-19T06:47:38.619Z" }, -] - -[[package]] -name = "networkx" -version = "3.6.1" -source = { registry = "https://download.pytorch.org/whl/cpu" } -sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762" }, -] - [[package]] name = "nltk" version = "3.9.1" @@ -1377,33 +1079,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/e5/b7d20451657664b07986c2f6e3be564433f5dcaf3482d68eaecd79afaf03/numpy-2.4.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:be71bf1edb48ebbbf7f6337b5bfd2f895d1902f6335a5830b20141fc126ffba0" }, ] -[[package]] -name = "ocrmac" -version = "1.0.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click", marker = "sys_platform == 'darwin'" }, - { name = "pillow", marker = "sys_platform == 'darwin'" }, - { name = "pyobjc-framework-vision", marker = "sys_platform == 'darwin'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5e/07/3e15ab404f75875c5e48c47163300eb90b7409044d8711fc3aaf52503f2e/ocrmac-1.0.1.tar.gz", hash = "sha256:507fe5e4cbd67b2d03f6729a52bbc11f9d0b58241134eb958a5daafd4b9d93d9", size = 1454317, upload-time = "2026-01-08T16:44:26.412Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/37/15/7cc16507a2aca927abe395f1c545f17ae76b1f8ed44f43ebe4e8670ee203/ocrmac-1.0.1-py3-none-any.whl", hash = "sha256:1cef25426f7ae6bbd57fe3dc5553b25461ae8ad0d2b428a9bbadbf5907349024", size = 9955, upload-time = "2026-01-08T16:44:25.555Z" }, -] - -[[package]] -name = "omegaconf" -version = "2.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "antlr4-python3-runtime" }, - { name = "pyyaml" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/09/48/6388f1bb9da707110532cb70ec4d2822858ddfb44f1cdf1233c20a80ea4b/omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7", size = 3298120, upload-time = "2022-12-08T20:59:22.753Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500, upload-time = "2022-12-08T20:59:19.686Z" }, -] - [[package]] name = "onnxruntime" version = "1.24.1" @@ -1502,34 +1177,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/dd/b3fd642260cb17532f66cc1e8250f3507d1e580483e209dc1e9d13bd980d/openapi_spec_validator-0.7.2-py3-none-any.whl", hash = "sha256:4bbdc0894ec85f1d1bea1d6d9c8b2c3c8d7ccaa13577ef40da9c006c9fd0eb60", size = 39713, upload-time = "2025-06-07T14:48:54.077Z" }, ] -[[package]] -name = "opencv-python" -version = "4.13.0.92" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/fc/6f/5a28fef4c4a382be06afe3938c64cc168223016fa520c5abaf37e8862aa5/opencv_python-4.13.0.92-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:caf60c071ec391ba51ed00a4a920f996d0b64e3e46068aac1f646b5de0326a19", size = 46247052, upload-time = "2026-02-05T07:01:25.046Z" }, - { url = "https://files.pythonhosted.org/packages/08/ac/6c98c44c650b8114a0fb901691351cfb3956d502e8e9b5cd27f4ee7fbf2f/opencv_python-4.13.0.92-cp37-abi3-macosx_14_0_x86_64.whl", hash = "sha256:5868a8c028a0b37561579bfb8ac1875babdc69546d236249fff296a8c010ccf9", size = 32568781, upload-time = "2026-02-05T07:01:41.379Z" }, - { url = "https://files.pythonhosted.org/packages/3e/51/82fed528b45173bf629fa44effb76dff8bc9f4eeaee759038362dfa60237/opencv_python-4.13.0.92-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bc2596e68f972ca452d80f444bc404e08807d021fbba40df26b61b18e01838a", size = 47685527, upload-time = "2026-02-05T06:59:11.24Z" }, - { url = "https://files.pythonhosted.org/packages/db/07/90b34a8e2cf9c50fe8ed25cac9011cde0676b4d9d9c973751ac7616223a2/opencv_python-4.13.0.92-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:402033cddf9d294693094de5ef532339f14ce821da3ad7df7c9f6e8316da32cf", size = 70460872, upload-time = "2026-02-05T06:59:19.162Z" }, - { url = "https://files.pythonhosted.org/packages/02/6d/7a9cc719b3eaf4377b9c2e3edeb7ed3a81de41f96421510c0a169ca3cfd4/opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:bccaabf9eb7f897ca61880ce2869dcd9b25b72129c28478e7f2a5e8dee945616", size = 46708208, upload-time = "2026-02-05T06:59:15.419Z" }, - { url = "https://files.pythonhosted.org/packages/fd/55/b3b49a1b97aabcfbbd6c7326df9cb0b6fa0c0aefa8e89d500939e04aa229/opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:620d602b8f7d8b8dab5f4b99c6eb353e78d3fb8b0f53db1bd258bb1aa001c1d5", size = 72927042, upload-time = "2026-02-05T06:59:23.389Z" }, -] - -[[package]] -name = "openpyxl" -version = "3.1.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "et-xmlfile" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" }, -] - [[package]] name = "packaging" version = "25.0" @@ -1539,26 +1186,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] -[[package]] -name = "pandas" -version = "2.3.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, - { name = "python-dateutil" }, - { name = "pytz" }, - { name = "tzdata" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/fa/7ac648108144a095b4fb6aa3de1954689f7af60a14cf25583f4960ecb878/pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", size = 11578790, upload-time = "2025-09-29T23:18:30.065Z" }, - { url = "https://files.pythonhosted.org/packages/9b/35/74442388c6cf008882d4d4bdfc4109be87e9b8b7ccd097ad1e7f006e2e95/pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", size = 10833831, upload-time = "2025-09-29T23:38:56.071Z" }, - { url = "https://files.pythonhosted.org/packages/fe/e4/de154cbfeee13383ad58d23017da99390b91d73f8c11856f2095e813201b/pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", size = 12199267, upload-time = "2025-09-29T23:18:41.627Z" }, - { url = "https://files.pythonhosted.org/packages/bf/c9/63f8d545568d9ab91476b1818b4741f521646cbdd151c6efebf40d6de6f7/pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", size = 12789281, upload-time = "2025-09-29T23:18:56.834Z" }, - { url = "https://files.pythonhosted.org/packages/f2/00/a5ac8c7a0e67fd1a6059e40aa08fa1c52cc00709077d2300e210c3ce0322/pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", size = 13240453, upload-time = "2025-09-29T23:19:09.247Z" }, - { url = "https://files.pythonhosted.org/packages/27/4d/5c23a5bc7bd209231618dd9e606ce076272c9bc4f12023a70e03a86b4067/pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", size = 13890361, upload-time = "2025-09-29T23:19:25.342Z" }, -] - [[package]] name = "parse" version = "1.20.2" @@ -1657,19 +1284,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] -[[package]] -name = "polyfactory" -version = "3.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "faker" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/97/92/e90639b1d2abe982749eba7e734571a343ea062f7d486498b1c2b852f019/polyfactory-3.2.0.tar.gz", hash = "sha256:879242f55208f023eee1de48522de5cb1f9fd2d09b2314e999a9592829d596d1", size = 346878, upload-time = "2025-12-21T11:18:51.017Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/21/93363d7b802aa904f8d4169bc33e0e316d06d26ee68d40fe0355057da98c/polyfactory-3.2.0-py3-none-any.whl", hash = "sha256:5945799cce4c56cd44ccad96fb0352996914553cc3efaa5a286930599f569571", size = 62181, upload-time = "2025-12-21T11:18:49.311Z" }, -] - [[package]] name = "presenton-backend" version = "0.1.0" @@ -1682,7 +1296,6 @@ dependencies = [ { name = "anthropic" }, { name = "asyncpg" }, { name = "dirtyjson" }, - { name = "docling", marker = "sys_platform != 'win32'" }, { name = "docx2everything", marker = "sys_platform == 'win32'" }, { name = "fastapi", extra = ["standard"] }, { name = "fastembed-vectorstore" }, @@ -1695,7 +1308,7 @@ dependencies = [ { name = "pdfplumber" }, { name = "pyinstaller" }, { name = "pytest" }, - { name = "python-pptx", marker = "sys_platform == 'win32'" }, + { name = "python-pptx" }, { name = "redis" }, { name = "sqlmodel" }, ] @@ -1709,7 +1322,6 @@ requires-dist = [ { name = "anthropic", specifier = ">=0.60.0" }, { name = "asyncpg", specifier = ">=0.30.0" }, { name = "dirtyjson", specifier = ">=1.0.8" }, - { name = "docling", marker = "sys_platform != 'win32'", specifier = ">=2.43.0" }, { name = "docx2everything", marker = "sys_platform == 'win32'", specifier = ">=1.0.0" }, { name = "fastapi", extras = ["standard"], specifier = ">=0.116.1" }, { name = "fastembed-vectorstore", specifier = ">=0.5.2" }, @@ -1722,7 +1334,7 @@ requires-dist = [ { name = "pdfplumber", specifier = ">=0.11.7" }, { name = "pyinstaller", specifier = ">=6.18.0" }, { name = "pytest", specifier = ">=8.4.1" }, - { name = "python-pptx", marker = "sys_platform == 'win32'", specifier = ">=1.0.2" }, + { name = "python-pptx", specifier = ">=1.0.2" }, { name = "redis", specifier = ">=6.2.0" }, { name = "sqlmodel", specifier = ">=0.0.24" }, ] @@ -1770,20 +1382,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/57/bf/2086963c69bdac3d7cff1cc7ff79b8ce5ea0bec6797a017e1be338a46248/protobuf-6.33.5-py3-none-any.whl", hash = "sha256:69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02", size = 170687, upload-time = "2026-01-29T21:51:32.557Z" }, ] -[[package]] -name = "psutil" -version = "7.2.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" }, - { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" }, - { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" }, - { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" }, - { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" }, - { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" }, -] - [[package]] name = "py-rust-stemmers" version = "0.1.5" @@ -1823,19 +1421,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, ] -[[package]] -name = "pyclipper" -version = "1.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f6/21/3c06205bb407e1f79b73b7b4dfb3950bd9537c4f625a68ab5cc41177f5bc/pyclipper-1.4.0.tar.gz", hash = "sha256:9882bd889f27da78add4dd6f881d25697efc740bf840274e749988d25496c8e1", size = 54489, upload-time = "2025-12-01T13:15:35.015Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/de/e3/64cf7794319b088c288706087141e53ac259c7959728303276d18adc665d/pyclipper-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:adcb7ca33c5bdc33cd775e8b3eadad54873c802a6d909067a57348bcb96e7a2d", size = 264281, upload-time = "2025-12-01T13:14:55.47Z" }, - { url = "https://files.pythonhosted.org/packages/34/cd/44ec0da0306fa4231e76f1c2cb1fa394d7bde8db490a2b24d55b39865f69/pyclipper-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fd24849d2b94ec749ceac7c34c9f01010d23b6e9d9216cf2238b8481160e703d", size = 139426, upload-time = "2025-12-01T13:14:56.683Z" }, - { url = "https://files.pythonhosted.org/packages/ad/88/d8f6c6763ea622fe35e19c75d8b39ed6c55191ddc82d65e06bc46b26cb8e/pyclipper-1.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1b6c8d75ba20c6433c9ea8f1a0feb7e4d3ac06a09ad1fd6d571afc1ddf89b869", size = 989649, upload-time = "2025-12-01T13:14:58.28Z" }, - { url = "https://files.pythonhosted.org/packages/ff/e9/ea7d68c8c4af3842d6515bedcf06418610ad75f111e64c92c1d4785a1513/pyclipper-1.4.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:58e29d7443d7cc0e83ee9daf43927730386629786d00c63b04fe3b53ac01462c", size = 962842, upload-time = "2025-12-01T13:15:00.044Z" }, - { url = "https://files.pythonhosted.org/packages/18/59/81050abdc9e5b90ffc2c765738c5e40e9abd8e44864aaa737b600f16c562/pyclipper-1.4.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98b2a40f98e1fc1b29e8a6094072e7e0c7dfe901e573bf6cfc6eb7ce84a7ae87", size = 126495, upload-time = "2025-12-01T13:15:33.743Z" }, -] - [[package]] name = "pycparser" version = "2.22" @@ -1963,12 +1548,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d5/b1/9da6ec3e88696018ee7bb9dc4a7310c2cfaebf32923a19598cd342767c10/pyinstaller_hooks_contrib-2026.0-py3-none-any.whl", hash = "sha256:0590db8edeba3e6c30c8474937021f5cd39c0602b4d10f74a064c73911efaca5", size = 452318, upload-time = "2026-01-20T00:15:21.88Z" }, ] -[[package]] -name = "pylatexenc" -version = "2.10" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5d/ab/34ec41718af73c00119d0351b7a2531d2ebddb51833a36448fc7b862be60/pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3", size = 162597, upload-time = "2021-04-06T07:56:07.854Z" } - [[package]] name = "pymysql" version = "1.1.1" @@ -1978,68 +1557,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/94/e4181a1f6286f545507528c78016e00065ea913276888db2262507693ce5/PyMySQL-1.1.1-py3-none-any.whl", hash = "sha256:4de15da4c61dc132f4fb9ab763063e693d521a80fd0e87943b9a453dd4c19d6c", size = 44972, upload-time = "2024-05-21T11:03:41.216Z" }, ] -[[package]] -name = "pyobjc-core" -version = "12.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b8/b6/d5612eb40be4fd5ef88c259339e6313f46ba67577a95d86c3470b951fce0/pyobjc_core-12.1.tar.gz", hash = "sha256:2bb3903f5387f72422145e1466b3ac3f7f0ef2e9960afa9bcd8961c5cbf8bd21", size = 1000532, upload-time = "2025-11-14T10:08:28.292Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/95/df/d2b290708e9da86d6e7a9a2a2022b91915cf2e712a5a82e306cb6ee99792/pyobjc_core-12.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c918ebca280925e7fcb14c5c43ce12dcb9574a33cccb889be7c8c17f3bcce8b6", size = 671263, upload-time = "2025-11-14T09:31:35.231Z" }, -] - -[[package]] -name = "pyobjc-framework-cocoa" -version = "12.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyobjc-core", marker = "sys_platform == 'darwin'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/02/a3/16ca9a15e77c061a9250afbae2eae26f2e1579eb8ca9462ae2d2c71e1169/pyobjc_framework_cocoa-12.1.tar.gz", hash = "sha256:5556c87db95711b985d5efdaaf01c917ddd41d148b1e52a0c66b1a2e2c5c1640", size = 2772191, upload-time = "2025-11-14T10:13:02.069Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3f/07/5760735c0fffc65107e648eaf7e0991f46da442ac4493501be5380e6d9d4/pyobjc_framework_cocoa-12.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f52228bcf38da64b77328787967d464e28b981492b33a7675585141e1b0a01e6", size = 383812, upload-time = "2025-11-14T09:40:53.169Z" }, -] - -[[package]] -name = "pyobjc-framework-coreml" -version = "12.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyobjc-core", marker = "sys_platform == 'darwin'" }, - { name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/30/2d/baa9ea02cbb1c200683cb7273b69b4bee5070e86f2060b77e6a27c2a9d7e/pyobjc_framework_coreml-12.1.tar.gz", hash = "sha256:0d1a4216891a18775c9e0170d908714c18e4f53f9dc79fb0f5263b2aa81609ba", size = 40465, upload-time = "2025-11-14T10:14:02.265Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/34/0f/f55369da4a33cfe1db38a3512aac4487602783d3a1d572d2c8c4ccce6abc/pyobjc_framework_coreml-12.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:16dafcfb123f022e62f47a590a7eccf7d0cb5957a77fd5f062b5ee751cb5a423", size = 11331, upload-time = "2025-11-14T09:45:50.445Z" }, -] - -[[package]] -name = "pyobjc-framework-quartz" -version = "12.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyobjc-core", marker = "sys_platform == 'darwin'" }, - { name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/94/18/cc59f3d4355c9456fc945eae7fe8797003c4da99212dd531ad1b0de8a0c6/pyobjc_framework_quartz-12.1.tar.gz", hash = "sha256:27f782f3513ac88ec9b6c82d9767eef95a5cf4175ce88a1e5a65875fee799608", size = 3159099, upload-time = "2025-11-14T10:21:24.31Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/ef/dcd22b743e38b3c430fce4788176c2c5afa8bfb01085b8143b02d1e75201/pyobjc_framework_quartz-12.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:19f99ac49a0b15dd892e155644fe80242d741411a9ed9c119b18b7466048625a", size = 217795, upload-time = "2025-11-14T09:59:46.922Z" }, -] - -[[package]] -name = "pyobjc-framework-vision" -version = "12.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyobjc-core", marker = "sys_platform == 'darwin'" }, - { name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" }, - { name = "pyobjc-framework-coreml", marker = "sys_platform == 'darwin'" }, - { name = "pyobjc-framework-quartz", marker = "sys_platform == 'darwin'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c2/5a/08bb3e278f870443d226c141af14205ff41c0274da1e053b72b11dfc9fb2/pyobjc_framework_vision-12.1.tar.gz", hash = "sha256:a30959100e85dcede3a786c544e621ad6eb65ff6abf85721f805822b8c5fe9b0", size = 59538, upload-time = "2025-11-14T10:23:21.979Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/37/e30cf4eef2b4c7e20ccadc1249117c77305fbc38b2e5904eb42e3753f63c/pyobjc_framework_vision-12.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1edbf2fc18ce3b31108f845901a88f2236783ae6bf0bc68438d7ece572dc2a29", size = 21432, upload-time = "2025-11-14T10:06:42.373Z" }, -] - [[package]] name = "pypdfium2" version = "4.30.0" @@ -2082,31 +1599,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" }, ] -[[package]] -name = "python-dateutil" -version = "2.9.0.post0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "six" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, -] - -[[package]] -name = "python-docx" -version = "1.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "lxml" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a9/f7/eddfe33871520adab45aaa1a71f0402a2252050c14c7e3009446c8f4701c/python_docx-1.2.0.tar.gz", hash = "sha256:7bc9d7b7d8a69c9c02ca09216118c86552704edc23bac179283f2e38f86220ce", size = 5723256, upload-time = "2025-06-16T20:46:27.921Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/00/1e03a4989fa5795da308cd774f05b704ace555a70f9bf9d3be057b680bcf/python_docx-1.2.0-py3-none-any.whl", hash = "sha256:3fd478f3250fbbbfd3b94fe1e985955737c145627498896a8a6bf81f4baf66c7", size = 252987, upload-time = "2025-06-16T20:46:22.506Z" }, -] - [[package]] name = "python-dotenv" version = "1.1.1" @@ -2140,15 +1632,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/4f/00be2196329ebbff56ce564aa94efb0fbc828d00de250b1980de1a34ab49/python_pptx-1.0.2-py3-none-any.whl", hash = "sha256:160838e0b8565a8b1f67947675886e9fea18aa5e795db7ae531606d68e785cba", size = 472788, upload-time = "2024-08-07T17:33:28.192Z" }, ] -[[package]] -name = "pytz" -version = "2025.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, -] - [[package]] name = "pywin32" version = "311" @@ -2185,27 +1668,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980, upload-time = "2024-08-06T20:32:21.273Z" }, ] -[[package]] -name = "rapidocr" -version = "3.6.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorlog" }, - { name = "numpy" }, - { name = "omegaconf" }, - { name = "opencv-python" }, - { name = "pillow" }, - { name = "pyclipper" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "shapely" }, - { name = "six" }, - { name = "tqdm" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/fd/0d025466f0f84552634f2a94c018df34568fe55cc97184a6bb2c719c5b3a/rapidocr-3.6.0-py3-none-any.whl", hash = "sha256:d16b43872fc4dfa1e60996334dcd0dc3e3f1f64161e2332bc1873b9f65754e6b", size = 15067340, upload-time = "2026-01-28T14:45:04.271Z" }, -] - [[package]] name = "redis" version = "6.2.0" @@ -2386,80 +1848,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c8/ed/9de62c2150ca8e2e5858acf3f4f4d0d180a38feef9fdab4078bea63d8dba/rpds_py-0.26.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e99685fc95d386da368013e7fb4269dd39c30d99f812a8372d62f244f662709c", size = 555334, upload-time = "2025-07-01T15:56:51.703Z" }, ] -[[package]] -name = "rtree" -version = "1.4.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/95/09/7302695875a019514de9a5dd17b8320e7a19d6e7bc8f85dcfb79a4ce2da3/rtree-1.4.1.tar.gz", hash = "sha256:c6b1b3550881e57ebe530cc6cffefc87cd9bf49c30b37b894065a9f810875e46", size = 52425, upload-time = "2025-08-13T19:32:01.413Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/d9/108cd989a4c0954e60b3cdc86fd2826407702b5375f6dfdab2802e5fed98/rtree-1.4.1-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d672184298527522d4914d8ae53bf76982b86ca420b0acde9298a7a87d81d4a4", size = 468484, upload-time = "2025-08-13T19:31:50.593Z" }, - { url = "https://files.pythonhosted.org/packages/f3/cf/2710b6fd6b07ea0aef317b29f335790ba6adf06a28ac236078ed9bd8a91d/rtree-1.4.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a7e48d805e12011c2cf739a29d6a60ae852fb1de9fc84220bbcef67e6e595d7d", size = 436325, upload-time = "2025-08-13T19:31:52.367Z" }, - { url = "https://files.pythonhosted.org/packages/55/e1/4d075268a46e68db3cac51846eb6a3ab96ed481c585c5a1ad411b3c23aad/rtree-1.4.1-py3-none-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:efa8c4496e31e9ad58ff6c7df89abceac7022d906cb64a3e18e4fceae6b77f65", size = 459789, upload-time = "2025-08-13T19:31:53.926Z" }, - { url = "https://files.pythonhosted.org/packages/d1/75/e5d44be90525cd28503e7f836d077ae6663ec0687a13ba7810b4114b3668/rtree-1.4.1-py3-none-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:12de4578f1b3381a93a655846900be4e3d5f4cd5e306b8b00aa77c1121dc7e8c", size = 507644, upload-time = "2025-08-13T19:31:55.164Z" }, - { url = "https://files.pythonhosted.org/packages/fd/85/b8684f769a142163b52859a38a486493b05bafb4f2fb71d4f945de28ebf9/rtree-1.4.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b558edda52eca3e6d1ee629042192c65e6b7f2c150d6d6cd207ce82f85be3967", size = 1454478, upload-time = "2025-08-13T19:31:56.808Z" }, - { url = "https://files.pythonhosted.org/packages/e9/a4/c2292b95246b9165cc43a0c3757e80995d58bc9b43da5cb47ad6e3535213/rtree-1.4.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:f155bc8d6bac9dcd383481dee8c130947a4866db1d16cb6dff442329a038a0dc", size = 1555140, upload-time = "2025-08-13T19:31:58.031Z" }, -] - -[[package]] -name = "safetensors" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" }, - { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" }, - { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" }, - { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" }, - { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" }, - { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" }, - { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" }, - { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = "2025-11-19T15:18:31.075Z" }, - { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" }, - { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" }, - { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" }, - { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" }, -] - -[package.optional-dependencies] -torch = [ - { name = "numpy" }, - { name = "packaging" }, - { name = "torch", version = "2.10.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, -] - -[[package]] -name = "scipy" -version = "1.17.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/56/3e/9cca699f3486ce6bc12ff46dc2031f1ec8eb9ccc9a320fdaf925f1417426/scipy-1.17.0.tar.gz", hash = "sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e", size = 30396830, upload-time = "2026-01-10T21:34:23.009Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/4b/c89c131aa87cad2b77a54eb0fb94d633a842420fa7e919dc2f922037c3d8/scipy-1.17.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:2abd71643797bd8a106dff97894ff7869eeeb0af0f7a5ce02e4227c6a2e9d6fd", size = 31381316, upload-time = "2026-01-10T21:24:33.42Z" }, - { url = "https://files.pythonhosted.org/packages/5e/5f/a6b38f79a07d74989224d5f11b55267714707582908a5f1ae854cf9a9b84/scipy-1.17.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:ef28d815f4d2686503e5f4f00edc387ae58dfd7a2f42e348bb53359538f01558", size = 27966760, upload-time = "2026-01-10T21:24:38.911Z" }, - { url = "https://files.pythonhosted.org/packages/c1/20/095ad24e031ee8ed3c5975954d816b8e7e2abd731e04f8be573de8740885/scipy-1.17.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:272a9f16d6bb4667e8b50d25d71eddcc2158a214df1b566319298de0939d2ab7", size = 20138701, upload-time = "2026-01-10T21:24:43.249Z" }, - { url = "https://files.pythonhosted.org/packages/89/11/4aad2b3858d0337756f3323f8960755704e530b27eb2a94386c970c32cbe/scipy-1.17.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:7204fddcbec2fe6598f1c5fdf027e9f259106d05202a959a9f1aecf036adc9f6", size = 22480574, upload-time = "2026-01-10T21:24:47.266Z" }, - { url = "https://files.pythonhosted.org/packages/85/bd/f5af70c28c6da2227e510875cadf64879855193a687fb19951f0f44cfd6b/scipy-1.17.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fc02c37a5639ee67d8fb646ffded6d793c06c5622d36b35cfa8fe5ececb8f042", size = 32862414, upload-time = "2026-01-10T21:24:52.566Z" }, - { url = "https://files.pythonhosted.org/packages/ef/df/df1457c4df3826e908879fe3d76bc5b6e60aae45f4ee42539512438cfd5d/scipy-1.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dac97a27520d66c12a34fd90a4fe65f43766c18c0d6e1c0a80f114d2260080e4", size = 35112380, upload-time = "2026-01-10T21:24:58.433Z" }, - { url = "https://files.pythonhosted.org/packages/5f/bb/88e2c16bd1dd4de19d80d7c5e238387182993c2fb13b4b8111e3927ad422/scipy-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ebb7446a39b3ae0fe8f416a9a3fdc6fba3f11c634f680f16a239c5187bc487c0", size = 34922676, upload-time = "2026-01-10T21:25:04.287Z" }, - { url = "https://files.pythonhosted.org/packages/02/ba/5120242cc735f71fc002cff0303d536af4405eb265f7c60742851e7ccfe9/scipy-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:474da16199f6af66601a01546144922ce402cb17362e07d82f5a6cf8f963e449", size = 37507599, upload-time = "2026-01-10T21:25:09.851Z" }, -] - -[[package]] -name = "semchunk" -version = "2.2.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mpire", extra = ["dill"] }, - { name = "tqdm" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/62/96/c418c322730b385e81d4ab462e68dd48bb2dbda4d8efa17cad2ca468d9ac/semchunk-2.2.2.tar.gz", hash = "sha256:940e89896e64eeb01de97ba60f51c8c7b96c6a3951dfcf574f25ce2146752f52", size = 12271, upload-time = "2024-12-17T22:54:30.332Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/76/84/94ca7896c7df20032bcb09973e9a4d14c222507c0aadf22e89fa76bb0a04/semchunk-2.2.2-py3-none-any.whl", hash = "sha256:94ca19020c013c073abdfd06d79a7c13637b91738335f3b8cdb5655ee7cc94d2", size = 10271, upload-time = "2024-12-17T22:54:27.689Z" }, -] - [[package]] name = "sentry-sdk" version = "2.34.1" @@ -2482,23 +1870,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/b8/f1f62a5e3c0ad2ff1d189590bfa4c46b4f3b6e49cef6f26c6ee4e575394d/setuptools-80.10.2-py3-none-any.whl", hash = "sha256:95b30ddfb717250edb492926c92b5221f7ef3fbcc2b07579bcd4a27da21d0173", size = 1064234, upload-time = "2026-01-25T22:38:15.216Z" }, ] -[[package]] -name = "shapely" -version = "2.1.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/4d/bc/0989043118a27cccb4e906a46b7565ce36ca7b57f5a18b78f4f1b0f72d9d/shapely-2.1.2.tar.gz", hash = "sha256:2ed4ecb28320a433db18a5bf029986aa8afcfd740745e78847e330d5d94922a9", size = 315489, upload-time = "2025-09-24T13:51:41.432Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/8d/1ff672dea9ec6a7b5d422eb6d095ed886e2e523733329f75fdcb14ee1149/shapely-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:91121757b0a36c9aac3427a651a7e6567110a4a67c97edf04f8d55d4765f6618", size = 1820038, upload-time = "2025-09-24T13:50:15.628Z" }, - { url = "https://files.pythonhosted.org/packages/4f/ce/28fab8c772ce5db23a0d86bf0adaee0c4c79d5ad1db766055fa3dab442e2/shapely-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:16a9c722ba774cf50b5d4541242b4cce05aafd44a015290c82ba8a16931ff63d", size = 1626039, upload-time = "2025-09-24T13:50:16.881Z" }, - { url = "https://files.pythonhosted.org/packages/70/8b/868b7e3f4982f5006e9395c1e12343c66a8155c0374fdc07c0e6a1ab547d/shapely-2.1.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cc4f7397459b12c0b196c9efe1f9d7e92463cbba142632b4cc6d8bbbbd3e2b09", size = 3001519, upload-time = "2025-09-24T13:50:18.606Z" }, - { url = "https://files.pythonhosted.org/packages/13/02/58b0b8d9c17c93ab6340edd8b7308c0c5a5b81f94ce65705819b7416dba5/shapely-2.1.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:136ab87b17e733e22f0961504d05e77e7be8c9b5a8184f685b4a91a84efe3c26", size = 3110842, upload-time = "2025-09-24T13:50:21.77Z" }, - { url = "https://files.pythonhosted.org/packages/af/61/8e389c97994d5f331dcffb25e2fa761aeedfb52b3ad9bcdd7b8671f4810a/shapely-2.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:16c5d0fc45d3aa0a69074979f4f1928ca2734fb2e0dde8af9611e134e46774e7", size = 4021316, upload-time = "2025-09-24T13:50:23.626Z" }, - { url = "https://files.pythonhosted.org/packages/d3/d4/9b2a9fe6039f9e42ccf2cb3e84f219fd8364b0c3b8e7bbc857b5fbe9c14c/shapely-2.1.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ddc759f72b5b2b0f54a7e7cde44acef680a55019eb52ac63a7af2cf17cb9cd2", size = 4178586, upload-time = "2025-09-24T13:50:25.443Z" }, -] - [[package]] name = "shellingham" version = "1.5.4" @@ -2526,15 +1897,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] -[[package]] -name = "soupsieve" -version = "2.8.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" }, -] - [[package]] name = "sqlalchemy" version = "2.0.42" @@ -2606,15 +1968,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5" }, ] -[[package]] -name = "tabulate" -version = "0.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, -] - [[package]] name = "tenacity" version = "8.5.0" @@ -2650,84 +2003,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" }, ] -[[package]] -name = "torch" -version = "2.10.0" -source = { registry = "https://download.pytorch.org/whl/cpu" } -resolution-markers = [ - "sys_platform == 'darwin'", -] -dependencies = [ - { name = "filelock", marker = "sys_platform == 'darwin'" }, - { name = "fsspec", marker = "sys_platform == 'darwin'" }, - { name = "jinja2", marker = "sys_platform == 'darwin'" }, - { name = "networkx", marker = "sys_platform == 'darwin'" }, - { name = "sympy", marker = "sys_platform == 'darwin'" }, - { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, -] -wheels = [ - { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:0826ac8e409551e12b2360ac18b4161a838cbd111933e694752f351191331d09" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:90821a3194b8806d9fa9fdaa9308c1bc73df0c26808274b14129a97c99f35794" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:4584ab167995c0479f6821e3dceaf199c8166c811d3adbba5d8eedbbfa6764fd" }, -] - -[[package]] -name = "torch" -version = "2.10.0+cpu" -source = { registry = "https://download.pytorch.org/whl/cpu" } -resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", - "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", -] -dependencies = [ - { name = "filelock", marker = "sys_platform != 'darwin'" }, - { name = "fsspec", marker = "sys_platform != 'darwin'" }, - { name = "jinja2", marker = "sys_platform != 'darwin'" }, - { name = "networkx", marker = "sys_platform != 'darwin'" }, - { name = "sympy", marker = "sys_platform != 'darwin'" }, - { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, -] -wheels = [ - { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-linux_aarch64.whl", hash = "sha256:ce5c113d1f55f8c1f5af05047a24e50d11d293e0cbbb5bf7a75c6c761edd6eaa" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-linux_s390x.whl", hash = "sha256:0e286fcf6ce0cc7b204396c9b4ea0d375f1f0c3e752f68ce3d3aeb265511db8c" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:1cfcb9b1558c6e52dffd0d4effce83b13c5ae5d97338164c372048c21f9cfccb" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.10.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:b7cb1ec66cefb90fd7b676eac72cfda3b8d4e4d0cacd7a531963bc2e0a9710ab" }, -] - -[[package]] -name = "torchvision" -version = "0.25.0" -source = { registry = "https://download.pytorch.org/whl/cpu" } -resolution-markers = [ - "sys_platform == 'darwin'", -] -dependencies = [ - { name = "numpy", marker = "sys_platform == 'darwin'" }, - { name = "pillow", marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.10.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, -] -wheels = [ - { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a76ce7b8d4fce291a25721ee2f921c783acc6dbd4fc32dc741ed2a1d5a8dde2f" }, -] - -[[package]] -name = "torchvision" -version = "0.25.0+cpu" -source = { registry = "https://download.pytorch.org/whl/cpu" } -resolution-markers = [ - "platform_machine == 'aarch64' and sys_platform == 'linux'", - "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", -] -dependencies = [ - { name = "numpy", marker = "sys_platform != 'darwin'" }, - { name = "pillow", marker = "sys_platform != 'darwin'" }, - { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, -] -wheels = [ - { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:59be99d1c470ef470b134468aa6afa6f968081a503acb4ee883d70332f822e35" }, - { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:aa016ab73e06a886f72edc8929ed2ed4c85aaaa6e10500ecdef921b03129b19e" }, -] - [[package]] name = "tqdm" version = "4.67.1" @@ -2740,94 +2015,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, ] -[[package]] -name = "transformers" -version = "4.57.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "filelock" }, - { name = "huggingface-hub" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "pyyaml" }, - { name = "regex" }, - { name = "requests" }, - { name = "safetensors" }, - { name = "tokenizers" }, - { name = "tqdm" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload-time = "2026-01-16T10:38:39.284Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" }, -] - -[[package]] -name = "tree-sitter" -version = "0.25.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/66/7c/0350cfc47faadc0d3cf7d8237a4e34032b3014ddf4a12ded9933e1648b55/tree-sitter-0.25.2.tar.gz", hash = "sha256:fe43c158555da46723b28b52e058ad444195afd1db3ca7720c59a254544e9c20", size = 177961, upload-time = "2025-09-25T17:37:59.751Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/22/88a1e00b906d26fa8a075dd19c6c3116997cb884bf1b3c023deb065a344d/tree_sitter-0.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8ca72d841215b6573ed0655b3a5cd1133f9b69a6fa561aecad40dca9029d75b", size = 146752, upload-time = "2025-09-25T17:37:24.775Z" }, - { url = "https://files.pythonhosted.org/packages/57/1c/22cc14f3910017b7a76d7358df5cd315a84fe0c7f6f7b443b49db2e2790d/tree_sitter-0.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc0351cfe5022cec5a77645f647f92a936b38850346ed3f6d6babfbeeeca4d26", size = 137765, upload-time = "2025-09-25T17:37:26.103Z" }, - { url = "https://files.pythonhosted.org/packages/1c/0c/d0de46ded7d5b34631e0f630d9866dab22d3183195bf0f3b81de406d6622/tree_sitter-0.25.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1799609636c0193e16c38f366bda5af15b1ce476df79ddaae7dd274df9e44266", size = 604643, upload-time = "2025-09-25T17:37:27.398Z" }, - { url = "https://files.pythonhosted.org/packages/34/38/b735a58c1c2f60a168a678ca27b4c1a9df725d0bf2d1a8a1c571c033111e/tree_sitter-0.25.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e65ae456ad0d210ee71a89ee112ac7e72e6c2e5aac1b95846ecc7afa68a194c", size = 632229, upload-time = "2025-09-25T17:37:28.463Z" }, - { url = "https://files.pythonhosted.org/packages/32/f6/cda1e1e6cbff5e28d8433578e2556d7ba0b0209d95a796128155b97e7693/tree_sitter-0.25.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:49ee3c348caa459244ec437ccc7ff3831f35977d143f65311572b8ba0a5f265f", size = 629861, upload-time = "2025-09-25T17:37:29.593Z" }, -] - -[[package]] -name = "tree-sitter-c" -version = "0.24.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f1/f5/ba8cd08d717277551ade8537d3aa2a94b907c6c6e0fbcf4e4d8b1c747fa3/tree_sitter_c-0.24.1.tar.gz", hash = "sha256:7d2d0cda0b8dda428c81440c1e94367f9f13548eedca3f49768bde66b1422ad6", size = 228014, upload-time = "2025-05-24T17:32:58.384Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/15/c7/c817be36306e457c2d36cc324789046390d9d8c555c38772429ffdb7d361/tree_sitter_c-0.24.1-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9c06ac26a1efdcc8b26a8a6970fbc6997c4071857359e5837d4c42892d45fe1e", size = 80940, upload-time = "2025-05-24T17:32:49.967Z" }, - { url = "https://files.pythonhosted.org/packages/7a/42/283909467290b24fdbc29bb32ee20e409a19a55002b43175d66d091ca1a4/tree_sitter_c-0.24.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:942bcd7cbecd810dcf7ca6f8f834391ebf0771a89479646d891ba4ca2fdfdc88", size = 86304, upload-time = "2025-05-24T17:32:51.271Z" }, - { url = "https://files.pythonhosted.org/packages/94/53/fb4f61d4e5f15ec3da85774a4df8e58d3b5b73036cf167f0203b4dd9d158/tree_sitter_c-0.24.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a74cfd7a11ca5a961fafd4d751892ee65acae667d2818968a6f079397d8d28c", size = 109996, upload-time = "2025-05-24T17:32:52.119Z" }, - { url = "https://files.pythonhosted.org/packages/5e/e8/fc541d34ee81c386c5453c2596c1763e8e9cd7cb0725f39d7dfa2276afa4/tree_sitter_c-0.24.1-cp310-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6a807705a3978911dc7ee26a7ad36dcfacb6adfc13c190d496660ec9bd66707", size = 98137, upload-time = "2025-05-24T17:32:53.361Z" }, - { url = "https://files.pythonhosted.org/packages/32/c6/d0563319cae0d5b5780a92e2806074b24afea2a07aa4c10599b899bda3ec/tree_sitter_c-0.24.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:789781afcb710df34144f7e2a20cd80e325114b9119e3956c6bd1dd2d365df98", size = 94148, upload-time = "2025-05-24T17:32:54.855Z" }, -] - -[[package]] -name = "tree-sitter-javascript" -version = "0.25.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/59/e0/e63103c72a9d3dfd89a31e02e660263ad84b7438e5f44ee82e443e65bbde/tree_sitter_javascript-0.25.0.tar.gz", hash = "sha256:329b5414874f0588a98f1c291f1b28138286617aa907746ffe55adfdcf963f38", size = 132338, upload-time = "2025-09-01T07:13:44.792Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/df/5106ac250cd03661ebc3cc75da6b3d9f6800a3606393a0122eca58038104/tree_sitter_javascript-0.25.0-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b70f887fb269d6e58c349d683f59fa647140c410cfe2bee44a883b20ec92e3dc", size = 64052, upload-time = "2025-09-01T07:13:36.865Z" }, - { url = "https://files.pythonhosted.org/packages/b1/8f/6b4b2bc90d8ab3955856ce852cc9d1e82c81d7ab9646385f0e75ffd5b5d3/tree_sitter_javascript-0.25.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:8264a996b8845cfce06965152a013b5d9cbb7d199bc3503e12b5682e62bb1de1", size = 66440, upload-time = "2025-09-01T07:13:37.962Z" }, - { url = "https://files.pythonhosted.org/packages/5f/c4/7da74ecdcd8a398f88bd003a87c65403b5fe0e958cdd43fbd5fd4a398fcf/tree_sitter_javascript-0.25.0-cp310-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9dc04ba91fc8583344e57c1f1ed5b2c97ecaaf47480011b92fbeab8dda96db75", size = 99728, upload-time = "2025-09-01T07:13:38.755Z" }, - { url = "https://files.pythonhosted.org/packages/96/c8/97da3af4796495e46421e9344738addb3602fa6426ea695be3fcbadbee37/tree_sitter_javascript-0.25.0-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:199d09985190852e0912da2b8d26c932159be314bc04952cf917ed0e4c633e6b", size = 106072, upload-time = "2025-09-01T07:13:39.798Z" }, - { url = "https://files.pythonhosted.org/packages/13/be/c964e8130be08cc9bd6627d845f0e4460945b158429d39510953bbcb8fcc/tree_sitter_javascript-0.25.0-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:dfcf789064c58dc13c0a4edb550acacfc6f0f280577f1e7a00de3e89fc7f8ddc", size = 104388, upload-time = "2025-09-01T07:13:40.866Z" }, - { url = "https://files.pythonhosted.org/packages/ee/89/9b773dee0f8961d1bb8d7baf0a204ab587618df19897c1ef260916f318ec/tree_sitter_javascript-0.25.0-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1b852d3aee8a36186dbcc32c798b11b4869f9b5041743b63b65c2ef793db7a54", size = 98377, upload-time = "2025-09-01T07:13:41.838Z" }, -] - -[[package]] -name = "tree-sitter-python" -version = "0.25.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b8/8b/c992ff0e768cb6768d5c96234579bf8842b3a633db641455d86dd30d5dac/tree_sitter_python-0.25.0.tar.gz", hash = "sha256:b13e090f725f5b9c86aa455a268553c65cadf325471ad5b65cd29cac8a1a68ac", size = 159845, upload-time = "2025-09-11T06:47:58.159Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cf/64/a4e503c78a4eb3ac46d8e72a29c1b1237fa85238d8e972b063e0751f5a94/tree_sitter_python-0.25.0-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:14a79a47ddef72f987d5a2c122d148a812169d7484ff5c75a3db9609d419f361", size = 73790, upload-time = "2025-09-11T06:47:47.652Z" }, - { url = "https://files.pythonhosted.org/packages/e6/1d/60d8c2a0cc63d6ec4ba4e99ce61b802d2e39ef9db799bdf2a8f932a6cd4b/tree_sitter_python-0.25.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:480c21dbd995b7fe44813e741d71fed10ba695e7caab627fb034e3828469d762", size = 76691, upload-time = "2025-09-11T06:47:49.038Z" }, - { url = "https://files.pythonhosted.org/packages/aa/cb/d9b0b67d037922d60cbe0359e0c86457c2da721bc714381a63e2c8e35eba/tree_sitter_python-0.25.0-cp310-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:86f118e5eecad616ecdb81d171a36dde9bef5a0b21ed71ea9c3e390813c3baf5", size = 108133, upload-time = "2025-09-11T06:47:50.499Z" }, - { url = "https://files.pythonhosted.org/packages/40/bd/bf4787f57e6b2860f3f1c8c62f045b39fb32d6bac4b53d7a9e66de968440/tree_sitter_python-0.25.0-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be71650ca2b93b6e9649e5d65c6811aad87a7614c8c1003246b303f6b150f61b", size = 110603, upload-time = "2025-09-11T06:47:51.985Z" }, - { url = "https://files.pythonhosted.org/packages/5d/25/feff09f5c2f32484fbce15db8b49455c7572346ce61a699a41972dea7318/tree_sitter_python-0.25.0-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e6d5b5799628cc0f24691ab2a172a8e676f668fe90dc60468bee14084a35c16d", size = 108998, upload-time = "2025-09-11T06:47:53.046Z" }, - { url = "https://files.pythonhosted.org/packages/75/69/4946da3d6c0df316ccb938316ce007fb565d08f89d02d854f2d308f0309f/tree_sitter_python-0.25.0-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:71959832fc5d9642e52c11f2f7d79ae520b461e63334927e93ca46cd61cd9683", size = 107268, upload-time = "2025-09-11T06:47:54.388Z" }, -] - -[[package]] -name = "tree-sitter-typescript" -version = "0.23.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1e/fc/bb52958f7e399250aee093751e9373a6311cadbe76b6e0d109b853757f35/tree_sitter_typescript-0.23.2.tar.gz", hash = "sha256:7b167b5827c882261cb7a50dfa0fb567975f9b315e87ed87ad0a0a3aedb3834d", size = 773053, upload-time = "2024-11-11T02:36:11.396Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/28/95/4c00680866280e008e81dd621fd4d3f54aa3dad1b76b857a19da1b2cc426/tree_sitter_typescript-0.23.2-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:3cd752d70d8e5371fdac6a9a4df9d8924b63b6998d268586f7d374c9fba2a478", size = 286677, upload-time = "2024-11-11T02:35:58.839Z" }, - { url = "https://files.pythonhosted.org/packages/8f/2f/1f36fda564518d84593f2740d5905ac127d590baf5c5753cef2a88a89c15/tree_sitter_typescript-0.23.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:c7cc1b0ff5d91bac863b0e38b1578d5505e718156c9db577c8baea2557f66de8", size = 302008, upload-time = "2024-11-11T02:36:00.733Z" }, - { url = "https://files.pythonhosted.org/packages/96/2d/975c2dad292aa9994f982eb0b69cc6fda0223e4b6c4ea714550477d8ec3a/tree_sitter_typescript-0.23.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b1eed5b0b3a8134e86126b00b743d667ec27c63fc9de1b7bb23168803879e31", size = 351987, upload-time = "2024-11-11T02:36:02.669Z" }, - { url = "https://files.pythonhosted.org/packages/49/d1/a71c36da6e2b8a4ed5e2970819b86ef13ba77ac40d9e333cb17df6a2c5db/tree_sitter_typescript-0.23.2-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e96d36b85bcacdeb8ff5c2618d75593ef12ebaf1b4eace3477e2bdb2abb1752c", size = 344960, upload-time = "2024-11-11T02:36:04.443Z" }, - { url = "https://files.pythonhosted.org/packages/7f/cb/f57b149d7beed1a85b8266d0c60ebe4c46e79c9ba56bc17b898e17daf88e/tree_sitter_typescript-0.23.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8d4f0f9bcb61ad7b7509d49a1565ff2cc363863644a234e1e0fe10960e55aea0", size = 340245, upload-time = "2024-11-11T02:36:06.473Z" }, -] - [[package]] name = "typer" version = "0.16.0" @@ -2864,15 +2051,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" }, ] -[[package]] -name = "tzdata" -version = "2025.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772, upload-time = "2025-12-13T17:45:35.667Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" }, -] - [[package]] name = "urllib3" version = "2.5.0" diff --git a/electron/servers/nextjs/app/(presentation-generator)/services/api/presentation-generation.ts b/electron/servers/nextjs/app/(presentation-generator)/services/api/presentation-generation.ts index b6eea9b6..675c1fcf 100644 --- a/electron/servers/nextjs/app/(presentation-generator)/services/api/presentation-generation.ts +++ b/electron/servers/nextjs/app/(presentation-generator)/services/api/presentation-generation.ts @@ -29,7 +29,10 @@ export class PresentationGenerationApi { } } - static async decomposeDocuments(documentKeys: string[]) { + static async decomposeDocuments( + documentKeys: string[], + language?: string | null + ) { try { const response = await fetch( getApiUrl(`/api/v1/ppt/files/decompose`), @@ -38,6 +41,7 @@ export class PresentationGenerationApi { headers: getHeader(), body: JSON.stringify({ file_paths: documentKeys, + language: language ?? null, }), cache: "no-cache", } diff --git a/electron/servers/nextjs/app/(presentation-generator)/upload/components/UploadPage.tsx b/electron/servers/nextjs/app/(presentation-generator)/upload/components/UploadPage.tsx index 2dd3012a..44ccf537 100644 --- a/electron/servers/nextjs/app/(presentation-generator)/upload/components/UploadPage.tsx +++ b/electron/servers/nextjs/app/(presentation-generator)/upload/components/UploadPage.tsx @@ -132,7 +132,12 @@ const UploadPage = () => { if (documents.length > 0) { trackEvent(MixpanelEvent.Upload_Decompose_Documents_API_Call); - promises.push(PresentationGenerationApi.decomposeDocuments(documents)); + promises.push( + PresentationGenerationApi.decomposeDocuments( + documents, + config?.language ?? null + ) + ); } const responses = await Promise.all(promises); dispatch(setPptGenUploadState({ From 6a6afc7ab79df4112315912d524034f81ee95999 Mon Sep 17 00:00:00 2001 From: sudipnext Date: Sun, 29 Mar 2026 18:58:16 +0545 Subject: [PATCH 2/3] feat: enhance document handling with new conversion service and MIME type support --- electron/app/utils/liteparse-check.ts | 26 +-- .../document-extraction/liteparse_runner.mjs | 2 +- .../document-extraction/package.json | 8 - .../api/v1/ppt/endpoints/pptx_slides.py | 6 +- .../servers/fastapi/constants/documents.py | 93 ++++++++-- .../services/document_conversion_service.py | 165 ++++++++++++++++++ .../fastapi/services/documents_loader.py | 87 ++++++--- electron/servers/fastapi/utils/validators.py | 22 ++- .../upload/components/SupportingDoc.tsx | 57 +++--- 9 files changed, 369 insertions(+), 97 deletions(-) delete mode 100644 electron/resources/document-extraction/package.json create mode 100644 electron/servers/fastapi/services/document_conversion_service.py diff --git a/electron/app/utils/liteparse-check.ts b/electron/app/utils/liteparse-check.ts index 8384dd9c..3c42a74b 100644 --- a/electron/app/utils/liteparse-check.ts +++ b/electron/app/utils/liteparse-check.ts @@ -1,28 +1,6 @@ -import fs from "fs"; import path from "path"; -import { spawnSync } from "child_process"; -import { baseDir, isDev } from "./constants"; +import { baseDir } from "./constants"; export function getLiteParseRunnerPath(): string { - return isDev - ? path.join(baseDir, "resources", "document-extraction", "liteparse_runner.mjs") - : path.join(baseDir, "resources", "document-extraction", "liteparse_runner.mjs"); -} - -export function getLiteParseDependencyPath(): string { - return path.join(baseDir, "node_modules", "@llamaindex", "liteparse"); -} - -export function isLiteParseInstalled(): boolean { - const runnerPath = getLiteParseRunnerPath(); - const liteparsePackagePath = getLiteParseDependencyPath(); - - if (!fs.existsSync(runnerPath)) return false; - if (!fs.existsSync(liteparsePackagePath)) return false; - - const nodeCheck = spawnSync("node", ["--version"], { - stdio: "pipe", - windowsHide: true, - }); - return nodeCheck.status === 0; + return path.join(baseDir, "resources", "document-extraction", "liteparse_runner.mjs"); } diff --git a/electron/resources/document-extraction/liteparse_runner.mjs b/electron/resources/document-extraction/liteparse_runner.mjs index d8250744..6793610a 100644 --- a/electron/resources/document-extraction/liteparse_runner.mjs +++ b/electron/resources/document-extraction/liteparse_runner.mjs @@ -70,7 +70,7 @@ const ocrEnabled = parseBool(readArg("--ocr-enabled"), true); const dpi = toNumber(readArg("--dpi"), 150, 72, 600); const numWorkers = toNumber( readArg("--num-workers"), - Math.max(os.cpus().length - 1, 1), + Math.max(os.cpus().length - 4, 1), 1, 64 ); diff --git a/electron/resources/document-extraction/package.json b/electron/resources/document-extraction/package.json deleted file mode 100644 index 89e4c182..00000000 --- a/electron/resources/document-extraction/package.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "name": "presenton-document-extraction", - "private": true, - "type": "module", - "dependencies": { - "@llamaindex/liteparse": "^1.4.0" - } -} diff --git a/electron/servers/fastapi/api/v1/ppt/endpoints/pptx_slides.py b/electron/servers/fastapi/api/v1/ppt/endpoints/pptx_slides.py index fd81bfcd..65200b7f 100644 --- a/electron/servers/fastapi/api/v1/ppt/endpoints/pptx_slides.py +++ b/electron/servers/fastapi/api/v1/ppt/endpoints/pptx_slides.py @@ -15,7 +15,7 @@ import re from services.documents_loader import DocumentsLoader from utils.asset_directory_utils import get_images_directory import uuid -from constants.documents import POWERPOINT_TYPES +from constants.documents import PPTX_MIME_TYPES def _get_soffice_binary() -> str: @@ -330,7 +330,7 @@ async def process_pptx_slides( """ # Validate PPTX file - if pptx_file.content_type not in POWERPOINT_TYPES: + if pptx_file.content_type not in PPTX_MIME_TYPES: raise HTTPException( status_code=400, detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}", @@ -441,7 +441,7 @@ async def process_pptx_fonts( Uses the exact same font extraction and analysis utilities as the /pptx-slides endpoint. """ # Validate PPTX file - if pptx_file.content_type not in POWERPOINT_TYPES: + if pptx_file.content_type not in PPTX_MIME_TYPES: raise HTTPException( status_code=400, detail=f"Invalid file type. Expected PPTX file, got {pptx_file.content_type}", diff --git a/electron/servers/fastapi/constants/documents.py b/electron/servers/fastapi/constants/documents.py index 2bb4c3ed..0b87a29b 100644 --- a/electron/servers/fastapi/constants/documents.py +++ b/electron/servers/fastapi/constants/documents.py @@ -1,21 +1,90 @@ +PDF_EXTENSIONS = [".pdf"] +TEXT_EXTENSIONS = [".txt"] + +WORD_EXTENSIONS = [".doc", ".docx", ".docm", ".odt", ".rtf"] +POWERPOINT_EXTENSIONS = [".ppt", ".pptx", ".pptm", ".odp"] +SPREADSHEET_EXTENSIONS = [".xls", ".xlsx", ".xlsm", ".ods", ".csv", ".tsv"] + +JPEG_EXTENSIONS = [".jpg", ".jpeg"] +PNG_EXTENSIONS = [".png"] +GIF_EXTENSIONS = [".gif"] +BMP_EXTENSIONS = [".bmp"] +TIFF_EXTENSIONS = [".tiff", ".tif"] +WEBP_EXTENSIONS = [".webp"] +SVG_EXTENSIONS = [".svg"] +IMAGE_EXTENSIONS = ( + JPEG_EXTENSIONS + + PNG_EXTENSIONS + + GIF_EXTENSIONS + + BMP_EXTENSIONS + + TIFF_EXTENSIONS + + WEBP_EXTENSIONS + + SVG_EXTENSIONS +) + +OFFICE_EXTENSIONS = WORD_EXTENSIONS + POWERPOINT_EXTENSIONS + SPREADSHEET_EXTENSIONS + PDF_MIME_TYPES = ["application/pdf"] -TEXT_MIME_TYPES = ["text/plain"] -POWERPOINT_TYPES = [ - "application/vnd.openxmlformats-officedocument.presentationml.presentation" -] -WORD_TYPES = [ +TEXT_MIME_TYPES = ["text/plain", "text/markdown"] + +WORD_MIME_TYPES = [ "application/msword", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.ms-word.document.macroenabled.12", + "application/vnd.oasis.opendocument.text", + "application/rtf", + "text/rtf", ] -SPREADSHEET_TYPES = ["text/csv", "application/csv"] +POWERPOINT_MIME_TYPES = [ + "application/vnd.ms-powerpoint", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/vnd.ms-powerpoint.presentation.macroenabled.12", + "application/vnd.oasis.opendocument.presentation", +] -PNG_MIME_TYPES = ["image/png"] -JPEG_MIME_TYPES = ["image/jpeg"] -WEBP_MIME_TYPES = ["image/webp"] -IMAGE_MIME_TYPES = PNG_MIME_TYPES + JPEG_MIME_TYPES + WEBP_MIME_TYPES +SPREADSHEET_MIME_TYPES = [ + "application/vnd.ms-excel", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.ms-excel.sheet.macroenabled.12", + "application/vnd.oasis.opendocument.spreadsheet", + "text/csv", + "application/csv", + "text/tab-separated-values", + "text/tsv", +] +IMAGE_MIME_TYPES = [ + "image/jpeg", + "image/png", + "image/gif", + "image/bmp", + "image/tiff", + "image/webp", + "image/svg+xml", +] -UPLOAD_ACCEPTED_FILE_TYPES = ( - PDF_MIME_TYPES + TEXT_MIME_TYPES + POWERPOINT_TYPES + WORD_TYPES + IMAGE_MIME_TYPES +UPLOAD_ACCEPTED_MIME_TYPES = ( + PDF_MIME_TYPES + + TEXT_MIME_TYPES + + WORD_MIME_TYPES + + POWERPOINT_MIME_TYPES + + SPREADSHEET_MIME_TYPES + + IMAGE_MIME_TYPES ) + +UPLOAD_ACCEPTED_EXTENSIONS = ( + PDF_EXTENSIONS + TEXT_EXTENSIONS + OFFICE_EXTENSIONS + IMAGE_EXTENSIONS +) + +# Includes both MIME types and extensions because some clients upload legacy +# office files with generic content-type values. +UPLOAD_ACCEPTED_FILE_TYPES = UPLOAD_ACCEPTED_MIME_TYPES + UPLOAD_ACCEPTED_EXTENSIONS + +# Kept for endpoints that strictly require modern .pptx files. +PPTX_MIME_TYPES = ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] + +# Backward compatibility aliases used across existing modules. +POWERPOINT_TYPES = PPTX_MIME_TYPES +WORD_TYPES = WORD_MIME_TYPES +SPREADSHEET_TYPES = SPREADSHEET_MIME_TYPES diff --git a/electron/servers/fastapi/services/document_conversion_service.py b/electron/servers/fastapi/services/document_conversion_service.py new file mode 100644 index 00000000..497f12ec --- /dev/null +++ b/electron/servers/fastapi/services/document_conversion_service.py @@ -0,0 +1,165 @@ +import os +import subprocess +from pathlib import Path +from typing import Dict, List + + +class DocumentConversionError(Exception): + pass + + +def _windows_hidden_subprocess_kwargs() -> Dict[str, object]: + if os.name != "nt": + return {} + + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + return { + "creationflags": getattr(subprocess, "CREATE_NO_WINDOW", 0), + "startupinfo": startupinfo, + } + + +class DocumentConversionService: + def __init__(self): + self.soffice_binary = self._resolve_soffice_binary() + self.imagemagick_binary = self._resolve_imagemagick_binary() + + @staticmethod + def _resolve_soffice_binary() -> str: + configured = (os.getenv("SOFFICE_PATH") or "").strip() + if configured: + return configured + return "soffice.exe" if os.name == "nt" else "soffice" + + @staticmethod + def _can_execute(command: str, args: List[str]) -> bool: + try: + result = subprocess.run( + [command, *args], + capture_output=True, + text=True, + timeout=10, + check=False, + **_windows_hidden_subprocess_kwargs(), + ) + return result.returncode == 0 + except Exception: + return False + + def _resolve_imagemagick_binary(self) -> str: + configured = (os.getenv("IMAGEMAGICK_BINARY") or "").strip() + if configured: + return configured + + for candidate in ["magick", "convert"]: + if self._can_execute(candidate, ["-version"]): + return candidate + + return "magick" if os.name == "nt" else "convert" + + def convert_office_to_pdf( + self, + file_path: str, + output_dir: str, + timeout_seconds: int = 180, + ) -> str: + Path(output_dir).mkdir(parents=True, exist_ok=True) + + existing_pdfs = { + p.name for p in Path(output_dir).glob("*.pdf") if p.is_file() + } + + try: + subprocess.run( + [ + self.soffice_binary, + "--headless", + "--convert-to", + "pdf", + "--outdir", + output_dir, + file_path, + ], + check=True, + capture_output=True, + text=True, + timeout=timeout_seconds, + **_windows_hidden_subprocess_kwargs(), + ) + except subprocess.TimeoutExpired as exc: + raise DocumentConversionError( + f"LibreOffice conversion timed out for {os.path.basename(file_path)}" + ) from exc + except subprocess.CalledProcessError as exc: + stderr = (exc.stderr or "").strip() + stdout = (exc.stdout or "").strip() + details = stderr or stdout or str(exc) + raise DocumentConversionError( + f"LibreOffice conversion failed for {os.path.basename(file_path)}: {details}" + ) from exc + except Exception as exc: + raise DocumentConversionError( + f"LibreOffice conversion failed for {os.path.basename(file_path)}: {exc}" + ) from exc + + expected_pdf = Path(output_dir) / f"{Path(file_path).stem}.pdf" + if expected_pdf.is_file(): + return str(expected_pdf) + + generated_pdfs = [ + p + for p in Path(output_dir).glob("*.pdf") + if p.is_file() and p.name not in existing_pdfs + ] + if generated_pdfs: + newest = max(generated_pdfs, key=lambda p: p.stat().st_mtime) + return str(newest) + + raise DocumentConversionError( + f"LibreOffice did not create a PDF for {os.path.basename(file_path)}" + ) + + def convert_image_to_png( + self, + file_path: str, + output_dir: str, + timeout_seconds: int = 120, + ) -> str: + Path(output_dir).mkdir(parents=True, exist_ok=True) + + output_path = Path(output_dir) / f"{Path(file_path).stem}_converted.png" + + command = [self.imagemagick_binary, file_path, str(output_path)] + + try: + subprocess.run( + command, + check=True, + capture_output=True, + text=True, + timeout=timeout_seconds, + **_windows_hidden_subprocess_kwargs(), + ) + except subprocess.TimeoutExpired as exc: + raise DocumentConversionError( + f"ImageMagick conversion timed out for {os.path.basename(file_path)}" + ) from exc + except subprocess.CalledProcessError as exc: + stderr = (exc.stderr or "").strip() + stdout = (exc.stdout or "").strip() + details = stderr or stdout or str(exc) + raise DocumentConversionError( + f"ImageMagick conversion failed for {os.path.basename(file_path)}: {details}" + ) from exc + except Exception as exc: + raise DocumentConversionError( + f"ImageMagick conversion failed for {os.path.basename(file_path)}: {exc}" + ) from exc + + if not output_path.is_file(): + raise DocumentConversionError( + f"ImageMagick did not create a PNG for {os.path.basename(file_path)}" + ) + + return str(output_path) diff --git a/electron/servers/fastapi/services/documents_loader.py b/electron/servers/fastapi/services/documents_loader.py index f9bc6b23..6dbc3f5c 100644 --- a/electron/servers/fastapi/services/documents_loader.py +++ b/electron/servers/fastapi/services/documents_loader.py @@ -1,15 +1,21 @@ -import mimetypes -from fastapi import HTTPException -import os, asyncio +import asyncio +import os +import tempfile +from pathlib import Path from typing import List, Optional, Tuple import pdfplumber +from fastapi import HTTPException + from constants.documents import ( - IMAGE_MIME_TYPES, - PDF_MIME_TYPES, - POWERPOINT_TYPES, - TEXT_MIME_TYPES, - WORD_TYPES, + IMAGE_EXTENSIONS, + OFFICE_EXTENSIONS, + PDF_EXTENSIONS, + TEXT_EXTENSIONS, +) +from services.document_conversion_service import ( + DocumentConversionError, + DocumentConversionService, ) from services.liteparse_service import LiteParseError, LiteParseService from utils.ocr_language import presentation_language_to_ocr_code @@ -31,6 +37,7 @@ class DocumentsLoader: self._file_paths = file_paths self._ocr_language = presentation_language_to_ocr_code(presentation_language) self.liteparse_service = LiteParseService() + self.document_conversion_service = DocumentConversionService() self.document_service = DocumentService() if DocumentService is not None else None self._documents: List[str] = [] @@ -53,7 +60,7 @@ class DocumentsLoader: """If load_images is True, temp_dir must be provided""" documents: List[str] = [] - images: List[str] = [] + images: List[List[str]] = [] for file_path in self._file_paths: if not os.path.exists(file_path): @@ -64,19 +71,28 @@ class DocumentsLoader: document = "" imgs = [] - mime_type = mimetypes.guess_type(file_path)[0] - if mime_type in PDF_MIME_TYPES: + extension = Path(file_path).suffix.lower() + + if extension in PDF_EXTENSIONS: document, imgs = await self.load_pdf( file_path, load_text, load_images, temp_dir ) - elif mime_type in TEXT_MIME_TYPES: + elif extension in TEXT_EXTENSIONS: document = await self.load_text(file_path) - elif mime_type in POWERPOINT_TYPES: - document = self.load_powerpoint(file_path) - elif mime_type in WORD_TYPES: - document = self.load_msword(file_path) - elif mime_type in IMAGE_MIME_TYPES: - document = self.load_image(file_path) + elif extension in OFFICE_EXTENSIONS: + document = await asyncio.to_thread( + self.load_office_document, + file_path, + temp_dir, + ) + elif extension in IMAGE_EXTENSIONS: + document = await asyncio.to_thread( + self.load_image, + file_path, + temp_dir, + ) + else: + document = await asyncio.to_thread(self._parse_with_liteparse, file_path) documents.append(document) images.append(imgs) @@ -106,14 +122,35 @@ class DocumentsLoader: with open(file_path, "r", encoding="utf-8") as file: return await asyncio.to_thread(file.read) - def load_msword(self, file_path: str) -> str: - return self._parse_with_liteparse(file_path) + def load_office_document(self, file_path: str, temp_dir: Optional[str] = None) -> str: + if temp_dir: + converted_path = self.document_conversion_service.convert_office_to_pdf( + file_path, + temp_dir, + ) + return self._parse_with_liteparse(converted_path) - def load_powerpoint(self, file_path: str) -> str: - return self._parse_with_liteparse(file_path) + with tempfile.TemporaryDirectory(prefix="office-convert-") as conversion_dir: + converted_path = self.document_conversion_service.convert_office_to_pdf( + file_path, + conversion_dir, + ) + return self._parse_with_liteparse(converted_path) - def load_image(self, file_path: str) -> str: - return self._parse_with_liteparse(file_path) + def load_image(self, file_path: str, temp_dir: Optional[str] = None) -> str: + if temp_dir: + converted_path = self.document_conversion_service.convert_image_to_png( + file_path, + temp_dir, + ) + return self._parse_with_liteparse(converted_path) + + with tempfile.TemporaryDirectory(prefix="image-convert-") as conversion_dir: + converted_path = self.document_conversion_service.convert_image_to_png( + file_path, + conversion_dir, + ) + return self._parse_with_liteparse(converted_path) def _parse_with_liteparse(self, file_path: str) -> str: try: @@ -122,7 +159,7 @@ class DocumentsLoader: ocr_enabled=True, ocr_language=self._ocr_language, ) - except LiteParseError as exc: + except (LiteParseError, DocumentConversionError) as exc: if self.document_service is not None: try: return self.document_service.parse_to_markdown(file_path) diff --git a/electron/servers/fastapi/utils/validators.py b/electron/servers/fastapi/utils/validators.py index 6b405ccf..37e56209 100644 --- a/electron/servers/fastapi/utils/validators.py +++ b/electron/servers/fastapi/utils/validators.py @@ -1,9 +1,25 @@ +from pathlib import Path from typing import List from fastapi import HTTPException from fastapi import UploadFile +def _is_accepted_file_type(file: UploadFile, accepted_types: List[str]) -> bool: + accepted_mime_types = {t.lower() for t in accepted_types if not t.startswith(".")} + accepted_extensions = {t.lower() for t in accepted_types if t.startswith(".")} + + content_type = (file.content_type or "").strip().lower() + if content_type in accepted_mime_types: + return True + + extension = Path(file.filename or "").suffix.lower() + if extension in accepted_extensions: + return True + + return False + + def validate_files( field, nullable: bool, @@ -15,12 +31,14 @@ def validate_files( if field: files: List[UploadFile] = field if multiple else [field] for each_file in files: - if (max_size * 1024 * 1024) < each_file.size: + file_size = each_file.size or 0 + + if (max_size * 1024 * 1024) < file_size: raise HTTPException( 400, detail=f"File '{each_file.filename}' exceeded max upload size of {max_size} MB", ) - elif each_file.content_type not in accepted_types: + elif not _is_accepted_file_type(each_file, accepted_types): raise HTTPException( 400, detail=f"File '{each_file.filename}' not accepted. Accepted types: {accepted_types}", diff --git a/electron/servers/nextjs/app/(presentation-generator)/upload/components/SupportingDoc.tsx b/electron/servers/nextjs/app/(presentation-generator)/upload/components/SupportingDoc.tsx index dcf5b5c8..a9710c83 100644 --- a/electron/servers/nextjs/app/(presentation-generator)/upload/components/SupportingDoc.tsx +++ b/electron/servers/nextjs/app/(presentation-generator)/upload/components/SupportingDoc.tsx @@ -13,37 +13,50 @@ interface SupportingDocProps { const PDF_TYPES = ['.pdf'] const TEXT_TYPES = ['.txt'] -const POWERPOINT_TYPES = ['.pptx'] -const WORD_TYPES = ['.docx'] +const WORD_TYPES = ['.doc', '.docx', '.docm', '.odt', '.rtf'] +const POWERPOINT_TYPES = ['.ppt', '.pptx', '.pptm', '.odp'] +const SPREADSHEET_TYPES = ['.xls', '.xlsx', '.xlsm', '.ods', '.csv', '.tsv'] +const IMAGE_TYPES = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp', '.svg'] -const ACCEPT_DEFAULT = [ - 'application/pdf', - 'text/plain', - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', - 'application/vnd.openxmlformats-officedocument.presentationml.presentation', - ...PDF_TYPES, - ...TEXT_TYPES, - ...POWERPOINT_TYPES, - ...WORD_TYPES, -].join(',') -const ALLOWED_MIME_PREFIXES: string[] = [] +const ALLOWED_MIME_PREFIXES: string[] = ['image/'] const ALLOWED_MIME_TYPES = [ 'application/pdf', - 'application/x-pdf', - 'application/acrobat', - 'applications/pdf', - 'text/pdf', - 'application/vnd.pdf', 'text/plain', + 'text/csv', + 'application/csv', + 'text/tab-separated-values', + 'text/tsv', + 'application/msword', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'application/vnd.ms-word.document.macroenabled.12', + 'application/vnd.oasis.opendocument.text', + 'application/rtf', + 'text/rtf', + 'application/vnd.ms-powerpoint', 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + 'application/vnd.ms-powerpoint.presentation.macroenabled.12', + 'application/vnd.oasis.opendocument.presentation', + 'application/vnd.ms-excel', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'application/vnd.ms-excel.sheet.macroenabled.12', + 'application/vnd.oasis.opendocument.spreadsheet', + 'image/jpeg', + 'image/png', + 'image/gif', + 'image/bmp', + 'image/tiff', + 'image/webp', + 'image/svg+xml', ] const ALLOWED_EXTENSIONS = [ ...PDF_TYPES, ...TEXT_TYPES, - ...POWERPOINT_TYPES, ...WORD_TYPES, + ...POWERPOINT_TYPES, + ...SPREADSHEET_TYPES, + ...IMAGE_TYPES, ] +const ACCEPT_DEFAULT = [...ALLOWED_MIME_TYPES, ...ALLOWED_EXTENSIONS].join(',') const SupportingDoc = ({ files, @@ -75,7 +88,7 @@ const SupportingDoc = ({ const disallowed = filesToReview.filter((file) => !isAllowedFile(file)) if (disallowed.length > 0) { toast.error('Some files are not supported', { - description: 'Only PDF, TXT, PPTX, and DOCX files are allowed.', + description: 'Supported: Word, PowerPoint, spreadsheets, PDF/TXT, and image files.', }) } } @@ -171,7 +184,7 @@ const SupportingDoc = ({

- Drag and drop PDF, TXT, PPTX, DOCX, or click to browse + Drag and drop Office docs, spreadsheets, images, PDF/TXT, or click to browse

@@ -214,7 +227,7 @@ const SupportingDoc = ({ {filteredFiles.length !== files.length && (

- Some files were skipped. Only PDF, TXT, PPTX, and DOCX files are supported. + Some files were skipped. Supported: Word, PowerPoint, spreadsheets, PDF/TXT, and image files.

)}
From 691d0f62e86239dc76c9002e000be73d9f724863 Mon Sep 17 00:00:00 2001 From: sudipnext Date: Mon, 30 Mar 2026 20:21:15 +0545 Subject: [PATCH 3/3] feat: enhance ImageMagick installation process and update documentation - Added functions to resolve Homebrew and Linux escalation commands for ImageMagick installation. - Improved error handling and logging for manual installation steps. - Updated download URLs for ImageMagick based on the platform. - Enhanced user interface messages to clarify installation steps for different operating systems. - Adjusted CPU worker count in document extraction for better performance. --- electron/.gitignore | 9 ++- electron/app/ipc/setup_install_handlers.ts | 77 ++++++++++++++++--- electron/app/utils/imagemagick-check.ts | 28 ++++++- .../document-extraction/liteparse_runner.mjs | 2 +- .../resources/ui/setup-installer/index.html | 15 +++- 5 files changed, 115 insertions(+), 16 deletions(-) diff --git a/electron/.gitignore b/electron/.gitignore index bd040a77..588efb1a 100644 --- a/electron/.gitignore +++ b/electron/.gitignore @@ -21,6 +21,13 @@ app_dist resources/fastapi resources/nextjs dist +eng.traineddata +servers/fastapi/build/ +servers/fastapi/dist/ servers/fastapi/fastembed_cache/ electron/.cache/ -electron/.cache/export-runtime/ \ No newline at end of file +electron/.cache/export-runtime/ +*.pkg +*.toc +*.zip +*.pyc \ No newline at end of file diff --git a/electron/app/ipc/setup_install_handlers.ts b/electron/app/ipc/setup_install_handlers.ts index 29ddb454..3ffa345b 100644 --- a/electron/app/ipc/setup_install_handlers.ts +++ b/electron/app/ipc/setup_install_handlers.ts @@ -20,6 +20,7 @@ import { import { getSetupStatus } from "../utils/setup-dependencies"; import { getImageMagickDownloadUrl, + getImageMagickManualInstallCommands, isImageMagickInstalled, } from "../utils/imagemagick-check"; @@ -72,6 +73,33 @@ function commandExists(command: string, versionArgs: string[] = ["--version"]): return result.status === 0; } +function resolveBrewCommand(): string | null { + if (commandExists("brew")) { + return "brew"; + } + + const candidates = ["/opt/homebrew/bin/brew", "/usr/local/bin/brew"]; + for (const candidate of candidates) { + if (fs.existsSync(candidate)) { + return candidate; + } + } + return null; +} + +function resolveLinuxEscalationCommand(): string | null { + if (commandExists("pkexec", ["--version"])) return "pkexec"; + if (commandExists("sudo", ["-V"])) return "sudo"; + return null; +} + +function logManualImageMagickCommands(wc: WebContents) { + for (const line of getImageMagickManualInstallCommands()) { + const level = line.endsWith(":") ? "info" : "cmd"; + sendImageMagickLog(wc, level, line); + } +} + function runInstallCommand( wc: WebContents, command: string, @@ -210,7 +238,18 @@ export function setupSetupInstallHandlers() { if (process.platform === "linux") { if (commandExists("apt-get")) { - await runInstallCommand(wc, "pkexec", [ + const escalator = resolveLinuxEscalationCommand(); + if (!escalator) { + throw new Error( + "Neither pkexec nor sudo is available to run apt-get install." + ); + } + + await runInstallCommand(wc, escalator, [ + "apt-get", + "update", + ]); + await runInstallCommand(wc, escalator, [ "apt-get", "install", "-y", @@ -218,17 +257,30 @@ export function setupSetupInstallHandlers() { ]); } else { throw new Error( - "apt-get is unavailable. Install ImageMagick manually from the official download page." + "apt-get is unavailable. Install ImageMagick manually using your package manager." ); } } else if (process.platform === "darwin") { - if (commandExists("brew")) { - await runInstallCommand(wc, "brew", ["install", "imagemagick"]); - } else { + let brewCommand = resolveBrewCommand(); + if (!brewCommand) { + sendImageMagickLog( + wc, + "info", + "Homebrew not found. Installing Homebrew first..." + ); + const installHomebrewCommand = + 'NONINTERACTIVE=1 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'; + await runInstallCommand(wc, "/bin/bash", ["-c", installHomebrewCommand]); + brewCommand = resolveBrewCommand(); + } + + if (!brewCommand) { throw new Error( - "Homebrew is not installed. Install ImageMagick manually from the official download page." + "Homebrew installation completed, but brew was not found on PATH." ); } + + await runInstallCommand(wc, brewCommand, ["install", "imagemagick"]); } else if (process.platform === "win32") { if (commandExists("choco", ["-v"])) { await runInstallCommand(wc, "choco", [ @@ -238,7 +290,7 @@ export function setupSetupInstallHandlers() { ]); } else { throw new Error( - "Chocolatey is not installed. Install ImageMagick manually from the official download page." + "Chocolatey is not installed. Falling back to direct installer download." ); } } else { @@ -253,14 +305,21 @@ export function setupSetupInstallHandlers() { const message = error instanceof Error ? error.message : "ImageMagick install failed"; sendImageMagickLog(wc, "error", message); + logManualImageMagickCommands(wc); const downloadUrl = getImageMagickDownloadUrl(); sendImageMagickLog( wc, "info", - `Falling back to manual install page: ${downloadUrl}` + `Opening manual install link: ${downloadUrl}` ); await shell.openExternal(downloadUrl); - return { ok: true }; + sendImageMagickProgress( + wc, + "error", + undefined, + "Finish manual installation, then click Retry." + ); + return { ok: false, error: message }; } } ); diff --git a/electron/app/utils/imagemagick-check.ts b/electron/app/utils/imagemagick-check.ts index 6495c7f8..38d01be5 100644 --- a/electron/app/utils/imagemagick-check.ts +++ b/electron/app/utils/imagemagick-check.ts @@ -18,10 +18,34 @@ export function isImageMagickInstalled(): boolean { export function getImageMagickDownloadUrl(): string { if (process.platform === "win32") { - return "https://imagemagick.org/script/download.php#windows"; + return "https://imagemagick.org/archive/binaries/ImageMagick-7.1.2-18-Q16-HDRI-x64-dll.exe"; } if (process.platform === "darwin") { - return "https://imagemagick.org/script/download.php#macosx"; + return "https://brew.sh/"; } return "https://imagemagick.org/script/download.php#linux"; } + +export function getImageMagickManualInstallCommands(): string[] { + if (process.platform === "win32") { + return [ + "Download and run the installer:", + getImageMagickDownloadUrl(), + ]; + } + + if (process.platform === "darwin") { + return [ + "Install Homebrew:", + '/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"', + "Install ImageMagick:", + "brew install imagemagick", + ]; + } + + return [ + "Install ImageMagick:", + "sudo apt-get update", + "sudo apt-get install -y imagemagick", + ]; +} diff --git a/electron/resources/document-extraction/liteparse_runner.mjs b/electron/resources/document-extraction/liteparse_runner.mjs index 6793610a..d7b68195 100644 --- a/electron/resources/document-extraction/liteparse_runner.mjs +++ b/electron/resources/document-extraction/liteparse_runner.mjs @@ -70,7 +70,7 @@ const ocrEnabled = parseBool(readArg("--ocr-enabled"), true); const dpi = toNumber(readArg("--dpi"), 150, 72, 600); const numWorkers = toNumber( readArg("--num-workers"), - Math.max(os.cpus().length - 4, 1), + Math.max(os.cpus().length - 2, 1), 1, 64 ); diff --git a/electron/resources/ui/setup-installer/index.html b/electron/resources/ui/setup-installer/index.html index 5ff1397b..2d871226 100644 --- a/electron/resources/ui/setup-installer/index.html +++ b/electron/resources/ui/setup-installer/index.html @@ -288,7 +288,7 @@ ? 'Presenton uses LibreOffice to generate custom templates from PPTX files.' : step === 'chrome' ? 'Presenton uses Chromium for export and slide rendering. Download it now (~150 MB).' - : 'Presenton uses ImageMagick for OCR/document conversion support. We will try automatic installation first, then open the download page if package manager tools are unavailable.'; + : 'Presenton uses ImageMagick for OCR/document conversion support. Linux uses apt, macOS installs Homebrew first (if needed) and then runs brew install imagemagick, and Windows uses Chocolatey with a direct installer fallback.'; document.getElementById('btn-install').onclick = () => startInstall(step); document.getElementById('btn-skip').onclick = () => handleSkip(); showState('prompt'); @@ -315,8 +315,17 @@ }); } else { document.getElementById('dl-heading').textContent = 'Installing ImageMagick'; - document.getElementById('dl-phase').textContent = 'Automatic install (apt/brew/choco) with fallback to manual download'; - window.setupInstaller.installImageMagick().then(() => { + document.getElementById('dl-phase').textContent = 'Linux: apt-get | macOS: Homebrew + brew install | Windows: choco or direct installer'; + window.setupInstaller.installImageMagick().then((installResult) => { + if (!installResult || !installResult.ok) { + if (currentStep !== 'imagemagick') return; + document.getElementById('err-msg').textContent = installResult?.error || 'ImageMagick installation needs manual completion. Follow the shown commands and then click Retry.'; + showState('error'); + document.getElementById('btn-retry').onclick = () => startInstall('imagemagick'); + document.getElementById('btn-skip-error').onclick = () => nextOrDone(); + return; + } + window.setupInstaller.checkImageMagick().then(res => { if (!res.ok && currentStep === 'imagemagick') { document.getElementById('err-msg').textContent = res.error || 'ImageMagick is not installed yet.';