diff --git a/servers/fastapi/image_processor/utils.py b/servers/fastapi/image_processor/utils.py index d7f4aa69..31827ca4 100644 --- a/servers/fastapi/image_processor/utils.py +++ b/servers/fastapi/image_processor/utils.py @@ -1,11 +1,16 @@ import asyncio +import os from api.services.instances import temp_file_service -from pdf2image import convert_from_path +import pdfplumber def get_page_images_from_pdf(document_path: str, temp_dir: str): images_temp_dir = temp_file_service.create_dir_in_dir(temp_dir) - return convert_from_path(document_path, output_folder=images_temp_dir) + + with pdfplumber.open(document_path) as pdf: + for page in pdf.pages: + img = page.to_image() + img.save(os.path.join(images_temp_dir, f"page_{page.page_number}.png")) async def get_page_images_from_pdf_async(document_path: str, temp_dir: str): diff --git a/servers/fastapi/poetry.lock b/servers/fastapi/poetry.lock index 08e0680e..2262ff24 100644 --- a/servers/fastapi/poetry.lock +++ b/servers/fastapi/poetry.lock @@ -2132,20 +2132,6 @@ files = [ {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] -[[package]] -name = "pdf2image" -version = "1.17.0" -description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list." -optional = false -python-versions = "*" -files = [ - {file = "pdf2image-1.17.0-py3-none-any.whl", hash = "sha256:ecdd58d7afb810dffe21ef2b1bbc057ef434dabbac6c33778a38a3f7744a27e2"}, - {file = "pdf2image-1.17.0.tar.gz", hash = "sha256:eaa959bc116b420dd7ec415fcae49b98100dda3dd18cd2fdfa86d09f112f6d57"}, -] - -[package.dependencies] -pillow = "*" - [[package]] name = "pdfminer-six" version = "20250327" @@ -4143,4 +4129,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "233cf6c5d7a077c578d0218a26b626f40cf0b8557c773aa19b544938756b0d33" +content-hash = "8fdb0f3b57c3821005dd2fec21a5c41b67d415b64790f5ddd7ea13b1df77b593" diff --git a/servers/fastapi/pyproject.toml b/servers/fastapi/pyproject.toml index d7dffdfe..fbffb4c5 100644 --- a/servers/fastapi/pyproject.toml +++ b/servers/fastapi/pyproject.toml @@ -16,7 +16,6 @@ python-docx = "^1.1.2" langchain-openai = "^0.3.16" langchain-google-genai = "^2.1.4" langchain-community = "^0.3.23" -pdf2image = "^1.17.0" duckduckgo-search = "^8.0.1" torch = { version = "^2.7.0+cpu", source = "pytorch-cpu" } langchain-huggingface = "^0.2.0"