Removes: pdf2image as python dependency

This commit is contained in:
sauravniraula 2025-05-10 23:38:46 +05:45
parent 2a4d4ce28a
commit 7cd0db2716
No known key found for this signature in database
GPG key ID: 60FCC1B5A5E83326
3 changed files with 8 additions and 18 deletions

View file

@ -1,11 +1,16 @@
import asyncio
import os
from api.services.instances import temp_file_service
from pdf2image import convert_from_path
import pdfplumber
# Produce per-page images for the PDF at `document_path`, using a directory
# obtained from temp_file_service.create_dir_in_dir(temp_dir) as the output
# location.
# NOTE(review): this hunk is rendered without +/- markers or indentation. Since
# the commit removes pdf2image, the `return convert_from_path(...)` line is
# presumably the deleted implementation and the pdfplumber `with` block is its
# replacement — confirm against the raw diff before reading this as live code.
def get_page_images_from_pdf(document_path: str, temp_dir: str):
images_temp_dir = temp_file_service.create_dir_in_dir(temp_dir)
# Presumably the removed pdf2image path: it handed the conversion result back
# to the caller.
return convert_from_path(document_path, output_folder=images_temp_dir)
# pdfplumber path: save each page as page_<page_number>.png in images_temp_dir.
with pdfplumber.open(document_path) as pdf:
for page in pdf.pages:
# to_image() is called with no arguments, so the library default resolution
# applies. NOTE(review): presumably lower than pdf2image's default DPI —
# confirm the output quality is still adequate for downstream use.
img = page.to_image()
img.save(os.path.join(images_temp_dir, f"page_{page.page_number}.png"))
# NOTE(review): no return statement is visible after the loop, so the new path
# would return None where the old one returned the conversion result — verify
# that callers (e.g. the async wrapper below) do not rely on the return value.
async def get_page_images_from_pdf_async(document_path: str, temp_dir: str):

View file

@ -2132,20 +2132,6 @@ files = [
{file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
]
[[package]]
name = "pdf2image"
version = "1.17.0"
description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list."
optional = false
python-versions = "*"
files = [
{file = "pdf2image-1.17.0-py3-none-any.whl", hash = "sha256:ecdd58d7afb810dffe21ef2b1bbc057ef434dabbac6c33778a38a3f7744a27e2"},
{file = "pdf2image-1.17.0.tar.gz", hash = "sha256:eaa959bc116b420dd7ec415fcae49b98100dda3dd18cd2fdfa86d09f112f6d57"},
]
[package.dependencies]
pillow = "*"
[[package]]
name = "pdfminer-six"
version = "20250327"
@ -4143,4 +4129,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "233cf6c5d7a077c578d0218a26b626f40cf0b8557c773aa19b544938756b0d33"
content-hash = "8fdb0f3b57c3821005dd2fec21a5c41b67d415b64790f5ddd7ea13b1df77b593"

View file

@ -16,7 +16,6 @@ python-docx = "^1.1.2"
langchain-openai = "^0.3.16"
langchain-google-genai = "^2.1.4"
langchain-community = "^0.3.23"
pdf2image = "^1.17.0"
duckduckgo-search = "^8.0.1"
torch = { version = "^2.7.0+cpu", source = "pytorch-cpu" }
langchain-huggingface = "^0.2.0"