Removes pdf2image as a Python dependency
This commit is contained in:
parent
2a4d4ce28a
commit
7cd0db2716
3 changed files with 8 additions and 18 deletions
|
|
@@ -1,11 +1,16 @@
|
|||
import asyncio
|
||||
import os
|
||||
from api.services.instances import temp_file_service
|
||||
from pdf2image import convert_from_path
|
||||
import pdfplumber
|
||||
|
||||
|
||||
def get_page_images_from_pdf(document_path: str, temp_dir: str):
|
||||
images_temp_dir = temp_file_service.create_dir_in_dir(temp_dir)
|
||||
return convert_from_path(document_path, output_folder=images_temp_dir)
|
||||
|
||||
with pdfplumber.open(document_path) as pdf:
|
||||
for page in pdf.pages:
|
||||
img = page.to_image()
|
||||
img.save(os.path.join(images_temp_dir, f"page_{page.page_number}.png"))
|
||||
|
||||
|
||||
async def get_page_images_from_pdf_async(document_path: str, temp_dir: str):
|
||||
|
|
|
|||
16
servers/fastapi/poetry.lock
generated
16
servers/fastapi/poetry.lock
generated
|
|
@@ -2132,20 +2132,6 @@ files = [
|
|||
{file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pdf2image"
|
||||
version = "1.17.0"
|
||||
description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "pdf2image-1.17.0-py3-none-any.whl", hash = "sha256:ecdd58d7afb810dffe21ef2b1bbc057ef434dabbac6c33778a38a3f7744a27e2"},
|
||||
{file = "pdf2image-1.17.0.tar.gz", hash = "sha256:eaa959bc116b420dd7ec415fcae49b98100dda3dd18cd2fdfa86d09f112f6d57"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
pillow = "*"
|
||||
|
||||
[[package]]
|
||||
name = "pdfminer-six"
|
||||
version = "20250327"
|
||||
|
|
@@ -4143,4 +4129,4 @@ cffi = ["cffi (>=1.11)"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "233cf6c5d7a077c578d0218a26b626f40cf0b8557c773aa19b544938756b0d33"
|
||||
content-hash = "8fdb0f3b57c3821005dd2fec21a5c41b67d415b64790f5ddd7ea13b1df77b593"
|
||||
|
|
|
|||
|
|
@@ -16,7 +16,6 @@ python-docx = "^1.1.2"
|
|||
langchain-openai = "^0.3.16"
|
||||
langchain-google-genai = "^2.1.4"
|
||||
langchain-community = "^0.3.23"
|
||||
pdf2image = "^1.17.0"
|
||||
duckduckgo-search = "^8.0.1"
|
||||
torch = { version = "^2.7.0+cpu", source = "pytorch-cpu" }
|
||||
langchain-huggingface = "^0.2.0"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue