presenton/electron/servers/fastapi/services/docling_service.py
2026-02-20 12:02:23 +05:45

38 lines
1.4 KiB
Python

from docling.document_converter import (
DocumentConverter,
PdfFormatOption,
PowerpointFormatOption,
WordFormatOption,
)
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.datamodel.base_models import InputFormat
from utils.path_helpers import patch_python_docx_templates
class DoclingService:
    """Convert PDF, DOCX and PPTX files to Markdown through one Docling converter.

    Attributes:
        pipeline_options: The ``PdfPipelineOptions`` instance (with ``do_ocr``
            set to ``False``) handed to every format option.
        converter: The ``DocumentConverter`` whose allowed formats are PPTX,
            PDF and DOCX.
    """

    def __init__(self):
        # python-docx template path resolution has to be patched before the
        # converter is created; the helper is meant to be safe to call in any
        # environment (Docker, development, PyInstaller).
        patch_python_docx_templates()

        options = PdfPipelineOptions()
        options.do_ocr = False
        self.pipeline_options = options

        # One format-option class per input format; every option receives the
        # same pipeline options object.
        option_classes = (
            (InputFormat.DOCX, WordFormatOption),
            (InputFormat.PPTX, PowerpointFormatOption),
            (InputFormat.PDF, PdfFormatOption),
        )
        per_format = {
            fmt: option_cls(pipeline_options=options)
            for fmt, option_cls in option_classes
        }

        self.converter = DocumentConverter(
            allowed_formats=[InputFormat.PPTX, InputFormat.PDF, InputFormat.DOCX],
            format_options=per_format,
        )

    def parse_to_markdown(self, file_path: str) -> str:
        """Run the converter on ``file_path`` and return the document as Markdown."""
        conversion = self.converter.convert(file_path)
        document = conversion.document
        return document.export_to_markdown()