Skip to content

Commit 7a5b5c5

Browse files
committed
Fix Line too long
1 parent 668dc9c commit 7a5b5c5

File tree

1 file changed

+10 −7 lines changed
  • libs/community/langchain_community/document_loaders

1 file changed

+10 −7 lines changed

libs/community/langchain_community/document_loaders/pdf.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -336,7 +336,7 @@ class PDFMinerLoader(BasePDFLoader):
336336

337337
def __init__(
338338
self,
339-
file_path: str,
339+
file_path: Union[str, PurePath],
340340
*,
341341
headers: Optional[dict] = None,
342342
extract_images: bool = False,
@@ -376,7 +376,10 @@ def lazy_load(
376376
class PDFMinerPDFasHTMLLoader(BasePDFLoader):
377377
"""Load `PDF` files as HTML content using `PDFMiner`."""
378378

379-
def __init__(self, file_path: str, *, headers: Optional[dict] = None):
379+
def __init__(self,
380+
file_path: Union[str, PurePath],
381+
*,
382+
headers: Optional[dict] = None):
380383
"""Initialize with a file path."""
381384
try:
382385
from pdfminer.high_level import extract_text_to_fp # noqa:F401
@@ -414,7 +417,7 @@ class PyMuPDFLoader(BasePDFLoader):
414417

415418
def __init__(
416419
self,
417-
file_path: str,
420+
file_path: Union[str, PurePath],
418421
*,
419422
headers: Optional[dict] = None,
420423
extract_images: bool = False,
@@ -611,7 +614,7 @@ class PDFPlumberLoader(BasePDFLoader):
611614

612615
def __init__(
613616
self,
614-
file_path: str,
617+
file_path: Union[str, PurePath],
615618
text_kwargs: Optional[Mapping[str, Any]] = None,
616619
dedupe: bool = False,
617620
headers: Optional[dict] = None,
@@ -890,7 +893,7 @@ def _make_config(self) -> dict:
890893
from dedoc.utils.langchain import make_manager_pdf_config
891894

892895
return make_manager_pdf_config(
893-
file_path=self.file_path,
896+
file_path=str(self.file_path),
894897
parsing_params=self.parsing_parameters,
895898
split=self.split,
896899
)
@@ -901,7 +904,7 @@ class DocumentIntelligenceLoader(BasePDFLoader):
901904

902905
def __init__(
903906
self,
904-
file_path: str,
907+
file_path: Union[str, PurePath],
905908
client: Any,
906909
model: str = "prebuilt-document",
907910
headers: Optional[dict] = None,
@@ -1008,7 +1011,7 @@ def lazy_load(self) -> Iterator[Document]:
10081011

10091012
# Directly call asyncio.run to execute zerox synchronously
10101013
zerox_output = asyncio.run(
1011-
zerox(file_path=self.file_path, model=self.model, **self.zerox_kwargs)
1014+
zerox(file_path=str(self.file_path), model=self.model, **self.zerox_kwargs)
10121015
)
10131016

10141017
# Convert zerox output to Document instances and yield them

0 commit comments

Comments (0)