@@ -225,7 +225,7 @@ class PyPDFLoader(BasePDFLoader):
225225
226226 def __init__ (
227227 self ,
228- file_path : Union [ str , PurePath ] ,
228+ file_path : str ,
229229 password : Optional [Union [str , bytes ]] = None ,
230230 headers : Optional [dict ] = None ,
231231 extract_images : bool = False ,
@@ -264,7 +264,7 @@ class PyPDFium2Loader(BasePDFLoader):
264264
265265 def __init__ (
266266 self ,
267- file_path : Union [ str , PurePath ] ,
267+ file_path : str ,
268268 * ,
269269 headers : Optional [dict ] = None ,
270270 extract_images : bool = False ,
@@ -336,7 +336,7 @@ class PDFMinerLoader(BasePDFLoader):
336336
337337 def __init__ (
338338 self ,
339- file_path : Union [ str , PurePath ] ,
339+ file_path : str ,
340340 * ,
341341 headers : Optional [dict ] = None ,
342342 extract_images : bool = False ,
@@ -376,9 +376,7 @@ def lazy_load(
376376class PDFMinerPDFasHTMLLoader (BasePDFLoader ):
377377 """Load `PDF` files as HTML content using `PDFMiner`."""
378378
379- def __init__ (
380- self , file_path : Union [str , PurePath ], * , headers : Optional [dict ] = None
381- ):
379+ def __init__ (self , file_path : str , * , headers : Optional [dict ] = None ):
382380 """Initialize with a file path."""
383381 try :
384382 from pdfminer .high_level import extract_text_to_fp # noqa:F401
@@ -406,7 +404,7 @@ def lazy_load(self) -> Iterator[Document]:
406404 output_type = "html" ,
407405 )
408406 metadata = {
409- "source" : str ( self .file_path ) if self .web_path is None else self .web_path
407+ "source" : self .file_path if self .web_path is None else self .web_path
410408 }
411409 yield Document (page_content = output_string .getvalue (), metadata = metadata )
412410
@@ -416,7 +414,7 @@ class PyMuPDFLoader(BasePDFLoader):
416414
417415 def __init__ (
418416 self ,
419- file_path : Union [ str , PurePath ] ,
417+ file_path : str ,
420418 * ,
421419 headers : Optional [dict ] = None ,
422420 extract_images : bool = False ,
@@ -613,7 +611,7 @@ class PDFPlumberLoader(BasePDFLoader):
613611
614612 def __init__ (
615613 self ,
616- file_path : Union [ str , PurePath ] ,
614+ file_path : str ,
617615 text_kwargs : Optional [Mapping [str , Any ]] = None ,
618616 dedupe : bool = False ,
619617 headers : Optional [dict ] = None ,
@@ -892,7 +890,7 @@ def _make_config(self) -> dict:
892890 from dedoc .utils .langchain import make_manager_pdf_config
893891
894892 return make_manager_pdf_config (
895- file_path = str ( self .file_path ) ,
893+ file_path = self .file_path ,
896894 parsing_params = self .parsing_parameters ,
897895 split = self .split ,
898896 )
@@ -903,7 +901,7 @@ class DocumentIntelligenceLoader(BasePDFLoader):
903901
904902 def __init__ (
905903 self ,
906- file_path : Union [ str , PurePath ] ,
904+ file_path : str ,
907905 client : Any ,
908906 model : str = "prebuilt-document" ,
909907 headers : Optional [dict ] = None ,
@@ -1010,7 +1008,7 @@ def lazy_load(self) -> Iterator[Document]:
10101008
10111009 # Directly call asyncio.run to execute zerox synchronously
10121010 zerox_output = asyncio .run (
1013- zerox (file_path = str ( self .file_path ) , model = self .model , ** self .zerox_kwargs )
1011+ zerox (file_path = self .file_path , model = self .model , ** self .zerox_kwargs )
10141012 )
10151013
10161014 # Convert zerox output to Document instances and yield them
0 commit comments