@@ -225,7 +225,7 @@ class PyPDFLoader(BasePDFLoader):
225225
226226 def __init__ (
227227 self ,
228- file_path : str ,
228+ file_path : Union [ str , PurePath ] ,
229229 password : Optional [Union [str , bytes ]] = None ,
230230 headers : Optional [dict ] = None ,
231231 extract_images : bool = False ,
@@ -264,7 +264,7 @@ class PyPDFium2Loader(BasePDFLoader):
264264
265265 def __init__ (
266266 self ,
267- file_path : str ,
267+ file_path : Union [ str , PurePath ] ,
268268 * ,
269269 headers : Optional [dict ] = None ,
270270 extract_images : bool = False ,
@@ -376,10 +376,9 @@ def lazy_load(
376376class PDFMinerPDFasHTMLLoader (BasePDFLoader ):
377377 """Load `PDF` files as HTML content using `PDFMiner`."""
378378
379- def __init__ (self ,
380- file_path : Union [str , PurePath ],
381- * ,
382- headers : Optional [dict ] = None ):
379+ def __init__ (
380+ self , file_path : Union [str , PurePath ], * , headers : Optional [dict ] = None
381+ ):
383382 """Initialize with a file path."""
384383 try :
385384 from pdfminer .high_level import extract_text_to_fp # noqa:F401
@@ -407,7 +406,7 @@ def lazy_load(self) -> Iterator[Document]:
407406 output_type = "html" ,
408407 )
409408 metadata = {
410- "source" : self .file_path if self .web_path is None else self .web_path
409+ "source" : str ( self .file_path ) if self .web_path is None else self .web_path
411410 }
412411 yield Document (page_content = output_string .getvalue (), metadata = metadata )
413412
0 commit comments