@@ -144,6 +144,7 @@ def partition_pdf(
144144 starting_page_number : int = 1 ,
145145 extract_forms : bool = False ,
146146 form_extraction_skip_tables : bool = True ,
147+ password : Optional [str ] = None ,
147148 ** kwargs : Any ,
148149) -> list [Element ]:
149150 """Parses a pdf document into a list of interpreted elements.
@@ -224,6 +225,7 @@ def partition_pdf(
224225 starting_page_number = starting_page_number ,
225226 extract_forms = extract_forms ,
226227 form_extraction_skip_tables = form_extraction_skip_tables ,
228+ password = password ,
227229 ** kwargs ,
228230 )
229231
@@ -245,6 +247,7 @@ def partition_pdf_or_image(
245247 starting_page_number : int = 1 ,
246248 extract_forms : bool = False ,
247249 form_extraction_skip_tables : bool = True ,
250+ password : Optional [str ] = None ,
248251 ** kwargs : Any ,
249252) -> list [Element ]:
250253 """Parses a pdf or image document into a list of interpreted elements."""
@@ -273,6 +276,7 @@ def partition_pdf_or_image(
273276 languages = languages ,
274277 metadata_last_modified = metadata_last_modified or last_modified ,
275278 starting_page_number = starting_page_number ,
279+ password = password ,
276280 ** kwargs ,
277281 )
278282 pdf_text_extractable = any (
@@ -322,6 +326,7 @@ def partition_pdf_or_image(
322326 starting_page_number = starting_page_number ,
323327 extract_forms = extract_forms ,
324328 form_extraction_skip_tables = form_extraction_skip_tables ,
329+ password = password ,
325330 ** kwargs ,
326331 )
327332 out_elements = _process_uncategorized_text_elements (elements )
@@ -347,6 +352,7 @@ def partition_pdf_or_image(
347352 is_image = is_image ,
348353 metadata_last_modified = metadata_last_modified or last_modified ,
349354 starting_page_number = starting_page_number ,
355+ password = password ,
350356 ** kwargs ,
351357 )
352358 out_elements = _process_uncategorized_text_elements (elements )
@@ -360,6 +366,7 @@ def extractable_elements(
360366 languages : Optional [list [str ]] = None ,
361367 metadata_last_modified : Optional [str ] = None ,
362368 starting_page_number : int = 1 ,
369+ password : Optional [str ] = None ,
363370 ** kwargs : Any ,
364371) -> list [list [Element ]]:
365372 if isinstance (file , bytes ):
@@ -370,6 +377,7 @@ def extractable_elements(
370377 languages = languages ,
371378 metadata_last_modified = metadata_last_modified ,
372379 starting_page_number = starting_page_number ,
380+ password = password ,
373381 ** kwargs ,
374382 )
375383
@@ -380,6 +388,7 @@ def _partition_pdf_with_pdfminer(
380388 languages : list [str ],
381389 metadata_last_modified : Optional [str ],
382390 starting_page_number : int = 1 ,
391+ password : Optional [str ] = None ,
383392 ** kwargs : Any ,
384393) -> list [list [Element ]]:
385394 """Partitions a PDF using PDFMiner instead of using a layoutmodel. Used for faster
@@ -403,6 +412,7 @@ def _partition_pdf_with_pdfminer(
403412 languages = languages ,
404413 metadata_last_modified = metadata_last_modified ,
405414 starting_page_number = starting_page_number ,
415+ password = password ,
406416 ** kwargs ,
407417 )
408418
@@ -413,6 +423,7 @@ def _partition_pdf_with_pdfminer(
413423 languages = languages ,
414424 metadata_last_modified = metadata_last_modified ,
415425 starting_page_number = starting_page_number ,
426+ password = password ,
416427 ** kwargs ,
417428 )
418429
@@ -427,14 +438,16 @@ def _process_pdfminer_pages(
427438 metadata_last_modified : Optional [str ],
428439 annotation_threshold : Optional [float ] = env_config .PDF_ANNOTATION_THRESHOLD ,
429440 starting_page_number : int = 1 ,
441+ password : Optional [str ] = None ,
430442 ** kwargs ,
431443) -> list [list [Element ]]:
432444 """Uses PDFMiner to split a document into pages and process them."""
433445
434446 elements = []
435447
436448 for page_number , (page , page_layout ) in enumerate (
437- open_pdfminer_pages_generator (fp ), start = starting_page_number
449+ open_pdfminer_pages_generator (fp , password = password ),
450+ start = starting_page_number ,
438451 ):
439452 width , height = page_layout .width , page_layout .height
440453
@@ -556,6 +569,7 @@ def _partition_pdf_or_image_local(
556569 extract_forms : bool = False ,
557570 form_extraction_skip_tables : bool = True ,
558571 pdf_hi_res_max_pages : Optional [int ] = None ,
572+ password : Optional [str ] = None ,
559573 ** kwargs : Any ,
560574) -> list [Element ]:
561575 """Partition using package installed locally"""
@@ -592,10 +606,11 @@ def _partition_pdf_or_image_local(
592606 is_image = is_image ,
593607 model_name = hi_res_model_name ,
594608 pdf_image_dpi = pdf_image_dpi ,
609+ password = password ,
595610 )
596611
597612 extracted_layout , layouts_links = (
598- process_file_with_pdfminer (filename = filename , dpi = pdf_image_dpi )
613+ process_file_with_pdfminer (filename = filename , dpi = pdf_image_dpi , password = password )
599614 if pdf_text_extractable
600615 else ([], [])
601616 )
@@ -635,20 +650,22 @@ def _partition_pdf_or_image_local(
635650 ocr_mode = ocr_mode ,
636651 pdf_image_dpi = pdf_image_dpi ,
637652 ocr_layout_dumper = ocr_layout_dumper ,
653+ password = password ,
638654 )
639655 else :
640656 inferred_document_layout = process_data_with_model (
641657 file ,
642658 is_image = is_image ,
643659 model_name = hi_res_model_name ,
644660 pdf_image_dpi = pdf_image_dpi ,
661+ password = password ,
645662 )
646663
647664 if hasattr (file , "seek" ):
648665 file .seek (0 )
649666
650667 extracted_layout , layouts_links = (
651- process_data_with_pdfminer (file = file , dpi = pdf_image_dpi )
668+ process_data_with_pdfminer (file = file , dpi = pdf_image_dpi , password = password )
652669 if pdf_text_extractable
653670 else ([], [])
654671 )
@@ -690,6 +707,7 @@ def _partition_pdf_or_image_local(
690707 ocr_mode = ocr_mode ,
691708 pdf_image_dpi = pdf_image_dpi ,
692709 ocr_layout_dumper = ocr_layout_dumper ,
710+ password = password ,
693711 )
694712
695713 # vectorization of the data structure ends here
@@ -837,6 +855,7 @@ def _partition_pdf_or_image_with_ocr(
837855 is_image : bool = False ,
838856 metadata_last_modified : Optional [str ] = None ,
839857 starting_page_number : int = 1 ,
858+ password : Optional [str ] = None ,
840859 ** kwargs : Any ,
841860):
842861 """Partitions an image or PDF using OCR. For PDFs, each page is converted
@@ -861,7 +880,7 @@ def _partition_pdf_or_image_with_ocr(
861880 elements .extend (page_elements )
862881 else :
863882 for page_number , image in enumerate (
864- convert_pdf_to_images (filename , file ), start = starting_page_number
883+ convert_pdf_to_images (filename , file , password = password ), start = starting_page_number
865884 ):
866885 page_elements = _partition_pdf_or_image_with_ocr_from_image (
867886 image = image ,
0 commit comments