@@ -133,6 +133,7 @@ def partition_pdf(
133133 starting_page_number : int = 1 ,
134134 extract_forms : bool = False ,
135135 form_extraction_skip_tables : bool = True ,
136+ password : Optional [str ] = None ,
136137 ** kwargs : Any ,
137138) -> list [Element ]:
138139 """Parses a pdf document into a list of interpreted elements.
@@ -213,6 +214,7 @@ def partition_pdf(
213214 starting_page_number = starting_page_number ,
214215 extract_forms = extract_forms ,
215216 form_extraction_skip_tables = form_extraction_skip_tables ,
217+ password = password ,
216218 ** kwargs ,
217219 )
218220
@@ -234,6 +236,7 @@ def partition_pdf_or_image(
234236 starting_page_number : int = 1 ,
235237 extract_forms : bool = False ,
236238 form_extraction_skip_tables : bool = True ,
239+ password : Optional [str ] = None ,
237240 ** kwargs : Any ,
238241) -> list [Element ]:
239242 """Parses a pdf or image document into a list of interpreted elements."""
@@ -262,6 +265,7 @@ def partition_pdf_or_image(
262265 languages = languages ,
263266 metadata_last_modified = metadata_last_modified or last_modified ,
264267 starting_page_number = starting_page_number ,
268+ password = password ,
265269 ** kwargs ,
266270 )
267271 pdf_text_extractable = any (
@@ -311,6 +315,7 @@ def partition_pdf_or_image(
311315 starting_page_number = starting_page_number ,
312316 extract_forms = extract_forms ,
313317 form_extraction_skip_tables = form_extraction_skip_tables ,
318+ password = password ,
314319 ** kwargs ,
315320 )
316321 out_elements = _process_uncategorized_text_elements (elements )
@@ -336,6 +341,7 @@ def partition_pdf_or_image(
336341 is_image = is_image ,
337342 metadata_last_modified = metadata_last_modified or last_modified ,
338343 starting_page_number = starting_page_number ,
344+ password = password ,
339345 ** kwargs ,
340346 )
341347 out_elements = _process_uncategorized_text_elements (elements )
@@ -349,6 +355,7 @@ def extractable_elements(
349355 languages : Optional [list [str ]] = None ,
350356 metadata_last_modified : Optional [str ] = None ,
351357 starting_page_number : int = 1 ,
358+ password :Optional [str ] = None ,
352359 ** kwargs : Any ,
353360) -> list [list [Element ]]:
354361 if isinstance (file , bytes ):
@@ -359,6 +366,7 @@ def extractable_elements(
359366 languages = languages ,
360367 metadata_last_modified = metadata_last_modified ,
361368 starting_page_number = starting_page_number ,
369+ password = password ,
362370 ** kwargs ,
363371 )
364372
@@ -369,6 +377,7 @@ def _partition_pdf_with_pdfminer(
369377 languages : list [str ],
370378 metadata_last_modified : Optional [str ],
371379 starting_page_number : int = 1 ,
380+ password :Optional [str ] = None ,
372381 ** kwargs : Any ,
373382) -> list [list [Element ]]:
374383 """Partitions a PDF using PDFMiner instead of using a layoutmodel. Used for faster
@@ -402,6 +411,7 @@ def _partition_pdf_with_pdfminer(
402411 languages = languages ,
403412 metadata_last_modified = metadata_last_modified ,
404413 starting_page_number = starting_page_number ,
414+ password = password ,
405415 ** kwargs ,
406416 )
407417
@@ -416,14 +426,16 @@ def _process_pdfminer_pages(
416426 metadata_last_modified : Optional [str ],
417427 annotation_threshold : Optional [float ] = env_config .PDF_ANNOTATION_THRESHOLD ,
418428 starting_page_number : int = 1 ,
429+ password : Optional [str ] = None ,
419430 ** kwargs ,
420431) -> list [list [Element ]]:
421432 """Uses PDFMiner to split a document into pages and process them."""
422433
423434 elements = []
424435
425436 for page_number , (page , page_layout ) in enumerate (
426- open_pdfminer_pages_generator (fp ), start = starting_page_number
437+ open_pdfminer_pages_generator (fp , password = password ),
438+ start = starting_page_number ,
427439 ):
428440 width , height = page_layout .width , page_layout .height
429441
@@ -545,6 +557,7 @@ def _partition_pdf_or_image_local(
545557 extract_forms : bool = False ,
546558 form_extraction_skip_tables : bool = True ,
547559 pdf_hi_res_max_pages : Optional [int ] = None ,
560+ password :Optional [str ] = None ,
548561 ** kwargs : Any ,
549562) -> list [Element ]:
550563 """Partition using package installed locally"""
@@ -586,6 +599,7 @@ def _partition_pdf_or_image_local(
586599 is_image = is_image ,
587600 model_name = hi_res_model_name ,
588601 pdf_image_dpi = pdf_image_dpi ,
602+ password = password ,
589603 )
590604
591605 if hi_res_model_name .startswith ("chipper" ):
@@ -594,7 +608,8 @@ def _partition_pdf_or_image_local(
594608 final_document_layout = inferred_document_layout
595609 else :
596610 extracted_layout = (
597- process_file_with_pdfminer (filename = filename , dpi = pdf_image_dpi )
611+ process_file_with_pdfminer (filename = filename , dpi = pdf_image_dpi ,
612+ password = password )
598613 if pdf_text_extractable
599614 else []
600615 )
@@ -634,13 +649,15 @@ def _partition_pdf_or_image_local(
634649 ocr_mode = ocr_mode ,
635650 pdf_image_dpi = pdf_image_dpi ,
636651 ocr_layout_dumper = ocr_layout_dumper ,
652+ password = password ,
637653 )
638654 else :
639655 inferred_document_layout = process_data_with_model (
640656 file ,
641657 is_image = is_image ,
642658 model_name = hi_res_model_name ,
643659 pdf_image_dpi = pdf_image_dpi ,
660+ password = password ,
644661 )
645662
646663 if hi_res_model_name .startswith ("chipper" ):
@@ -652,7 +669,8 @@ def _partition_pdf_or_image_local(
652669 file .seek (0 )
653670
654671 extracted_layout = (
655- process_data_with_pdfminer (file = file , dpi = pdf_image_dpi )
672+ process_data_with_pdfminer (file = file , dpi = pdf_image_dpi ,
673+ password = password )
656674 if pdf_text_extractable
657675 else []
658676 )
@@ -694,6 +712,7 @@ def _partition_pdf_or_image_local(
694712 ocr_mode = ocr_mode ,
695713 pdf_image_dpi = pdf_image_dpi ,
696714 ocr_layout_dumper = ocr_layout_dumper ,
715+ password = password ,
697716 )
698717
699718 # NOTE(alan): starting with v2, chipper sorts the elements itself.
@@ -734,6 +753,7 @@ def _partition_pdf_or_image_local(
734753 pdf_image_dpi = pdf_image_dpi ,
735754 extract_image_block_to_payload = extract_image_block_to_payload ,
736755 output_dir_path = extract_image_block_output_dir ,
756+ password = password ,
737757 )
738758
739759 for el_type in extract_image_block_types :
@@ -807,6 +827,7 @@ def _partition_pdf_or_image_local(
807827 draw_caption = env_config .ANALYSIS_BBOX_DRAW_CAPTION ,
808828 resize = env_config .ANALYSIS_BBOX_RESIZE ,
809829 format = env_config .ANALYSIS_BBOX_FORMAT ,
830+ password = password ,
810831 )
811832
812833 return out_elements
@@ -845,6 +866,7 @@ def _partition_pdf_or_image_with_ocr(
845866 is_image : bool = False ,
846867 metadata_last_modified : Optional [str ] = None ,
847868 starting_page_number : int = 1 ,
869+ password :Optional [str ] = None ,
848870 ** kwargs : Any ,
849871):
850872 """Partitions an image or PDF using OCR. For PDFs, each page is converted
@@ -869,7 +891,8 @@ def _partition_pdf_or_image_with_ocr(
869891 elements .extend (page_elements )
870892 else :
871893 for page_number , image in enumerate (
872- convert_pdf_to_images (filename , file ), start = starting_page_number
894+ convert_pdf_to_images (filename , file , password = password ),
895+ start = starting_page_number
873896 ):
874897 page_elements = _partition_pdf_or_image_with_ocr_from_image (
875898 image = image ,
0 commit comments