@@ -51,6 +51,7 @@ def from_file(
5151 filename : str ,
5252 fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
5353 pdf_image_dpi : int = 200 ,
54+ password :Optional [str ] = None ,
5455 ** kwargs ,
5556 ) -> DocumentLayout :
5657 """Creates a DocumentLayout from a pdf file."""
@@ -62,6 +63,7 @@ def from_file(
6263 pdf_image_dpi ,
6364 output_folder = temp_dir ,
6465 path_only = True ,
66+ password = password ,
6567 )
6668 image_paths = cast (List [str ], _image_paths )
6769 number_of_pages = len (image_paths )
@@ -89,6 +91,7 @@ def from_image_file(
8991 detection_model : Optional [UnstructuredObjectDetectionModel ] = None ,
9092 element_extraction_model : Optional [UnstructuredElementExtractionModel ] = None ,
9193 fixed_layout : Optional [List [TextRegion ]] = None ,
94+ password :Optional [str ] = None ,
9295 ** kwargs ,
9396 ) -> DocumentLayout :
9497 """Creates a DocumentLayout from an image file."""
@@ -115,6 +118,7 @@ def from_image_file(
115118 detection_model = detection_model ,
116119 element_extraction_model = element_extraction_model ,
117120 fixed_layout = fixed_layout ,
121+ password = password ,
118122 ** kwargs ,
119123 )
120124 pages .append (page )
@@ -133,6 +137,7 @@ def __init__(
133137 document_filename : Optional [Union [str , PurePath ]] = None ,
134138 detection_model : Optional [UnstructuredObjectDetectionModel ] = None ,
135139 element_extraction_model : Optional [UnstructuredElementExtractionModel ] = None ,
140+ password :Optional [str ] = None ,
136141 ):
137142 if detection_model is not None and element_extraction_model is not None :
138143 raise ValueError ("Only one of detection_model and extraction_model should be passed." )
@@ -148,6 +153,7 @@ def __init__(
148153 self .element_extraction_model = element_extraction_model
149154 self .elements : Collection [LayoutElement ] = []
150155 self .elements_array : LayoutElements | None = None
156+ self .password = password
151157 # NOTE(alan): Dropped LocationlessLayoutElement that was created for chipper - chipper has
152158 # locations now and if we need to support LayoutElements without bounding boxes we can make
153159 # the bbox property optional
@@ -291,6 +297,7 @@ def from_image(
291297 detection_model : Optional [UnstructuredObjectDetectionModel ] = None ,
292298 element_extraction_model : Optional [UnstructuredElementExtractionModel ] = None ,
293299 fixed_layout : Optional [List [TextRegion ]] = None ,
300+ password :Optional [str ] = None ,
294301 ):
295302 """Creates a PageLayout from an already-loaded PIL Image."""
296303
@@ -299,6 +306,7 @@ def from_image(
299306 image = image ,
300307 detection_model = detection_model ,
301308 element_extraction_model = element_extraction_model ,
309+ password = password ,
302310 )
303311 # FIXME (yao): refactor the other methods so they all return elements like the third route
304312 if page .element_extraction_model is not None :
@@ -325,6 +333,7 @@ def from_image(
325333def process_data_with_model (
326334 data : BinaryIO ,
327335 model_name : Optional [str ],
336+ password : Optional [str ] = None ,
328337 ** kwargs : Any ,
329338) -> DocumentLayout :
330339 """Process PDF as file-like object `data` into a `DocumentLayout`.
@@ -339,6 +348,7 @@ def process_data_with_model(
339348 layout = process_file_with_model (
340349 file_path ,
341350 model_name ,
351+ password = password ,
342352 ** kwargs ,
343353 )
344354
@@ -351,6 +361,7 @@ def process_file_with_model(
351361 is_image : bool = False ,
352362 fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
353363 pdf_image_dpi : int = 200 ,
364+ password : Optional [str ] = None ,
354365 ** kwargs : Any ,
355366) -> DocumentLayout :
356367 """Processes pdf file with name filename into a DocumentLayout by using a model identified by
@@ -370,6 +381,7 @@ def process_file_with_model(
370381 filename ,
371382 detection_model = detection_model ,
372383 element_extraction_model = element_extraction_model ,
384+ password = password ,
373385 ** kwargs ,
374386 )
375387 if is_image
@@ -379,6 +391,7 @@ def process_file_with_model(
379391 element_extraction_model = element_extraction_model ,
380392 fixed_layouts = fixed_layouts ,
381393 pdf_image_dpi = pdf_image_dpi ,
394+ password = password ,
382395 ** kwargs ,
383396 )
384397 )
@@ -390,6 +403,7 @@ def convert_pdf_to_image(
390403 dpi : int = 200 ,
391404 output_folder : Optional [Union [str , PurePath ]] = None ,
392405 path_only : bool = False ,
406+ password : Optional [str ] = None ,
393407) -> Union [List [Image .Image ], List [str ]]:
394408 """Get the image renderings of the pdf pages using pdf2image"""
395409
@@ -402,12 +416,14 @@ def convert_pdf_to_image(
402416 dpi = dpi ,
403417 output_folder = output_folder ,
404418 paths_only = path_only ,
419+ userpw = password ,
405420 )
406421 else :
407422 images = pdf2image .convert_from_path (
408423 filename ,
409424 dpi = dpi ,
410425 paths_only = path_only ,
426+ userpw = password ,
411427 )
412428
413429 return images
0 commit comments