Skip to content

Commit ee62dc4

Browse files
committed
Add password with PDF files
1 parent 4431fe5 commit ee62dc4

File tree

1 file changed

+16
-0
lines changed

1 file changed

+16
-0
lines changed

unstructured_inference/inference/layout.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ def from_file(
5151
filename: str,
5252
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
5353
pdf_image_dpi: int = 200,
54+
password:Optional[str] = None,
5455
**kwargs,
5556
) -> DocumentLayout:
5657
"""Creates a DocumentLayout from a pdf file."""
@@ -62,6 +63,7 @@ def from_file(
6263
pdf_image_dpi,
6364
output_folder=temp_dir,
6465
path_only=True,
66+
password=password,
6567
)
6668
image_paths = cast(List[str], _image_paths)
6769
number_of_pages = len(image_paths)
@@ -89,6 +91,7 @@ def from_image_file(
8991
detection_model: Optional[UnstructuredObjectDetectionModel] = None,
9092
element_extraction_model: Optional[UnstructuredElementExtractionModel] = None,
9193
fixed_layout: Optional[List[TextRegion]] = None,
94+
password:Optional[str] = None,
9295
**kwargs,
9396
) -> DocumentLayout:
9497
"""Creates a DocumentLayout from an image file."""
@@ -115,6 +118,7 @@ def from_image_file(
115118
detection_model=detection_model,
116119
element_extraction_model=element_extraction_model,
117120
fixed_layout=fixed_layout,
121+
password=password,
118122
**kwargs,
119123
)
120124
pages.append(page)
@@ -133,6 +137,7 @@ def __init__(
133137
document_filename: Optional[Union[str, PurePath]] = None,
134138
detection_model: Optional[UnstructuredObjectDetectionModel] = None,
135139
element_extraction_model: Optional[UnstructuredElementExtractionModel] = None,
140+
password:Optional[str] = None,
136141
):
137142
if detection_model is not None and element_extraction_model is not None:
138143
raise ValueError("Only one of detection_model and extraction_model should be passed.")
@@ -148,6 +153,7 @@ def __init__(
148153
self.element_extraction_model = element_extraction_model
149154
self.elements: Collection[LayoutElement] = []
150155
self.elements_array: LayoutElements | None = None
156+
self.password = password
151157
# NOTE(alan): Dropped LocationlessLayoutElement that was created for chipper - chipper has
152158
# locations now and if we need to support LayoutElements without bounding boxes we can make
153159
# the bbox property optional
@@ -291,6 +297,7 @@ def from_image(
291297
detection_model: Optional[UnstructuredObjectDetectionModel] = None,
292298
element_extraction_model: Optional[UnstructuredElementExtractionModel] = None,
293299
fixed_layout: Optional[List[TextRegion]] = None,
300+
password:Optional[str] = None,
294301
):
295302
"""Creates a PageLayout from an already-loaded PIL Image."""
296303

@@ -299,6 +306,7 @@ def from_image(
299306
image=image,
300307
detection_model=detection_model,
301308
element_extraction_model=element_extraction_model,
309+
password=password,
302310
)
303311
# FIXME (yao): refactor the other methods so they all return elements like the third route
304312
if page.element_extraction_model is not None:
@@ -325,6 +333,7 @@ def from_image(
325333
def process_data_with_model(
326334
data: BinaryIO,
327335
model_name: Optional[str],
336+
password: Optional[str] = None,
328337
**kwargs: Any,
329338
) -> DocumentLayout:
330339
"""Process PDF as file-like object `data` into a `DocumentLayout`.
@@ -339,6 +348,7 @@ def process_data_with_model(
339348
layout = process_file_with_model(
340349
file_path,
341350
model_name,
351+
password=password,
342352
**kwargs,
343353
)
344354

@@ -351,6 +361,7 @@ def process_file_with_model(
351361
is_image: bool = False,
352362
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
353363
pdf_image_dpi: int = 200,
364+
password: Optional[str] = None,
354365
**kwargs: Any,
355366
) -> DocumentLayout:
356367
"""Processes pdf file with name filename into a DocumentLayout by using a model identified by
@@ -370,6 +381,7 @@ def process_file_with_model(
370381
filename,
371382
detection_model=detection_model,
372383
element_extraction_model=element_extraction_model,
384+
password=password,
373385
**kwargs,
374386
)
375387
if is_image
@@ -379,6 +391,7 @@ def process_file_with_model(
379391
element_extraction_model=element_extraction_model,
380392
fixed_layouts=fixed_layouts,
381393
pdf_image_dpi=pdf_image_dpi,
394+
password=password,
382395
**kwargs,
383396
)
384397
)
@@ -390,6 +403,7 @@ def convert_pdf_to_image(
390403
dpi: int = 200,
391404
output_folder: Optional[Union[str, PurePath]] = None,
392405
path_only: bool = False,
406+
password: Optional[str] = None,
393407
) -> Union[List[Image.Image], List[str]]:
394408
"""Get the image renderings of the pdf pages using pdf2image"""
395409

@@ -402,12 +416,14 @@ def convert_pdf_to_image(
402416
dpi=dpi,
403417
output_folder=output_folder,
404418
paths_only=path_only,
419+
userpw=password,
405420
)
406421
else:
407422
images = pdf2image.convert_from_path(
408423
filename,
409424
dpi=dpi,
410425
paths_only=path_only,
426+
userpw=password,
411427
)
412428

413429
return images

0 commit comments

Comments
 (0)