1212import numpy as np
1313import wrapt
1414from pdfminer import psparser
15- from pdfminer .layout import LTChar , LTContainer , LTImage , LTItem , LTTextBox
16- from pdfminer .pdftypes import PDFObjRef
15+ from pdfminer .layout import LTContainer , LTImage , LTItem , LTTextBox
1716from pdfminer .utils import open_filename
1817from pi_heif import register_heif_opener
1918from PIL import Image as PILImage
@@ -365,7 +364,7 @@ def extractable_elements(
365364 languages : Optional [list [str ]] = None ,
366365 metadata_last_modified : Optional [str ] = None ,
367366 starting_page_number : int = 1 ,
368- password :Optional [str ] = None ,
367+ password : Optional [str ] = None ,
369368 ** kwargs : Any ,
370369) -> list [list [Element ]]:
371370 if isinstance (file , bytes ):
@@ -387,7 +386,7 @@ def _partition_pdf_with_pdfminer(
387386 languages : list [str ],
388387 metadata_last_modified : Optional [str ],
389388 starting_page_number : int = 1 ,
390- password :Optional [str ] = None ,
389+ password : Optional [str ] = None ,
391390 ** kwargs : Any ,
392391) -> list [list [Element ]]:
393392 """Partitions a PDF using PDFMiner instead of using a layoutmodel. Used for faster
@@ -446,7 +445,7 @@ def _process_pdfminer_pages(
446445
447446 for page_number , (page , page_layout ) in enumerate (
448447 open_pdfminer_pages_generator (fp , password = password ),
449- start = starting_page_number ,
448+ start = starting_page_number ,
450449 ):
451450 width , height = page_layout .width , page_layout .height
452451
@@ -568,7 +567,7 @@ def _partition_pdf_or_image_local(
568567 extract_forms : bool = False ,
569568 form_extraction_skip_tables : bool = True ,
570569 pdf_hi_res_max_pages : Optional [int ] = None ,
571- password :Optional [str ] = None ,
570+ password : Optional [str ] = None ,
572571 ** kwargs : Any ,
573572) -> list [Element ]:
574573 """Partition using package installed locally"""
@@ -609,8 +608,7 @@ def _partition_pdf_or_image_local(
609608 )
610609
611610 extracted_layout , layouts_links = (
612- process_file_with_pdfminer (filename = filename , dpi = pdf_image_dpi ,
613- password = password )
611+ process_file_with_pdfminer (filename = filename , dpi = pdf_image_dpi , password = password )
614612 if pdf_text_extractable
615613 else ([], [])
616614 )
@@ -879,8 +877,7 @@ def _partition_pdf_or_image_with_ocr(
879877 elements .extend (page_elements )
880878 else :
881879 for page_number , image in enumerate (
882- convert_pdf_to_images (filename , file , password = password ),
883- start = starting_page_number
880+ convert_pdf_to_images (filename , file , password = password ), start = starting_page_number
884881 ):
885882 page_elements = _partition_pdf_or_image_with_ocr_from_image (
886883 image = image ,
@@ -1177,11 +1174,11 @@ def document_to_element_list(
11771174 word = ""
11781175
11791176 if len (word ) == 0 :
1180- start_index = text_len + index
1181- x1 = character .x0
1182- y2 = height - character .y0
1183- x2 = character .x1
1184- y1 = height - character .y1
1177+ text_len + index
1178+ character .x0
1179+ height - character .y0
1180+ character .x1
1181+ height - character .y1
11851182 else :
11861183
11871184 element .metadata .links = (
0 commit comments