@@ -77,10 +77,11 @@ def from_file(
7777 ocr_strategy : str = "auto" ,
7878 ocr_languages : str = "eng" ,
7979 extract_tables : bool = False ,
80+ pdf_image_dpi : int = 200 ,
8081 ) -> DocumentLayout :
8182 """Creates a DocumentLayout from a pdf file."""
8283 logger .info (f"Reading PDF for file: { filename } ..." )
83- layouts , images = load_pdf (filename )
84+ layouts , images = load_pdf (filename , pdf_image_dpi )
8485 if len (layouts ) > len (images ):
8586 raise RuntimeError (
8687 "Some images were not loaded. Check that poppler is installed and in your $PATH." ,
@@ -297,6 +298,7 @@ def process_data_with_model(
297298 ocr_languages : str = "eng" ,
298299 fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
299300 extract_tables : bool = False ,
301+ pdf_image_dpi : int = 200 ,
300302) -> DocumentLayout :
301303 """Processes pdf file in the form of a file handler (supporting a read method) into a
302304 DocumentLayout by using a model identified by model_name."""
@@ -310,6 +312,7 @@ def process_data_with_model(
310312 ocr_languages = ocr_languages ,
311313 fixed_layouts = fixed_layouts ,
312314 extract_tables = extract_tables ,
315+ pdf_image_dpi = pdf_image_dpi ,
313316 )
314317
315318 return layout
@@ -323,6 +326,7 @@ def process_file_with_model(
323326 ocr_languages : str = "eng" ,
324327 fixed_layouts : Optional [List [Optional [List [TextRegion ]]]] = None ,
325328 extract_tables : bool = False ,
329+ pdf_image_dpi : int = 200 ,
326330) -> DocumentLayout :
327331 """Processes pdf file with name filename into a DocumentLayout by using a model identified by
328332 model_name."""
@@ -353,6 +357,7 @@ def process_file_with_model(
353357 ocr_languages = ocr_languages ,
354358 fixed_layouts = fixed_layouts ,
355359 extract_tables = extract_tables ,
360+ pdf_image_dpi = pdf_image_dpi ,
356361 )
357362 )
358363 return layout
0 commit comments