Skip to content

Commit 4b7276a

Browse files
authored
enhancement: make DPI default 300 when using chipper (#174)
Changed the default of `pdf_image_dpi` to `None` so that the effective default can be chosen conditionally: 300 when using chipper, 200 otherwise.
1 parent c337a95 commit 4b7276a

File tree

3 files changed

+7
-4
lines changed

3 files changed

+7
-4
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
## 0.5.11-dev0
1+
## 0.5.11
22

33
* Add warning when chipper is used with < 300 DPI
4+
* Use None default for dpi so defaults can be properly handled upstream
45

56
## 0.5.10
67

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.11-dev0" # pragma: no cover
1+
__version__ = "0.5.11" # pragma: no cover

unstructured_inference/inference/layout.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ def process_data_with_model(
367367
ocr_mode: str = "entire_page",
368368
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
369369
extract_tables: bool = False,
370-
pdf_image_dpi: int = 200,
370+
pdf_image_dpi: Optional[int] = None,
371371
) -> DocumentLayout:
372372
"""Processes pdf file in the form of a file handler (supporting a read method) into a
373373
DocumentLayout by using a model identified by model_name."""
@@ -397,11 +397,13 @@ def process_file_with_model(
397397
ocr_mode: str = "entire_page",
398398
fixed_layouts: Optional[List[Optional[List[TextRegion]]]] = None,
399399
extract_tables: bool = False,
400-
pdf_image_dpi: int = 200,
400+
pdf_image_dpi: Optional[int] = None,
401401
) -> DocumentLayout:
402402
"""Processes pdf file with name filename into a DocumentLayout by using a model identified by
403403
model_name."""
404404

405+
if pdf_image_dpi is None:
406+
pdf_image_dpi = 300 if model_name == "chipper" else 200
405407
if (pdf_image_dpi < 300) and (model_name == "chipper"):
406408
logger.warning(
407409
"The Chipper model performs better when images are rendered with DPI >= 300 "

0 commit comments

Comments
 (0)