Skip to content

Commit c337a95

Browse files
authored
enhancement: warn if low dpi chipper (#171)
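Logs a warning from `process_file_with_model` when the Chipper model is used with a PDF image DPI below 300, because Chipper performs better on images rendered at DPI >= 300.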
1 parent 15bbc56 commit c337a95

File tree

5 files changed: 27 additions and 4 deletions.

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
## 0.5.11-dev0
2+
3+
* Add warning when chipper is used with < 300 DPI
4+
15
## 0.5.10
26

37
* Implement full-page OCR

test_unstructured_inference/inference/test_layout.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
import unstructured_inference.models.base as models
1212
from unstructured_inference.inference import elements, layout, layoutelement
1313
from unstructured_inference.inference.layout import create_image_output_dir
14-
from unstructured_inference.models import detectron2, tesseract
14+
from unstructured_inference.models import chipper, detectron2, tesseract
1515
from unstructured_inference.models.unstructuredmodel import (
1616
UnstructuredElementExtractionModel,
1717
UnstructuredObjectDetectionModel,
@@ -866,3 +866,14 @@ def test_create_image_output_dir_no_ext():
866866
assert os.path.isdir(output_dir)
867867
assert os.path.isabs(output_dir)
868868
assert output_dir == expected_output_dir
869+
870+
871+
def test_warning_if_chipper_and_low_dpi(caplog):
872+
with patch.object(layout.DocumentLayout, "from_file") as mock_from_file, patch.object(
873+
chipper.UnstructuredChipperModel,
874+
"initialize",
875+
):
876+
layout.process_file_with_model("asdf", model_name="chipper", pdf_image_dpi=299)
877+
mock_from_file.assert_called_once()
878+
assert caplog.records[0].levelname == "WARNING"
879+
assert "DPI >= 300" in caplog.records[0].msg

test_unstructured_inference/models/test_model.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,9 @@ def test_get_model_warns_on_chipper(monkeypatch, caplog):
4242
"UnstructuredChipperModel",
4343
MockModel,
4444
)
45-
models.get_model("chipper")
46-
assert caplog.records[0].levelname == "WARNING"
45+
with mock.patch.object(models, "models", {}):
46+
models.get_model("chipper")
47+
assert caplog.records[0].levelname == "WARNING"
4748

4849

4950
def test_raises_invalid_model():
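(File name missing from this capture; presumably the package version file, e.g. unstructured_inference/__version__.py — confirm against the commit.)

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change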
@@ -1 +1 @@
1-
__version__ = "0.5.10" # pragma: no cover
1+
__version__ = "0.5.11-dev0" # pragma: no cover

unstructured_inference/inference/layout.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -401,6 +401,13 @@ def process_file_with_model(
401401
) -> DocumentLayout:
402402
"""Processes pdf file with name filename into a DocumentLayout by using a model identified by
403403
model_name."""
404+
405+
if (pdf_image_dpi < 300) and (model_name == "chipper"):
406+
logger.warning(
407+
"The Chipper model performs better when images are rendered with DPI >= 300 "
408+
f"(currently {pdf_image_dpi}).",
409+
)
410+
404411
model = get_model(model_name)
405412
if isinstance(model, UnstructuredObjectDetectionModel):
406413
detection_model = model

0 commit comments

Comments
 (0)