Skip to content

Commit d162c56

Browse files
Klaijan and qued authored
Klaijan/add tiff image file support case in from_image_file function (#173)
feat: supports multipage image (TIFF) in DocumentLayout
style: change viz to f-string formatting
style: change type comparison from == to is
test: add test tiff file and tiff test in test_layout
---------
Co-authored-by: qued <[email protected]>
1 parent de19ace commit d162c56

File tree

8 files changed

+33
-22
lines changed

8 files changed

+33
-22
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
1+
## 0.5.14
2+
3+
* Add TIFF test file and TIFF filetype to `test_from_image_file` in `test_layout`
4+
15
## 0.5.13
26

37
* Fix extracted image elements being included in layout merge
48

59
## 0.5.12
610

11+
* Add multipage TIFF extraction support
712
* Fix a pdfminer error when using `process_data_with_model`
813

914
## 0.5.11

sample-docs/loremipsum.tiff

14.3 MB
Binary file not shown.

test_unstructured_inference/inference/test_layout.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ def test_process_data_with_model(monkeypatch, mock_final_layout, model_name):
209209
)
210210

211211
def new_isinstance(obj, cls):
212-
if type(obj) == MockLayoutModel:
212+
if type(obj) is MockLayoutModel:
213213
return True
214214
else:
215215
return isinstance(obj, cls)
@@ -345,7 +345,7 @@ def test_get_elements_from_block_raises():
345345
layout.get_element_from_block(block, None, None)
346346

347347

348-
@pytest.mark.parametrize("filetype", ["png", "jpg"])
348+
@pytest.mark.parametrize("filetype", ["png", "jpg", "tiff"])
349349
def test_from_image_file(monkeypatch, mock_final_layout, filetype):
350350
def mock_get_elements(self, *args, **kwargs):
351351
self.elements = [mock_final_layout]

test_unstructured_inference/models/test_donut.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def test_load_donut_model(model_path, processor_path, config_path):
4141
config=config_path,
4242
task_prompt="<s>",
4343
)
44-
assert type(donut_model.model.encoder) == DonutSwinModel
44+
assert type(donut_model.model.encoder) is DonutSwinModel
4545

4646

4747
@pytest.fixture()

test_unstructured_inference/models/test_tables.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def test_load_table_model_raises_when_not_available(model_path):
3131
def test_load_donut_model(model_path):
3232
table_model = tables.UnstructuredTableTransformerModel()
3333
table_model.initialize(model=model_path)
34-
assert type(table_model.model.model.decoder) == TableTransformerDecoder
34+
assert type(table_model.model.model.decoder) is TableTransformerDecoder
3535

3636

3737
@pytest.fixture()
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.13" # pragma: no cover
1+
__version__ = "0.5.14" # pragma: no cover

unstructured_inference/inference/layout.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import pytesseract
1111
from pdfminer import psparser
1212
from pdfminer.high_level import extract_pages
13-
from PIL import Image
13+
from PIL import Image, ImageSequence
1414
from pytesseract import Output
1515

1616
from unstructured_inference.inference.elements import (
@@ -143,26 +143,32 @@ def from_image_file(
143143
try:
144144
image = Image.open(filename)
145145
format = image.format
146-
image = image.convert("RGB")
147-
image.format = format
146+
images = []
147+
for i, im in enumerate(ImageSequence.Iterator(image)):
148+
im = im.convert("RGB")
149+
im.format = format
150+
images.append(im)
148151
except Exception as e:
149152
if os.path.isdir(filename) or os.path.isfile(filename):
150153
raise e
151154
else:
152155
raise FileNotFoundError(f'File "{filename}" not found!') from e
153-
page = PageLayout.from_image(
154-
image,
155-
image_path=filename,
156-
detection_model=detection_model,
157-
element_extraction_model=element_extraction_model,
158-
layout=None,
159-
ocr_strategy=ocr_strategy,
160-
ocr_languages=ocr_languages,
161-
ocr_mode=ocr_mode,
162-
fixed_layout=fixed_layout,
163-
extract_tables=extract_tables,
164-
)
165-
return cls.from_pages([page])
156+
pages = []
157+
for i, image in enumerate(images):
158+
page = PageLayout.from_image(
159+
image,
160+
image_path=filename,
161+
number=i,
162+
detection_model=detection_model,
163+
element_extraction_model=element_extraction_model,
164+
layout=None,
165+
ocr_strategy=ocr_strategy,
166+
ocr_languages=ocr_languages,
167+
fixed_layout=fixed_layout,
168+
extract_tables=extract_tables,
169+
)
170+
pages.append(page)
171+
return cls.from_pages(pages)
166172

167173

168174
class PageLayout:

unstructured_inference/visualize.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def draw_yolox_bounding_boxes(img, boxes, scores, cls_ids, conf=0.5, class_names
4444
y1 = int(box[3])
4545

4646
color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist()
47-
text = "{}:{:.1f}%".format(class_names[cls_id], score * 100)
47+
text = f"{class_names[cls_id]}:{score * 100:.1f}%"
4848
txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255)
4949
font = cv2.FONT_HERSHEY_SIMPLEX
5050

0 commit comments

Comments
 (0)