Skip to content

Commit 42139eb

Browse files
authored
feat: page annotation (#105)
Add capability for PageLayout to output annotated image.
1 parent ff96a97 commit 42139eb

File tree

7 files changed

+166
-260
lines changed

7 files changed

+166
-260
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
## 0.5.1-dev1
1+
## 0.5.1-dev2
22

3+
* Add annotation for pages
34
* Store page numbers when processing PDFs
45
* Hotfix to handle inference of blank pages using ONNX detectron2
56

test_unstructured_inference/inference/test_layout.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,3 +482,27 @@ def test_load_pdf_with_multicolumn_layout_and_ocr(filename="sample-docs/design-t
482482

483483
for i, element in enumerate(test_elements):
484484
assert element.text.startswith(test_snippets[i])
485+
486+
487+
def test_annotate():
    """PageLayout.annotate outlines every element in red and leaves the rest alone."""
    red = (255, 0, 0)
    boxes = [(21, 30, 37, 41), (1, 10, 7, 11)]

    # A 100x100 RGB image whose every channel value is 1, so any drawn pixel stands out.
    blank = Image.fromarray(np.ones((100, 100, 3), dtype="uint8"))
    page = layout.PageLayout(number=1, image=blank, layout=None)
    page.elements = [elements.Rectangle(*box) for box in boxes]

    pixels = np.array(page.annotate(colors="red"))

    for left, top, right, bottom in boxes:
        # Each of the four edges of the box must be pure red in every channel.
        for channel, value in enumerate(red):
            assert (pixels[top, left:right, channel] == value).all()
            assert (pixels[bottom, left:right, channel] == value).all()
            assert (pixels[top:bottom, left, channel] == value).all()
            assert (pixels[top:bottom, right, channel] == value).all()

    # Only the thin outlines may differ from the original value of 1.
    for channel in range(3):
        assert (pixels[:, :, channel] == 1).mean() > 0.992

test_unstructured_inference/test_visualization.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import pytest
22

3+
from PIL import Image
34
import numpy as np
45

5-
from unstructured_inference.visualize import draw_bounding_boxes
6+
from unstructured_inference.inference.elements import Rectangle
7+
from unstructured_inference.visualize import draw_bbox, draw_yolox_bounding_boxes
68

79

810
@pytest.mark.parametrize(
@@ -20,7 +22,26 @@
2022
def test_visualize(y_coords, x_coords):
2123
test_image = np.ones((100, 100, 3))
2224
boxes = [[1, 10, 50, 40]]
23-
annotated_img = draw_bounding_boxes(
25+
annotated_img = draw_yolox_bounding_boxes(
2426
test_image, boxes, scores=[0.8], cls_ids=[0], class_names=["thing"]
2527
)
2628
assert annotated_img[y_coords, x_coords, 0].sum() == 0.0
29+
30+
31+
def test_draw_bbox():
    """draw_bbox outlines a single rectangle in red and leaves the rest alone."""
    red = (255, 0, 0)
    left, top, right, bottom = (1, 10, 7, 11)

    # A 100x100 RGB image whose every channel value is 1, so any drawn pixel stands out.
    blank = Image.fromarray(np.ones((100, 100, 3), dtype="uint8"))
    box = Rectangle(left, top, right, bottom)

    pixels = np.array(draw_bbox(image=blank, rect=box))

    # Each of the four edges of the box must be pure red in every channel.
    for channel, value in enumerate(red):
        assert (pixels[top, left:right, channel] == value).all()
        assert (pixels[bottom, left:right, channel] == value).all()
        assert (pixels[top:bottom, left, channel] == value).all()
        assert (pixels[top:bottom, right, channel] == value).all()

    # Only the thin outline may differ from the original value of 1.
    for channel in range(3):
        assert (pixels[:, :, channel] == 1).mean() > 0.995
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.1-dev1" # pragma: no cover
1+
__version__ = "0.5.1-dev2" # pragma: no cover

unstructured_inference/inference/layout.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from unstructured_inference.models.base import get_model
2020
from unstructured_inference.models.unstructuredmodel import UnstructuredModel
2121
from unstructured_inference.patches.pdfminer import parse_keyword
22+
from unstructured_inference.visualize import draw_bbox
2223

2324
# NOTE(alan): Patching this to fix a bug in pdfminer.six. Submitted this PR into pdfminer.six to fix
2425
# the bug: https://github.com/pdfminer/pdfminer.six/pull/885
@@ -192,6 +193,21 @@ def _get_image_array(self) -> Union[np.ndarray, None]:
192193
self.image_array = np.array(self.image)
193194
return self.image_array
194195

196+
def annotate(self, colors: Optional[Union[List[str], str]] = None) -> Image.Image:
    """Annotates the elements on the page image.

    Works on a copy of ``self.image`` and passes it, together with each entry of
    ``self.elements``, to ``draw_bbox``; the final image is returned and
    ``self.image`` itself is not reassigned.

    Args:
        colors: A single color name applied to every element, or a list of color
            names paired with the elements in order. When the list is shorter
            than ``self.elements`` it is repeated from the start. ``None`` or an
            empty list means every element is drawn in red.

    Returns:
        The annotated copy of the page image.
    """
    from itertools import cycle

    if isinstance(colors, str):
        colors = [colors]
    # An empty list previously reached `len(elements) // len(colors)` and raised
    # ZeroDivisionError; treat it the same as "no colors given".
    if not colors:
        colors = ["red"]
    img = self.image.copy()
    # cycle() repeats the palette as often as needed; zip() stops at the last element.
    for el, color in zip(self.elements, cycle(colors)):
        img = draw_bbox(img, el, color=color)
    return img
210+
195211
@classmethod
196212
def from_image(
197213
cls,

unstructured_inference/models/yolox.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
from unstructured_inference.inference.layoutelement import LayoutElement
1414
from unstructured_inference.models.unstructuredmodel import UnstructuredModel
15-
from unstructured_inference.visualize import draw_bounding_boxes
15+
from unstructured_inference.visualize import draw_yolox_bounding_boxes
1616
from unstructured_inference.utils import LazyDict, LazyEvaluateInfo
1717

1818
YOLOX_LABEL_MAP = {
@@ -118,7 +118,7 @@ def annotate_image(self, image_fn, dets, out_fn):
118118
origin_img = np.array(Image.open(image_fn))
119119
final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
120120

121-
annotated_image = draw_bounding_boxes(
121+
annotated_image = draw_yolox_bounding_boxes(
122122
origin_img,
123123
final_boxes,
124124
final_scores,

0 commit comments

Comments
 (0)