feat: page annotation (#105)

qued · web-flow · commit 42139eb45b44 · 2023-05-25T16:32:11.000-05:00
Add the capability for PageLayout to output an annotated image.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,6 @@
-## 0.5.1-dev1
+## 0.5.1-dev2
 
+* Add annotation for pages
 * Store page numbers when processing PDFs
 * Hotfix to handle inference of blank pages using ONNX detectron2
 
diff --git a/test_unstructured_inference/inference/test_layout.py b/test_unstructured_inference/inference/test_layout.py
@@ -482,3 +482,27 @@ def test_load_pdf_with_multicolumn_layout_and_ocr(filename="sample-docs/design-t
 
     for i, element in enumerate(test_elements):
         assert element.text.startswith(test_snippets[i])
+
+
+def test_annotate():
+    test_image_arr = np.ones((100, 100, 3), dtype="uint8")
+    image = Image.fromarray(test_image_arr)
+    page = layout.PageLayout(number=1, image=image, layout=None)
+    coords1 = (21, 30, 37, 41)
+    rect1 = elements.Rectangle(*coords1)
+    coords2 = (1, 10, 7, 11)
+    rect2 = elements.Rectangle(*coords2)
+    page.elements = [rect1, rect2]
+    annotated_image = page.annotate(colors="red")
+    annotated_array = np.array(annotated_image)
+    for x1, y1, x2, y2 in [coords1, coords2]:
+        # Make sure the pixels on the edge of the box are red
+        for i, expected in zip(range(3), [255, 0, 0]):
+            assert all(annotated_array[y1, x1:x2, i] == expected)
+            assert all(annotated_array[y2, x1:x2, i] == expected)
+            assert all(annotated_array[y1:y2, x1, i] == expected)
+            assert all(annotated_array[y1:y2, x2, i] == expected)
+        # Make sure almost all the pixels are not changed
+        assert ((annotated_array[:, :, 0] == 1).mean()) > 0.992
+        assert ((annotated_array[:, :, 1] == 1).mean()) > 0.992
+        assert ((annotated_array[:, :, 2] == 1).mean()) > 0.992
diff --git a/test_unstructured_inference/test_visualization.py b/test_unstructured_inference/test_visualization.py
@@ -1,8 +1,10 @@
 import pytest
 
+from PIL import Image
 import numpy as np
 
-from unstructured_inference.visualize import draw_bounding_boxes
+from unstructured_inference.inference.elements import Rectangle
+from unstructured_inference.visualize import draw_bbox, draw_yolox_bounding_boxes
 
 
 @pytest.mark.parametrize(
@@ -20,7 +22,26 @@
 def test_visualize(y_coords, x_coords):
     test_image = np.ones((100, 100, 3))
     boxes = [[1, 10, 50, 40]]
-    annotated_img = draw_bounding_boxes(
+    annotated_img = draw_yolox_bounding_boxes(
         test_image, boxes, scores=[0.8], cls_ids=[0], class_names=["thing"]
     )
     assert annotated_img[y_coords, x_coords, 0].sum() == 0.0
+
+
+def test_draw_bbox():
+    test_image_arr = np.ones((100, 100, 3), dtype="uint8")
+    image = Image.fromarray(test_image_arr)
+    x1, y1, x2, y2 = (1, 10, 7, 11)
+    rect = Rectangle(x1, y1, x2, y2)
+    annotated_image = draw_bbox(image=image, rect=rect)
+    annotated_array = np.array(annotated_image)
+    # Make sure the pixels on the edge of the box are red
+    for i, expected in zip(range(3), [255, 0, 0]):
+        assert all(annotated_array[y1, x1:x2, i] == expected)
+        assert all(annotated_array[y2, x1:x2, i] == expected)
+        assert all(annotated_array[y1:y2, x1, i] == expected)
+        assert all(annotated_array[y1:y2, x2, i] == expected)
+    # Make sure almost all the pixels are not changed
+    assert ((annotated_array[:, :, 0] == 1).mean()) > 0.995
+    assert ((annotated_array[:, :, 1] == 1).mean()) > 0.995
+    assert ((annotated_array[:, :, 2] == 1).mean()) > 0.995
diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py
@@ -1 +1 @@
-__version__ = "0.5.1-dev1"  # pragma: no cover
+__version__ = "0.5.1-dev2"  # pragma: no cover
diff --git a/unstructured_inference/inference/layout.py b/unstructured_inference/inference/layout.py
@@ -19,6 +19,7 @@
 from unstructured_inference.models.base import get_model
 from unstructured_inference.models.unstructuredmodel import UnstructuredModel
 from unstructured_inference.patches.pdfminer import parse_keyword
+from unstructured_inference.visualize import draw_bbox
 
 # NOTE(alan): Patching this to fix a bug in pdfminer.six. Submitted this PR into pdfminer.six to fix
 # the bug: https://github.com/pdfminer/pdfminer.six/pull/885
@@ -192,6 +193,21 @@ def _get_image_array(self) -> Union[np.ndarray, None]:
             self.image_array = np.array(self.image)
         return self.image_array
 
+    def annotate(self, colors: Optional[Union[List[str], str]] = None) -> Image.Image:
+        """Annotates the elements on the page image."""
+        if colors is None:
+            colors = ["red" for _ in self.elements]
+        if isinstance(colors, str):
+            colors = [colors]
+        # If there aren't enough colors, just cycle through the colors a few times
+        if len(colors) < len(self.elements):
+            n_copies = (len(self.elements) // len(colors)) + 1
+            colors = colors * n_copies
+        img = self.image.copy()
+        for el, color in zip(self.elements, colors):
+            img = draw_bbox(img, el, color=color)
+        return img
+
     @classmethod
     def from_image(
         cls,
diff --git a/unstructured_inference/models/yolox.py b/unstructured_inference/models/yolox.py
@@ -12,7 +12,7 @@
 
 from unstructured_inference.inference.layoutelement import LayoutElement
 from unstructured_inference.models.unstructuredmodel import UnstructuredModel
-from unstructured_inference.visualize import draw_bounding_boxes
+from unstructured_inference.visualize import draw_yolox_bounding_boxes
 from unstructured_inference.utils import LazyDict, LazyEvaluateInfo
 
 YOLOX_LABEL_MAP = {
@@ -118,7 +118,7 @@ def annotate_image(self, image_fn, dets, out_fn):
         origin_img = np.array(Image.open(image_fn))
         final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
 
-        annotated_image = draw_bounding_boxes(
+        annotated_image = draw_yolox_bounding_boxes(
             origin_img,
             final_boxes,
             final_scores,
diff --git a/unstructured_inference/visualize.py b/unstructured_inference/visualize.py

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "0.5.1-dev1" # pragma: no cover`
	`1`	`+__version__ = "0.5.1-dev2" # pragma: no cover`