add visualizer.draw_soft_mask

ppwwyyxx · facebook-github-bot · commit f14e63101168 · 2021-11-29T11:13:10.000-08:00
Reviewed By: zhanghang1989

Differential Revision: D32688852

fbshipit-source-id: a47f3b507fa520b1d66d69a90b02126489f3debb
diff --git a/INSTALL.md b/INSTALL.md
@@ -67,7 +67,7 @@ Click each issue for its solutions:
 
 <details>
 <summary>
-Undefined symbols that contains TH,aten,torch,caffe2.
+Undefined symbols that look like "TH..","at::Tensor...","torch..."
 </summary>
 <br/>
 
@@ -96,7 +96,7 @@ compiled with the version of PyTorch you're running. See the previous common iss
 
 <details>
 <summary>
-Undefined C++ symbols (e.g. GLIBCXX) or C++ symbols not found.
+Undefined C++ symbols (e.g. "GLIBCXX..") or C++ symbols not found.
 </summary>
 <br/>
 Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime.
diff --git a/detectron2/utils/visualizer.py b/detectron2/utils/visualizer.py
@@ -1032,7 +1032,7 @@ def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None):
         return self.output
 
     def draw_binary_mask(
-        self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=0
+        self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=10
     ):
         """
         Args:
@@ -1043,9 +1043,9 @@ def draw_binary_mask(
                 formats that are accepted. If None, will pick a random color.
             edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a
                 full list of formats that are accepted.
-            text (str): if None, will be drawn in the object's center of mass.
+            text (str): if not None, will be drawn on the object.
             alpha (float): blending efficient. Smaller values lead to more transparent masks.
-            area_threshold (float): a connected component small than this will not be shown.
+            area_threshold (float): a connected component smaller than this area will not be shown.
 
         Returns:
             output (VisImage): image object with mask drawn.
@@ -1078,18 +1078,36 @@ def draw_binary_mask(
             self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0))
 
         if text is not None and has_valid_segment:
-            # TODO sometimes drawn on wrong objects. the heuristics here can improve.
             lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
-            _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8)
-            largest_component_id = np.argmax(stats[1:, -1]) + 1
-
-            # draw text on the largest component, as well as other very large components.
-            for cid in range(1, _num_cc):
-                if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH:
-                    # median is more stable than centroid
-                    # center = centroids[largest_component_id]
-                    center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1]
-                    self.draw_text(text, center, color=lighter_color)
+            self._draw_text_in_mask(binary_mask, text, lighter_color)
+        return self.output
+
+    def draw_soft_mask(self, soft_mask, color=None, *, text=None, alpha=0.5):
+        """
+        Args:
+            soft_mask (ndarray): float array of shape (H, W), each value in [0, 1].
+            color: color of the mask. Refer to `matplotlib.colors` for a full list of
+                formats that are accepted. If None, will pick a random color.
+            text (str): if not None, will be drawn on the object.
+            alpha (float): blending coefficient. Smaller values lead to more transparent masks.
+
+        Returns:
+            output (VisImage): image object with mask drawn.
+        """
+        if color is None:
+            color = random_color(rgb=True, maximum=1)
+        color = mplc.to_rgb(color)
+        # Build an RGBA overlay: one uniform color, per-pixel opacity = soft_mask * alpha.
+        shape2d = (soft_mask.shape[0], soft_mask.shape[1])
+        rgba = np.zeros(shape2d + (4,), dtype="float32")
+        rgba[:, :, :3] = color
+        rgba[:, :, 3] = soft_mask * alpha
+        self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0))
+        # Text placement works on a binary mask, so threshold the soft mask at 0.5 first.
+        if text is not None:
+            lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
+            binary_mask = (soft_mask > 0.5).astype("uint8")
+            self._draw_text_in_mask(binary_mask, text, lighter_color)
         return self.output
 
     def draw_polygon(self, segment, color, edge_color=None, alpha=0.5):
@@ -1215,6 +1233,24 @@ def _convert_masks(self, masks_or_polygons):
                 ret.append(GenericMask(x, self.output.height, self.output.width))
         return ret
 
+    def _draw_text_in_mask(self, binary_mask, text, color):
+        """Draw `text` on the largest connected component of `binary_mask`, plus any other
+        component whose area exceeds `_LARGE_MASK_AREA_THRESH`; no-op without components.
+        """
+        # TODO sometimes drawn on wrong objects. the heuristics here can improve.
+        _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8)
+        if stats[1:, -1].size == 0:  # no component besides label 0 (background): nothing to label
+            return
+        largest_component_id = np.argmax(stats[1:, -1]) + 1
+
+        # draw text on the largest component, as well as other very large components.
+        for cid in range(1, _num_cc):
+            if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH:
+                # median is more stable than centroid
+                # nonzero() yields (rows, cols); [::-1] flips the median to (col, row) order
+                center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1]
+                self.draw_text(text, center, color=color)
+
     def _convert_keypoints(self, keypoints):
         if isinstance(keypoints, Keypoints):
             keypoints = keypoints.tensor
diff --git a/docs/tutorials/models.md b/docs/tutorials/models.md
@@ -71,7 +71,7 @@ The dict may contain the following keys:
 * "image": `Tensor` in (C, H, W) format. The meaning of channels are defined by `cfg.INPUT.FORMAT`.
   Image normalization, if any, will be performed inside the model using
   `cfg.MODEL.PIXEL_{MEAN,STD}`.
-* "height", "width": the **desired** output height and width, which is not necessarily the same
+* "height", "width": the **desired** output height and width **in inference**, which is not necessarily the same
   as the height or width of the `image` field.
   For example, the `image` field contains the resized image, if resize is used as a preprocessing step.
   But you may want the outputs to be in **original** resolution.
diff --git a/tests/test_visualizer.py b/tests/test_visualizer.py
@@ -213,6 +213,24 @@ def test_draw_binary_mask(self):
                     # red color is drawn on the image
                 self.assertTrue(o[:, :, 0].sum() > 0)
 
+    def test_draw_soft_mask(self):
+        img = np.random.rand(100, 100, 3) * 255
+        img[:, :, 0] = 0  # remove red color
+        mask = np.zeros((100, 100), dtype=np.float32)
+        mask[30:50, 40:50] = 1.0
+        mask = cv2.GaussianBlur(mask, (21, 21), 10)  # returns a new array; keep it so mask is soft
+
+        v = Visualizer(img)
+        o = v.draw_soft_mask(mask, color="red", text="test")
+        o = o.get_image().astype("float32")
+        # red color is drawn on the image
+        self.assertTrue(o[:, :, 0].sum() > 0)
+
+        # an all-zero mask with text must not crash (there is no component to label)
+        v = Visualizer(img)
+        o = v.draw_soft_mask(np.zeros((100, 100), dtype=np.float32), color="red", text="test")
+        o = o.get_image().astype("float32")
+
     def test_border_mask_with_holes(self):
         H, W = 200, 200
         img = np.zeros((H, W, 3))