fix ColorMode.IMAGE_BW of visualizer

ppwwyyxx · facebook-github-bot · commit 6545979631eb · 2021-09-15T18:43:39.000-07:00
Summary: fix #3486 Reviewed By: zhanghang1989 Differential Revision: D30974958 fbshipit-source-id: 68c7d041f3b5b64eb0c32b4fcc99ad6d13e3542f
diff --git a/detectron2/config/defaults.py b/detectron2/config/defaults.py
@@ -50,6 +50,8 @@
 # INPUT
 # -----------------------------------------------------------------------------
 _C.INPUT = CN()
+# By default, {MIN,MAX}_SIZE options are used in transforms.ResizeShortestEdge.
+# Please refer to ResizeShortestEdge for the detailed definition.
 # Size of the smallest side of the image during training
 _C.INPUT.MIN_SIZE_TRAIN = (800,)
 # Sample size of smallest side by choice or random selection from range give by
@@ -258,7 +260,7 @@
 # Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD)
 _C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5]
 _C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1]
-# RoI minibatch size *per image* (number of regions of interest [ROIs])
+# RoI minibatch size *per image* (number of regions of interest [ROIs]) during training
 # Total number of RoIs per training minibatch =
 #   ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH
 # E.g., a common configuration is: 512 * 16 = 8192
diff --git a/detectron2/data/transforms/augmentation_impl.py b/detectron2/data/transforms/augmentation_impl.py
@@ -128,7 +128,9 @@ def get_transform(self, image):
 
 class ResizeShortestEdge(Augmentation):
     """
-    Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge.
+    Resize the image while keeping the aspect ratio unchanged.
+    It attempts to scale the shorter edge to the given `short_edge_length`,
+    as long as the longer edge does not exceed `max_size`.
     If `max_size` is reached, then downscale so that the longer edge does not exceed max_size.
     """
 
diff --git a/detectron2/utils/video_visualizer.py b/detectron2/utils/video_visualizer.py
@@ -91,8 +91,10 @@ def draw_instance_predictions(self, frame, predictions):
 
         if self._instance_mode == ColorMode.IMAGE_BW:
             # any() returns uint8 tensor
-            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
-                (masks.any(dim=0) > 0).numpy() if masks is not None else None
+            frame_visualizer.output.reset_image(
+                frame_visualizer._create_grayscale_image(
+                    (masks.any(dim=0) > 0).numpy() if masks is not None else None
+                )
             )
             alpha = 0.3
         else:
@@ -128,8 +130,8 @@ def draw_panoptic_seg_predictions(
         pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata)
 
         if self._instance_mode == ColorMode.IMAGE_BW:
-            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
-                pred.non_empty_mask()
+            frame_visualizer.output.reset_image(
+                frame_visualizer._create_grayscale_image(pred.non_empty_mask())
             )
 
         # draw mask for all semantic segments first i.e. "stuff"
diff --git a/detectron2/utils/visualizer.py b/detectron2/utils/visualizer.py
@@ -255,7 +255,7 @@ class VisImage:
     def __init__(self, img, scale=1.0):
         """
         Args:
-            img (ndarray): an RGB image of shape (H, W, 3).
+            img (ndarray): an RGB image of shape (H, W, 3) in range [0, 255].
             scale (float): scale the input image
         """
         self.img = img
@@ -284,11 +284,17 @@ def _setup_figure(self, img):
         # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig)
         ax = fig.add_axes([0.0, 0.0, 1.0, 1.0])
         ax.axis("off")
-        # Need to imshow this first so that other patches can be drawn on top
-        ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest")
-
         self.fig = fig
         self.ax = ax
+        self.reset_image(img)
+
+    def reset_image(self, img):
+        """
+        Args:
+            img: same as in __init__; it is cast to uint8 before being drawn on the axes.
+        """
+        img = img.astype("uint8")
+        self.ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest")
 
     def save(self, filepath):
         """
@@ -404,10 +410,12 @@ def draw_instance_predictions(self, predictions):
             alpha = 0.5
 
         if self._instance_mode == ColorMode.IMAGE_BW:
-            self.output.img = self._create_grayscale_image(
-                (predictions.pred_masks.any(dim=0) > 0).numpy()
-                if predictions.has("pred_masks")
-                else None
+            self.output.reset_image(
+                self._create_grayscale_image(
+                    (predictions.pred_masks.any(dim=0) > 0).numpy()
+                    if predictions.has("pred_masks")
+                    else None
+                )
             )
             alpha = 0.3
 
@@ -476,7 +484,7 @@ def draw_panoptic_seg(self, panoptic_seg, segments_info, area_threshold=None, al
         pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata)
 
         if self._instance_mode == ColorMode.IMAGE_BW:
-            self.output.img = self._create_grayscale_image(pred.non_empty_mask())
+            self.output.reset_image(self._create_grayscale_image(pred.non_empty_mask()))
 
         # draw mask for all semantic segments first i.e. "stuff"
         for mask, sinfo in pred.semantic_masks():
diff --git a/tests/test_visualizer.py b/tests/test_visualizer.py
@@ -138,6 +138,13 @@ def test_BWmode_nomask(self):
         v = Visualizer(img, self.metadata, instance_mode=ColorMode.IMAGE_BW)
         v.draw_instance_predictions(inst)
 
+        # check that output is grayscale
+        inst = inst[:0]
+        v = Visualizer(img, self.metadata, instance_mode=ColorMode.IMAGE_BW)
+        output = v.draw_instance_predictions(inst).get_image()
+        self.assertTrue(np.allclose(output[:, :, 0], output[:, :, 1]))
+        self.assertTrue(np.allclose(output[:, :, 0], output[:, :, 2]))
+
     def test_draw_empty_mask_predictions(self):
         img, boxes, _, _, masks = self._random_data()
         num_inst = len(boxes)