Internal change: add optional sigmoid scoring and binary cross-entropy class loss to Mask R-CNN.

tensorflower-gardener · fyangf · commit 95f107047265 · 2023-03-20T16:41:26.000-07:00
PiperOrigin-RevId: 490350449
diff --git a/official/projects/deepmac_maskrcnn/tasks/deep_mask_head_rcnn.py b/official/projects/deepmac_maskrcnn/tasks/deep_mask_head_rcnn.py
@@ -120,7 +120,8 @@ def build_maskrcnn(input_specs: tf.keras.layers.InputSpec,
       pre_nms_score_threshold=generator_config.pre_nms_score_threshold,
       nms_iou_threshold=generator_config.nms_iou_threshold,
       max_num_detections=generator_config.max_num_detections,
-      nms_version=generator_config.nms_version)
+      nms_version=generator_config.nms_version,
+      use_sigmoid_probability=generator_config.use_sigmoid_probability)
 
   if model_config.include_mask:
     mask_head = deep_instance_heads.DeepMaskHead(
diff --git a/official/vision/configs/maskrcnn.py b/official/vision/configs/maskrcnn.py
@@ -135,6 +135,7 @@ class DetectionGenerator(hyperparams.Config):
   nms_version: str = 'v2'  # `v2`, `v1`, `batched`
   use_cpu_nms: bool = False
   soft_nms_sigma: Optional[float] = None  # Only works when nms_version='v1'.
+  use_sigmoid_probability: bool = False
 
 
 @dataclasses.dataclass
@@ -189,6 +190,7 @@ class Losses(hyperparams.Config):
   loss_weight: float = 1.0
   rpn_huber_loss_delta: float = 1. / 9.
   frcnn_huber_loss_delta: float = 1.
+  frcnn_class_use_binary_cross_entropy: bool = False
   l2_weight_decay: float = 0.0
   rpn_score_weight: float = 1.0
   rpn_box_weight: float = 1.0
diff --git a/official/vision/losses/maskrcnn_losses.py b/official/vision/losses/maskrcnn_losses.py
@@ -131,43 +131,45 @@ def _rpn_box_loss(self, box_outputs, box_targets, normalizer=1.0):
 class FastrcnnClassLoss(object):
   """Fast R-CNN classification loss function."""
 
-  def __init__(self):
-    self._categorical_crossentropy = tf.keras.losses.CategoricalCrossentropy(
-        reduction=tf.keras.losses.Reduction.SUM, from_logits=True)
+  def __init__(self, use_binary_cross_entropy=False):
+    """Initializes loss computation.
+
+    Args:
+      use_binary_cross_entropy: If true, uses binary cross entropy loss,
+        otherwise uses categorical cross entropy loss.
+    """
+    self._use_binary_cross_entropy = use_binary_cross_entropy
 
   def __call__(self, class_outputs, class_targets):
     """Computes the class loss (Fast-RCNN branch) of Mask-RCNN.
 
     This function implements the classification loss of the Fast-RCNN.
 
-    The classification loss is softmax on all RoIs.
+    The classification loss is categorical (or binary) cross entropy on all
+    RoIs.
     Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/fast_rcnn_heads.py  # pylint: disable=line-too-long
 
     Args:
-      class_outputs: a float tensor representing the class prediction for each box
-        with a shape of [batch_size, num_boxes, num_classes].
+      class_outputs: a float tensor representing the class prediction for each
+        box with a shape of [batch_size, num_boxes, num_classes].
       class_targets: a float tensor representing the class label for each box
         with a shape of [batch_size, num_boxes].
 
     Returns:
       a scalar tensor representing total class loss.
     """
     with tf.name_scope('fast_rcnn_loss'):
-      batch_size, num_boxes, num_classes = class_outputs.get_shape().as_list()
-      class_targets = tf.cast(class_targets, dtype=tf.int32)
-      class_targets_one_hot = tf.one_hot(class_targets, num_classes)
-      return self._fast_rcnn_class_loss(class_outputs, class_targets_one_hot,
-                                        normalizer=batch_size * num_boxes)
-
-  def _fast_rcnn_class_loss(self, class_outputs, class_targets_one_hot,
-                            normalizer=1.0):
-    """Computes classification loss."""
-    with tf.name_scope('fast_rcnn_class_loss'):
-      class_loss = self._categorical_crossentropy(class_targets_one_hot,
-                                                  class_outputs)
-
-      class_loss /= normalizer
-      return class_loss
+      num_classes = class_outputs.get_shape().as_list()[-1]
+      class_targets_one_hot = tf.one_hot(
+          tf.cast(class_targets, dtype=tf.int32), num_classes)
+      if self._use_binary_cross_entropy:
+        cross_entropy_loss = tf.nn.sigmoid_cross_entropy_with_logits(
+            class_targets_one_hot, class_outputs)
+        return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, axis=-1))
+      else:
+        return tf.reduce_mean(
+            tf.nn.softmax_cross_entropy_with_logits(class_targets_one_hot,
+                                                    class_outputs))
 
 
 class FastrcnnBoxLoss(object):
@@ -227,22 +229,9 @@ def _assign_class_targets(self, box_outputs, class_targets):
     num_classes = num_class_specific_boxes // 4
     box_outputs = tf.reshape(box_outputs,
                              [batch_size, num_rois, num_classes, 4])
-
-    box_indices = tf.reshape(
-        class_targets + tf.tile(
-            tf.expand_dims(tf.range(batch_size) * num_rois * num_classes, 1),
-            [1, num_rois]) + tf.tile(
-                tf.expand_dims(tf.range(num_rois) * num_classes, 0),
-                [batch_size, 1]), [-1])
-
-    box_outputs = tf.matmul(
-        tf.one_hot(
-            box_indices,
-            batch_size * num_rois * num_classes,
-            dtype=box_outputs.dtype), tf.reshape(box_outputs, [-1, 4]))
-    box_outputs = tf.reshape(box_outputs, [batch_size, -1, 4])
-
-    return box_outputs
+    class_targets_one_hot = tf.one_hot(
+        class_targets, num_classes, dtype=box_outputs.dtype)
+    return tf.einsum('bnij,bni->bnj', box_outputs, class_targets_one_hot)
 
   def _fast_rcnn_box_loss(self, box_outputs, box_targets, class_targets,
                           normalizer=1.0):
diff --git a/official/vision/losses/maskrcnn_losses_test.py b/official/vision/losses/maskrcnn_losses_test.py
@@ -0,0 +1,103 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for maskrcnn_losses."""
+
+from absl.testing import parameterized
+import tensorflow as tf
+
+from official.vision.losses import maskrcnn_losses
+
+
+class MaskrcnnLossesTest(parameterized.TestCase, tf.test.TestCase):
+
+  def testRpnScoreLoss(self):
+    batch_size = 2
+    height = 32
+    width = 32
+    num_anchors = 10
+    score_outputs = {
+        '1': tf.random.uniform([batch_size, height, width, num_anchors])
+    }
+    score_targets = {
+        '1':
+            tf.random.uniform([batch_size, height, width, num_anchors],
+                              minval=-1,
+                              maxval=2,
+                              dtype=tf.int32)
+    }
+    loss_fn = maskrcnn_losses.RpnScoreLoss(rpn_batch_size_per_im=8)
+    self.assertEqual(tf.rank(loss_fn(score_outputs, score_targets)), 0)
+
+  def testRpnBoxLoss(self):
+    batch_size = 2
+    height = 32
+    width = 32
+    num_anchors = 10
+    box_outputs = {
+        '1': tf.random.uniform([batch_size, height, width, num_anchors * 4])
+    }
+    box_targets = {
+        '1': tf.random.uniform([batch_size, height, width, num_anchors * 4])
+    }
+    loss_fn = maskrcnn_losses.RpnBoxLoss(huber_loss_delta=1. / 9.)
+    self.assertEqual(tf.rank(loss_fn(box_outputs, box_targets)), 0)
+
+  @parameterized.parameters(True, False)  # One test case per loss type.
+  def testFastrcnnClassLoss(self, use_binary_cross_entropy):
+    batch_size = 2
+    num_boxes = 10
+    num_classes = 5
+    class_outputs = tf.random.uniform([batch_size, num_boxes, num_classes])
+    class_targets = tf.random.uniform([batch_size, num_boxes],
+                                      minval=0,
+                                      maxval=num_classes + 1,
+                                      dtype=tf.int32)
+    loss_fn = maskrcnn_losses.FastrcnnClassLoss(use_binary_cross_entropy)
+    self.assertEqual(tf.rank(loss_fn(class_outputs, class_targets)), 0)
+
+  def testFastrcnnBoxLoss(self):
+    batch_size = 2
+    num_boxes = 10
+    num_classes = 5
+    box_outputs = tf.random.uniform([batch_size, num_boxes, num_classes * 4])
+    box_targets = tf.random.uniform([batch_size, num_boxes, 4])
+    class_targets = tf.random.uniform([batch_size, num_boxes],
+                                      minval=0,
+                                      maxval=num_classes + 1,
+                                      dtype=tf.int32)
+    loss_fn = maskrcnn_losses.FastrcnnBoxLoss(huber_loss_delta=1.)
+    self.assertEqual(
+        tf.rank(loss_fn(box_outputs, box_targets, class_targets)), 0)
+
+  def testMaskrcnnLoss(self):
+    batch_size = 2
+    num_masks = 10
+    mask_height = 16
+    mask_width = 16
+    num_classes = 5
+    mask_outputs = tf.random.uniform(
+        [batch_size, num_masks, mask_height, mask_width])
+    mask_targets = tf.cast(
+        tf.random.uniform([batch_size, num_masks, mask_height, mask_width],
+                          minval=0,
+                          maxval=2,
+                          dtype=tf.int32), tf.float32)
+    select_class_targets = tf.random.uniform([batch_size, num_masks],
+                                             minval=0,
+                                             maxval=num_classes + 1,
+                                             dtype=tf.int32)
+    loss_fn = maskrcnn_losses.MaskrcnnLoss()
+    self.assertEqual(
+        tf.rank(loss_fn(mask_outputs, mask_targets, select_class_targets)), 0)
diff --git a/official/vision/modeling/factory.py b/official/vision/modeling/factory.py
@@ -204,7 +204,8 @@ def build_maskrcnn(input_specs: tf.keras.layers.InputSpec,
       max_num_detections=generator_config.max_num_detections,
       nms_version=generator_config.nms_version,
       use_cpu_nms=generator_config.use_cpu_nms,
-      soft_nms_sigma=generator_config.soft_nms_sigma)
+      soft_nms_sigma=generator_config.soft_nms_sigma,
+      use_sigmoid_probability=generator_config.use_sigmoid_probability)
 
   if model_config.include_mask:
     mask_head = instance_heads.MaskHead(
diff --git a/official/vision/modeling/layers/detection_generator.py b/official/vision/modeling/layers/detection_generator.py
@@ -572,6 +572,7 @@ def __init__(self,
                nms_version: str = 'v2',
                use_cpu_nms: bool = False,
                soft_nms_sigma: Optional[float] = None,
+               use_sigmoid_probability: bool = False,
                **kwargs):
     """Initializes a detection generator.
 
@@ -590,6 +591,8 @@ def __init__(self,
       use_cpu_nms: A `bool` of whether or not enforce NMS to run on CPU.
       soft_nms_sigma: A `float` representing the sigma parameter for Soft NMS.
         When soft_nms_sigma=0.0, we fall back to standard NMS.
+      use_sigmoid_probability: A `bool`, if true, use sigmoid to get
+        probability, otherwise use softmax.
       **kwargs: Additional keyword arguments passed to Layer.
     """
     self._config_dict = {
@@ -601,6 +604,7 @@ def __init__(self,
         'nms_version': nms_version,
         'use_cpu_nms': use_cpu_nms,
         'soft_nms_sigma': soft_nms_sigma,
+        'use_sigmoid_probability': use_sigmoid_probability,
     }
     super(DetectionGenerator, self).__init__(**kwargs)
 
@@ -644,7 +648,10 @@ def __call__(self,
         `decoded_box_scores`: A `float` tf.Tensor of shape
           [batch, num_raw_boxes] representing socres of all the decoded boxes.
     """
-    box_scores = tf.nn.softmax(raw_scores, axis=-1)
+    if self._config_dict['use_sigmoid_probability']:
+      box_scores = tf.math.sigmoid(raw_scores)
+    else:
+      box_scores = tf.nn.softmax(raw_scores, axis=-1)
 
     # Removes the background class.
     box_scores_shape = tf.shape(box_scores)
diff --git a/official/vision/modeling/layers/detection_generator_test.py b/official/vision/modeling/layers/detection_generator_test.py
@@ -46,8 +46,10 @@ class DetectionGeneratorTest(
   @parameterized.product(
       nms_version=['batched', 'v1', 'v2'],
       use_cpu_nms=[True, False],
-      soft_nms_sigma=[None, 0.1])
-  def testDetectionsOutputShape(self, nms_version, use_cpu_nms, soft_nms_sigma):
+      soft_nms_sigma=[None, 0.1],
+      use_sigmoid_probability=[True, False])
+  def testDetectionsOutputShape(self, nms_version, use_cpu_nms, soft_nms_sigma,
+                                use_sigmoid_probability):
     max_num_detections = 10
     num_classes = 4
     pre_nms_top_k = 5000
@@ -62,6 +64,7 @@ def testDetectionsOutputShape(self, nms_version, use_cpu_nms, soft_nms_sigma):
         'nms_version': nms_version,
         'use_cpu_nms': use_cpu_nms,
         'soft_nms_sigma': soft_nms_sigma,
+        'use_sigmoid_probability': use_sigmoid_probability,
     }
     generator = detection_generator.DetectionGenerator(**kwargs)
 
@@ -103,6 +106,7 @@ def test_serialize_deserialize(self):
         'nms_version': 'v2',
         'use_cpu_nms': False,
         'soft_nms_sigma': None,
+        'use_sigmoid_probability': False,
     }
     generator = detection_generator.DetectionGenerator(**kwargs)
 
diff --git a/official/vision/tasks/maskrcnn.py b/official/vision/tasks/maskrcnn.py
@@ -193,7 +193,9 @@ def _build_frcnn_losses(
     """Build losses for Fast R-CNN."""
     cascade_ious = self.task_config.model.roi_sampler.cascade_iou_thresholds
 
-    frcnn_cls_loss_fn = maskrcnn_losses.FastrcnnClassLoss()
+    frcnn_cls_loss_fn = maskrcnn_losses.FastrcnnClassLoss(
+        use_binary_cross_entropy=self.task_config.losses
+        .frcnn_class_use_binary_cross_entropy)
     frcnn_box_loss_fn = maskrcnn_losses.FastrcnnBoxLoss(
         self.task_config.losses.frcnn_huber_loss_delta,
         self.task_config.model.detection_head.class_agnostic_bbox_pred)