
Commit 9e9bd39

tensorflower-gardener and fyangf authored and committed
Internal change
PiperOrigin-RevId: 500805358
1 parent d1272f9 commit 9e9bd39

2 files changed: +43, -27 lines


official/vision/modeling/layers/detection_generator.py

Lines changed: 35 additions & 22 deletions

@@ -382,7 +382,8 @@ def _generate_detections_v3(
     scores: tf.Tensor,
     pre_nms_score_threshold: float = 0.05,
     nms_iou_threshold: float = 0.5,
-    max_num_detections: int = 100
+    max_num_detections: int = 100,
+    refinements: int = 2,
 ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]:
   """Generates the detections given the model outputs using NMS for EdgeTPU.
 
@@ -400,6 +401,7 @@ def _generate_detections_v3(
       boxes overlap too much with respect to IOU.
     max_num_detections: A `scalar` representing maximum number of boxes retained
       over all classes.
+    refinements: Quality parameter for NMS algorithm.
 
   Returns:
     nms_boxes: A `float` tf.Tensor of shape [batch_size, max_num_detections, 4]
@@ -434,10 +436,8 @@ def _generate_detections_v3(
 
   # EdgeTPU-friendly class-wise NMS, -1 for invalid.
   indices = edgetpu.non_max_suppression_padded(
-      boxes,
-      scores,
-      max_num_detections,
-      iou_threshold=nms_iou_threshold)
+      boxes, scores, max_num_detections, iou_threshold=nms_iou_threshold,
+      refinements=refinements)
   # Gather NMS-ed boxes and scores.
   safe_indices = tf.nn.relu(indices)  # 0 for invalid
   invalid_detections = safe_indices - indices  # 1 for invalid, 0 for valid
@@ -859,6 +859,7 @@ def __init__(self,
                soft_nms_sigma: Optional[float] = None,
                tflite_post_processing_config: Optional[Dict[str, Any]] = None,
                pre_nms_top_k_sharding_block: Optional[int] = None,
+               nms_v3_refinements: Optional[int] = None,
                **kwargs):
     """Initializes a multi-level detection generator.
 
@@ -882,6 +883,12 @@ def __init__(self,
       pre_nms_top_k_sharding_block: For v3 (edge tpu friendly) NMS, avoids
         creating long axis for pre_nms_top_k. Will do top_k in shards of size
         [num_classes, pre_nms_top_k_sharding_block * boxes_per_location]
+      nms_v3_refinements: For v3 (edge tpu friendly) NMS, sets how close result
+        should be to standard NMS. When None, 2 is used. Here is some
+        experimental deviations for different refinement values:
+        if == 0, AP is reduced 1.0%, AR is reduced 5% on COCO
+        if == 1, AP is reduced 0.2%, AR is reduced 2% on COCO
+        if == 2, AP is reduced <0.1%, AR is reduced <1% on COCO
 
       **kwargs: Additional keyword arguments passed to Layer.
     """
@@ -899,6 +906,9 @@ def __init__(self,
     if pre_nms_top_k_sharding_block is not None:
       self._config_dict[
           'pre_nms_top_k_sharding_block'] = pre_nms_top_k_sharding_block
+    if nms_v3_refinements is not None:
+      self._config_dict[
+          'nms_v3_refinements'] = nms_v3_refinements
 
     if tflite_post_processing_config is not None:
       self._config_dict.update(
@@ -999,22 +1009,26 @@ def _decode_multilevel_outputs_and_pre_nms_top_k(
     levels = list(raw_boxes.keys())
     min_level = int(min(levels))
     max_level = int(max(levels))
+    clip_shape = tf.expand_dims(tf.expand_dims(image_shape, axis=1), axis=1)
     for i in range(max_level, min_level - 1, -1):
       (batch_size, unsharded_h, unsharded_w, num_anchors_per_locations_times_4
       ) = raw_boxes[str(i)].get_shape().as_list()
+      num_anchors_per_locations = num_anchors_per_locations_times_4 // 4
       if batch_size is None:
         batch_size = tf.shape(raw_boxes[str(i)])[0]
       block = max(1, pre_nms_top_k_sharding_block // unsharded_w)
-      anchor_boxes_unsharded = tf.reshape(anchor_boxes[str(i)], [
-          batch_size, unsharded_h, unsharded_w,
-          num_anchors_per_locations_times_4
-      ])
-      for (raw_scores_i, raw_boxes_i, anchor_boxes_i) in edgetpu.shard_tensors(
+      boxes_shape = [
+          batch_size, unsharded_h, unsharded_w * num_anchors_per_locations, 4
+      ]
+      decoded_boxes = box_ops.clip_boxes(
+          box_ops.decode_boxes(
+              tf.reshape(raw_boxes[str(i)], boxes_shape),
+              tf.reshape(anchor_boxes[str(i)], boxes_shape)), clip_shape)
+      for (raw_scores_i, decoded_boxes_i) in edgetpu.shard_tensors(
           1, block,
-          (raw_scores[str(i)], raw_boxes[str(i)], anchor_boxes_unsharded)):
-        (_, feature_h_i, feature_w_i, _) = raw_boxes_i.get_shape().as_list()
+          (raw_scores[str(i)], decoded_boxes)):
+        (_, feature_h_i, feature_w_i, _) = raw_scores_i.get_shape().as_list()
         num_locations = feature_h_i * feature_w_i
-        num_anchors_per_locations = num_anchors_per_locations_times_4 // 4
         num_classes = raw_scores_i.get_shape().as_list(
         )[-1] // num_anchors_per_locations
 
@@ -1029,18 +1043,16 @@ def _decode_multilevel_outputs_and_pre_nms_top_k(
         # Box decoding.
         # The anchor boxes are shared for all data in a batch.
        # One stage detector only supports class agnostic box regression.
-        boxes_shape = [batch_size, num_locations * num_anchors_per_locations, 4]
         boxes_i = tf.tile(
-            tf.expand_dims(
-                box_ops.decode_boxes(
-                    tf.reshape(raw_boxes_i, boxes_shape),
-                    tf.reshape(anchor_boxes_i, boxes_shape)),
-                axis=1), [1, num_classes - 1, 1, 1])
+            tf.reshape(
+                decoded_boxes_i,
+                [batch_size, 1, num_locations * num_anchors_per_locations, 4]),
+            [1, num_classes - 1, 1, 1])
         scores, boxes = edgetpu.concat_and_top_k(pre_nms_top_k,
                                                  (scores, scores_i),
                                                  (boxes, boxes_i))
-    clip_shape = tf.expand_dims(tf.expand_dims(image_shape, axis=1), axis=1)
-    return box_ops.clip_boxes(boxes, clip_shape), tf.sigmoid(scores)
+    boxes: tf.Tensor = boxes  # pytype: disable=annotation-type-mismatch
+    return boxes, tf.sigmoid(scores)
 
   def __call__(
       self,
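The refactored decode path above views each level's raw output of shape [batch, H, W, anchors * 4] as [batch, H, W * anchors, 4], so decoding and clipping happen once per level before sharding. A self-contained sketch of just that shape bookkeeping (illustrative values, not from the commit):

# View [B, H, W, A*4] predictions as [B, H, W*A, 4] boxes; the same reshape is
# applied to the anchors, after which decode/clip run level-wide and
# shard_tensors splits along the height axis.
import tensorflow as tf

B, H, W, A = 2, 4, 4, 9
raw_boxes = tf.random.uniform([B, H, W, A * 4])
boxes_shape = [B, H, W * A, 4]
boxes_view = tf.reshape(raw_boxes, boxes_shape)
print(boxes_view.shape)  # (2, 4, 36, 4)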
@@ -1173,7 +1185,8 @@ def __call__(
                 pre_nms_score_threshold=self
                 ._config_dict['pre_nms_score_threshold'],
                 nms_iou_threshold=self._config_dict['nms_iou_threshold'],
-                max_num_detections=self._config_dict['max_num_detections']))
+                max_num_detections=self._config_dict['max_num_detections'],
+                refinements=self._config_dict.get('nms_v3_refinements', 2)))
       # Set `nmsed_attributes` to None for v3.
       nmsed_attributes = {}
     else:

official/vision/modeling/layers/edgetpu.py

Lines changed: 8 additions & 5 deletions

@@ -198,7 +198,8 @@ def non_max_suppression_padded(boxes: tf.Tensor,
     the selected indices from the boxes tensor and `-1` values for the padding.
   """
   if not boxes.shape.is_fully_defined():
-    return _non_max_suppression_as_is(boxes, scores, output_size, iou_threshold)
+    return _non_max_suppression_as_is(boxes, scores, output_size, iou_threshold,
+                                       refinements)
   # Does partitioning job to help compiler converge with memory.
   batch_shape = boxes.shape[:-2]
   batch_size = np.prod(batch_shape, dtype=np.int32)
@@ -231,14 +232,15 @@ def _refine_nms_graph_to_original_algorithm(better: tf.Tensor) -> tf.Tensor:
   Returns:
     Modification of tensor encoding adjacency matrix of `better` relation.
   """
+  one = tf.constant(1, dtype=better.dtype)
   # good_box: is a tensor with zeros and ones so that
   # [batch dims ..., box_i] represents belonging of a box_i to the `good`
   # subset. `good` subset is defined as exactly those boxes that do not have any
   # `better` boxes.
   # INTUITION: In terms of oriented graph , this is subset of nodes nobody
   # points to as "I'm better than you". These nodes will never be suppressed in
   # the original NMS algorithm.
-  good_box = tf.constant(1.) - _reduce_or(better, axis=-1)
+  good_box = one - _reduce_or(better, axis=-1)
   # good_better: is a tensor with zeros and ones so that
   # [batch dims ..., box_1, box_2] represents the adjacency matrix for the
   # `good_better` relation on all boxes set. `good_better` relation is defined
@@ -253,7 +255,7 @@ def _refine_nms_graph_to_original_algorithm(better: tf.Tensor) -> tf.Tensor:
   # does not have any `good_better` boxes.
   # INTUITION: These nodes are nodes which are not suppressed by `good` boxes
   # in the original NMS algorithm.
-  not_bad_box = tf.constant(1.) - _reduce_or(good_better, axis=-1)
+  not_bad_box = one - _reduce_or(good_better, axis=-1)
   # return: is a tensor with zeros and ones so that
   # [batch dims ..., box_1, box_2] represents the adjacency matrix for the
   # `better` relation on all boxes set which is closer to represent suppression
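The commit only swaps the hard-coded float constant for a dtype-matched `one` here, but the surrounding comments describe the refinement idea, so a toy self-contained walk-through of one refinement pass may help (not from the commit). `reduce_or` stands in for the module's `_reduce_or` helper (assumed to be an OR-reduction over 0/1 tensors), and `good_better` is reconstructed from the comments as `better` edges restricted to good boxes; the exact formula in edgetpu.py may differ:

# better[i, j] == 1 means "box j is better than (i.e. can suppress) box i".
import tensorflow as tf

def reduce_or(x, axis):
  # Assumed stand-in for edgetpu._reduce_or on 0/1 tensors.
  return tf.minimum(tf.reduce_sum(x, axis=axis), 1.0)

better = tf.constant([[0., 0., 0.],   # box 0: nothing better -> `good`
                      [1., 0., 0.],   # box 1: suppressed by good box 0
                      [0., 1., 0.]])  # box 2: only suppressor is box 1
one = tf.constant(1.0)                # dtype-matched, as in the new code
good_box = one - reduce_or(better, axis=-1)           # [1., 0., 0.]
good_better = better * good_box                       # keep edges whose better box is good
not_bad_box = one - reduce_or(good_better, axis=-1)   # [1., 0., 1.]
# Box 2 survives because its only suppressor (box 1) is itself suppressed by a
# good box, matching the sequential NMS outcome the refinement approximates.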
@@ -306,8 +308,9 @@ def _non_max_suppression_as_is(boxes: tf.Tensor,
   remaining = tf.reshape(remaining, scores.shape)
   # top_k runs on TPU cores, let it happen, TPU tiles implementation is slower.
   top_k = tf.math.top_k(scores * remaining, output_size)
-  return (tf.cast(top_k.indices, top_k.values.dtype) * _greater(top_k.values) -
-          _same(top_k.values))
+  valid = _greater(top_k.values)
+  return (tf.cast(top_k.indices, top_k.values.dtype) * valid + valid -
+          tf.constant(1, dtype=top_k.values.dtype))
 
 
 def concat_and_top_k(
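The rewritten return derives the -1 padding directly from the `valid` mask. A quick self-contained check of the arithmetic (not part of the commit):

# indices * valid + valid - 1: kept slots keep their index (including 0),
# padded slots (valid == 0) become -1, matching the "-1 for invalid" contract
# documented for non_max_suppression_padded.
import tensorflow as tf

indices = tf.constant([3., 0., 7.])  # top_k indices cast to the values dtype
valid = tf.constant([1., 1., 0.])    # _greater(top_k.values): 1 = kept, 0 = padding
print((indices * valid + valid - tf.constant(1.)).numpy())  # [ 3.  0. -1.]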

0 commit comments