Internal change

tensorflower-gardener · fyangf · commit c0c87ec1aa8a · 2023-03-23T12:50:17.000-07:00
PiperOrigin-RevId: 492000777
diff --git a/official/vision/modeling/layers/edgetpu.py b/official/vision/modeling/layers/edgetpu.py
@@ -160,7 +160,8 @@ def shard_tensors(axis: int, block_size: int,
 def non_max_suppression_padded(boxes: tf.Tensor,
                                scores: tf.Tensor,
                                output_size: int,
-                               iou_threshold: float = 0.5) -> tf.Tensor:
+                               iou_threshold: float = 0.5,
+                               refinements: int = 0) -> tf.Tensor:
   """Selects a subset of boxes which have highest score among IOU-similar boxes.
 
   Prunes away boxes that have high intersection-over-union (IOU) overlap
@@ -190,8 +191,10 @@ def non_max_suppression_padded(boxes: tf.Tensor,
       representing a single score corresponding to each box (each row of boxes).
     output_size: A scalar integer `Tensor` representing the maximum number of
       boxes to be selected by non-max suppression.
-    iou_threshold: A 0-D float tensor representing the threshold for deciding
-      whether boxes overlap too much with respect to IOU.
+    iou_threshold: A float representing the threshold for deciding whether boxes
+      overlap too much with respect to IOU.
+    refinements: A number of extra refinement steps to make result closer to
+      original sequential NMS.
 
   Returns:
     A 1-D+ integer `Tensor` of shape `[...batch_dims, output_size]` representing
@@ -211,7 +214,7 @@ def non_max_suppression_padded(boxes: tf.Tensor,
   for boxes_i, scores_i in shard_tensors(0, block, boxes, scores):
     indices.append(
         _non_max_suppression_as_is(boxes_i, scores_i, output_size,
-                                   iou_threshold))
+                                   iou_threshold, refinements))
   indices = tf.concat(indices, axis=0)
   return tf.reshape(indices, batch_shape + [output_size])
 
@@ -266,7 +269,8 @@ def _refine_nms_graph_to_original_algorithm(better: tf.Tensor) -> tf.Tensor:
 def _non_max_suppression_as_is(boxes: tf.Tensor,
                                scores: tf.Tensor,
                                output_size: int,
-                               iou_threshold: float = 0.5) -> tf.Tensor:
+                               iou_threshold: float = 0.5,
+                               refinements: int = 0) -> tf.Tensor:
   """Selects a subset of boxes which have highest score among IOU-similar boxes.
 
   Args:
@@ -277,6 +281,8 @@ def _non_max_suppression_as_is(boxes: tf.Tensor,
       boxes to be selected by non-max suppression.
     iou_threshold: A 0-D float tensor representing the threshold for deciding
       whether boxes overlap too much with respect to IOU.
+    refinements: A number of extra refinement steps to make result closer to
+      original sequential NMS.
 
   Returns:
     A 1-D+ integer `Tensor` of shape `[...batch_dims, output_size]` representing
@@ -299,6 +305,9 @@ def _non_max_suppression_as_is(boxes: tf.Tensor,
   worse = _greater(relative_scores)
   same_later = _and(_same(relative_scores), _greater(relative_order))
   similar_worse_or_same_later = _and(similar, _or(worse, same_later))
+  for _ in range(refinements):
+    similar_worse_or_same_later = _refine_nms_graph_to_original_algorithm(
+        similar_worse_or_same_later)
   prunable = _reduce_or(similar_worse_or_same_later, axis=-1)
   remaining = tf.constant(1, dtype=prunable.dtype) - prunable
   if scores.shape[0] is None:
diff --git a/official/vision/modeling/layers/edgetpu_test.py b/official/vision/modeling/layers/edgetpu_test.py
@@ -44,16 +44,75 @@ def _maximum_activation_size(model):
   return max_size
 
 
+def _deviation_and_margin(reference, valid, optimized):
+  """Returns deviation and margin between two batched sets of indices."""
+  deviation_rate = 0
+  min_union = reference.shape[1] + optimized.shape[1]
+  runs = reference.shape[0]
+  for run in range(runs):
+    reference_slice = {*reference[run, :valid[run]].numpy().tolist()}
+    optimized_slice = {*optimized[run].numpy().astype(int).tolist()} - {-1}
+    union_size = len(optimized_slice | reference_slice)
+    symdiff_size = len(optimized_slice ^ reference_slice)
+    deviation_rate += symdiff_size / union_size
+    min_union = min(min_union, union_size)
+  deviation_rate = deviation_rate / runs
+  # six sigma estimate via the law of large numbers (LLN)
+  margin = 6 * (deviation_rate / np.sqrt(runs) + 1 / (runs * min_union))
+  return deviation_rate, margin
+
+
 class NonMaxSuppressionTest(parameterized.TestCase, tf.test.TestCase):
 
   def setUp(self):
     super().setUp()
     tf.random.set_seed(42)
 
-  @parameterized.parameters((16, 8, 200, 0.009), (31, 17, 100, 0.013),
-                            (71, 41, 100, 0.045), (150, 100, 100, 0.129),
-                            (300, 300, 100, 0.116), (600, 600, 50, 0.176))
-  def test_reference_match(self, n, top, runs, max_deviation):
+  def test_refinement_sample(self):
+    """Tests difference in NMS behaviours.
+
+    Runs on four boxes with following IOU table (only neighbours will qualify
+    as similar boxes)
+
+    box | 0    | 1    | 2    | 3
+    --- | ---- | ---- | ---- | ----
+    0   | 1    | 7/13 | 1/4  | 1/19
+    1   | 7/13 | 1    | 7/13 | 1/4
+    2   | 1/4  | 7/13 | 1    | 7/13
+    3   | 1/19 | 1/4  | 7/13 | 1
+
+    So 0 is the best box and it eliminates 1; next is box 2, which is
+    eliminated by 1 if that is allowed (depending on number of refinements).
+    """
+    boxes: tf.Tensor = tf.constant(
+        [
+            # y1,  x1,  y2,  x2
+            [0.0, 0.0, 1.0, 1.0],
+            [0.0, 0.3, 1.0, 1.3],
+            [0.0, 0.6, 1.0, 1.6],
+            [0.0, 0.9, 1.0, 1.9],
+        ],
+        dtype=tf.float32)
+    scores: tf.Tensor = tf.constant([
+        1.0,
+        0.9,
+        0.8,
+        0.7,
+    ], dtype=tf.float32)
+    self.assertAllEqual(
+        edgetpu.non_max_suppression_padded(boxes, scores, 4, refinements=0),
+        tf.constant([0.0, -1.0, -1.0, -1.0], dtype=tf.float32))
+    self.assertAllEqual(
+        edgetpu.non_max_suppression_padded(boxes, scores, 4, refinements=1),
+        tf.constant([0.0, 2.0, -1.0, -1.0], dtype=tf.float32))
+
+  @parameterized.parameters((16, 8, 200, [0.009, 0.004, 0.004]),
+                            (31, 17, 100, [0.013, 0.004, 0.004]),
+                            (71, 41, 100, [0.045, 0.003, 0.002]),
+                            (150, 100, 100, [0.129, 0.010, 0.001]),
+                            (300, 300, 100, [0.116, 0.016, 0.002]),
+                            (600, 600, 50, [0.176, 0.032, 0.003]))
+  def test_reference_match(self, n, top, runs, max_devs):
     """Compares that new optimized method is close to reference method.
 
     Runs two algorithms with same sets of input boxes and scores, and measures
@@ -71,32 +130,26 @@ def test_reference_match(self, n, top, runs, max_deviation):
       top: limit of output boxes count.
       runs: for the statistical testing number of runs to performs to avoid
         tests flakiness.
-      max_deviation: mean limit on deviation between optimized and reference
-        algorithms. Please read notes why this number may be set higher to avoid
-        flaky testing.
+      max_devs: series of mean limits on deviation between optimized and
+        reference algorithms with different number of refinements (the index
+        of each element corresponds to its number of refinements). Please use
+        the margin-based values proposed by a failed test to avoid flakiness.
     """
-    deviation_rate = 0
-    min_union = 2*n
     boxes = random_boxes([runs, n])
     scores = tf.random.uniform(shape=[runs, n])
-    test = edgetpu.non_max_suppression_padded(boxes, scores, top)
-    for run in range(runs):
-      reference = tf.image.non_max_suppression(boxes[run], scores[run], top)
-      reference = {*reference.numpy().tolist()}
-      optimized = {*test[run].numpy().astype(int).tolist()} - {-1}
-      union_size = len(optimized | reference)
-      deviation_rate += len(optimized ^ reference) / union_size
-      min_union = min(min_union, union_size)
-    deviation_rate = deviation_rate / runs
-    # six sigma estimate via LLN theorem
-    safe_margin = 6 * (deviation_rate / np.sqrt(runs) + 1/(runs*min_union))
-    self.assertLess(
-        deviation_rate,
-        max_deviation,
-        msg='Deviation rate between optimized and reference implementations is '
-        'higher than expected. If you are tuning the test, recommended safe '
-        'deviation rate is '
-        f'{deviation_rate} + {safe_margin} = {deviation_rate + safe_margin}')
+    reference, valid = tf.image.non_max_suppression_padded(
+        boxes, scores, top, pad_to_max_output_size=True)
+    for refinements, max_deviation in enumerate(max_devs):
+      optimized = edgetpu.non_max_suppression_padded(
+          boxes, scores, top, refinements=refinements)
+      deviation, margin = _deviation_and_margin(reference, valid, optimized)
+      self.assertLess(
+          deviation,
+          max_deviation,
+          msg='Deviation rate between optimized and reference implementations is '
+          'higher than expected. If you are tuning the test, recommended safe '
+          'deviation rate is '
+          f'{deviation} + {margin} = {deviation + margin}')
 
   @parameterized.parameters(([16], 8), ([91, 150], 100), ([20, 20, 200], 10))
   def test_sharded_match(self, shape: list[int], top: int):