Internal change

tensorflower-gardener · fyangf · commit d1272f997731 · 2023-03-20T16:46:02.000-07:00
PiperOrigin-RevId: 500453807
diff --git a/official/vision/modeling/layers/detection_generator.py b/official/vision/modeling/layers/detection_generator.py
@@ -13,9 +13,13 @@
 # limitations under the License.
 
 """Contains definitions of generators to generate the final detections."""
+from collections.abc import Mapping, Sequence
 import contextlib
-from typing import Any, Dict, List, Optional, Mapping, Sequence, Tuple
+from typing import Any, Dict, List, Optional, Tuple
+
 # Import libraries
+
+import numpy as np
 import tensorflow as tf
 
 from official.vision.modeling.layers import edgetpu
@@ -411,6 +415,7 @@ def _generate_detections_v3(
   Raises:
     ValueError if inputs shapes are not valid.
   """
+  one = tf.constant(1, dtype=scores.dtype)
   with tf.name_scope('generate_detections'):
     batch_size, num_box_classes, box_locations, sides = (
         boxes.get_shape().as_list())
@@ -436,14 +441,14 @@ def _generate_detections_v3(
     # Gather NMS-ed boxes and scores.
     safe_indices = tf.nn.relu(indices)  # 0 for invalid
     invalid_detections = safe_indices - indices  # 1 for invalid, 0 for valid
-    valid_detections = 1.0 - invalid_detections  # 0 for invalid, 1 for valid
+    valid_detections = one - invalid_detections  # 0 for invalid, 1 for valid
     safe_indices = tf.cast(safe_indices, tf.int32)
-    boxes = tf.expand_dims(valid_detections, -1) * tf.gather(
-        boxes, safe_indices, axis=2, batch_dims=2)
+    boxes = tf.gather(boxes, safe_indices, axis=2, batch_dims=2)
+    boxes = tf.cast(tf.expand_dims(valid_detections, -1), boxes.dtype) * boxes
     scores = valid_detections * tf.gather(
         scores, safe_indices, axis=2, batch_dims=2)
     # Compliment with class numbers.
-    classes = tf.range(num_classes, dtype=tf.float32)
+    classes = tf.constant(np.arange(num_classes), dtype=scores.dtype)
     classes = tf.reshape(classes, [1, num_classes, 1])
     classes = tf.tile(classes, [batch_size, 1, max_num_detections])
     # Flatten classes, locations. Class = -1 for invalid detection
@@ -456,7 +461,7 @@ def _generate_detections_v3(
     boxes = tf.gather(boxes, indices, batch_dims=1, axis=1)
     classes = tf.gather(classes, indices, batch_dims=1, axis=1)
     invalid_detections = tf.nn.relu(classes) - classes
-    valid_detections = tf.reduce_sum(1. - invalid_detections, axis=1)
+    valid_detections = tf.reduce_sum(one - invalid_detections, axis=1)
     return boxes, scores, classes, valid_detections
 
 
@@ -995,16 +1000,18 @@ def _decode_multilevel_outputs_and_pre_nms_top_k(
     min_level = int(min(levels))
     max_level = int(max(levels))
     for i in range(max_level, min_level - 1, -1):
-      (_, unsharded_h, unsharded_w, num_anchors_per_locations_times_4
+      (batch_size, unsharded_h, unsharded_w, num_anchors_per_locations_times_4
       ) = raw_boxes[str(i)].get_shape().as_list()
+      if batch_size is None:
+        batch_size = tf.shape(raw_boxes[str(i)])[0]
       block = max(1, pre_nms_top_k_sharding_block // unsharded_w)
-      anchor_boxes_unsharded = tf.reshape(
-          anchor_boxes[str(i)],
-          [1, unsharded_h, unsharded_w, num_anchors_per_locations_times_4])
+      anchor_boxes_unsharded = tf.reshape(anchor_boxes[str(i)], [
+          batch_size, unsharded_h, unsharded_w,
+          num_anchors_per_locations_times_4
+      ])
       for (raw_scores_i, raw_boxes_i, anchor_boxes_i) in edgetpu.shard_tensors(
           1, block,
           (raw_scores[str(i)], raw_boxes[str(i)], anchor_boxes_unsharded)):
-        batch_size = tf.shape(raw_boxes_i)[0]
         (_, feature_h_i, feature_w_i, _) = raw_boxes_i.get_shape().as_list()
         num_locations = feature_h_i * feature_w_i
         num_anchors_per_locations = num_anchors_per_locations_times_4 // 4
@@ -1032,10 +1039,8 @@ def _decode_multilevel_outputs_and_pre_nms_top_k(
         scores, boxes = edgetpu.concat_and_top_k(pre_nms_top_k,
                                                  (scores, scores_i),
                                                  (boxes, boxes_i))
-
-    return (box_ops.clip_boxes(boxes,
-                               tf.expand_dims(image_shape,
-                                              axis=1)), tf.sigmoid(scores))
+    clip_shape = tf.expand_dims(tf.expand_dims(image_shape, axis=1), axis=1)
+    return box_ops.clip_boxes(boxes, clip_shape), tf.sigmoid(scores)
 
   def __call__(
       self,
diff --git a/official/vision/modeling/layers/edgetpu.py b/official/vision/modeling/layers/edgetpu.py
@@ -13,9 +13,8 @@
 # limitations under the License.
 
 """EdgeTPU oriented layers and tools."""
-
 from collections.abc import Iterable, Sequence
-from typing import Optional
+from typing import List, Optional, Union
 
 import numpy as np
 import tensorflow as tf
@@ -51,7 +50,8 @@ def _tensor_product_iou(boxes):
   # - last dimension is not 1. (Structure alignment)
   tpu_friendly_shape = [1, -1, 1, boxes_size]
   bottom, left, top, right = (
-      tf.reshape(side, tpu_friendly_shape) for side in tf.split(boxes, 4, -1))
+      tf.reshape(side, tpu_friendly_shape)
+      for side in tf.split(boxes, 4, -1))
   height, width = top - bottom, right - left
   area = height * width
   area_sum = _tensor_sum_vectors(area, area)
@@ -116,6 +116,8 @@ def shard_tensors(axis: int, block_size: int,
   Raises:
     ValueError: if input tensors has different size of sharded dimension.
   """
+  if not all(tensor.shape.is_fully_defined() for tensor in tensors):
+    return [tensors]
   for validate_axis in range(axis + 1):
     consistent_length: int = tensors[0].shape[validate_axis]
     for tensor in tensors:
@@ -195,6 +197,8 @@ def non_max_suppression_padded(boxes: tf.Tensor,
     A 1-D+ integer `Tensor` of shape `[...batch_dims, output_size]` representing
     the selected indices from the boxes tensor and `-1` values for the padding.
   """
+  if not boxes.shape.is_fully_defined():
+    return _non_max_suppression_as_is(boxes, scores, output_size, iou_threshold)
   # Does partitioning job to help compiler converge with memory.
   batch_shape = boxes.shape[:-2]
   batch_size = np.prod(batch_shape, dtype=np.int32)
@@ -211,6 +215,52 @@ def non_max_suppression_padded(boxes: tf.Tensor,
   return tf.reshape(indices, batch_shape + [output_size])
 
 
+def _refine_nms_graph_to_original_algorithm(better: tf.Tensor) -> tf.Tensor:
+  """Refines the relationship graph, bringing it closer to the iterative NMS.
+
+  See the `test_refinement_sample` unit tests for an example, and the comments
+  in the body of the algorithm for the intuition.
+
+  Args:
+    better: a tensor of zeros and ones such that [batch dims ..., box_1,
+      box_2] represents the [adjacency
+      matrix](https://en.wikipedia.org/wiki/Adjacency_matrix) for the
+      [relation](https://en.wikipedia.org/wiki/Relation_(mathematics)) `better`
+      between boxes box_1 and box_2.
+
+  Returns:
+    Modification of the tensor encoding the adjacency matrix of the `better`
+    relation.
+  """
+  # Take the constant's dtype from `better` instead of hard-coding float32;
+  # assumes `_reduce_or` and `_and` preserve that dtype - TODO confirm.
+  one = tf.constant(1, dtype=better.dtype)
+  # good_box: zeros and ones such that [batch dims ..., box_i] marks whether
+  # box_i is in the `good` subset: exactly the boxes with no `better` boxes.
+  # INTUITION: In terms of an oriented graph, this is the subset of nodes
+  # nobody points to as "I'm better than you". These nodes will never be
+  # suppressed in the original NMS algorithm.
+  good_box = one - _reduce_or(better, axis=-1)
+  # good_better: zeros and ones such that [batch dims ..., box_1, box_2] is the
+  # adjacency matrix of the `good_better` relation on the set of all boxes: the
+  # relation between a good box and the boxes it is better than.
+  # INTUITION: In terms of an oriented graph, this is the subset of edges which
+  # do not have any other inbound edges. These edges represent suppression
+  # actions in the original NMS algorithm.
+  good_better = _and(tf.expand_dims(good_box, axis=-2), better)
+  # not_bad_box: zeros and ones such that [batch dims ..., box_i] marks whether
+  # box_i is in the `not_bad` subset: all those, and only those, boxes that do
+  # not have any `good_better` boxes.
+  # INTUITION: These are the nodes which are not suppressed by `good` boxes in
+  # the original NMS algorithm.
+  not_bad_box = one - _reduce_or(good_better, axis=-1)
+  # return: zeros and ones such that [batch dims ..., box_1, box_2] is the
+  # adjacency matrix of the `better` relation on the set of all boxes, refined
+  # to represent more closely the suppression procedure of the original NMS
+  # algorithm.
+  return _and(tf.expand_dims(not_bad_box, axis=-2), better)
+
+
 def _non_max_suppression_as_is(boxes: tf.Tensor,
                                scores: tf.Tensor,
                                output_size: int,
@@ -230,32 +280,34 @@ def _non_max_suppression_as_is(boxes: tf.Tensor,
     A 1-D+ integer `Tensor` of shape `[...batch_dims, output_size]` representing
     the selected indices from the boxes tensor and `-1` values for the padding.
   """
-  batch_shape = boxes.shape[:-2]
-  batch_size = np.prod(batch_shape, dtype=np.int32)
   boxes_size = boxes.shape[-2]
   if boxes.shape[-1] != 4:
     raise ValueError(f'Boxes shape ({boxes.shape}) last dimension must be 4 '
                      'to represent [y1, x1, y2, x2] boxes coordinates')
   if scores.shape != boxes.shape[:-1]:
     raise ValueError(f'Boxes shape ({boxes.shape}) and scores shape '
                      f'({scores.shape}) do not match.')
-  order = tf.range(boxes_size, dtype=tf.float32)
+  order = tf.constant(np.arange(boxes_size), dtype=scores.dtype)
   relative_order = _tensor_sum_vectors(order, -order)
   relative_scores = _tensor_sum_vectors(scores, -scores)
-  similar = _greater(_tensor_product_iou(boxes) - iou_threshold)
+  similar = tf.cast(
+      _greater(
+          _tensor_product_iou(boxes) -
+          tf.constant(iou_threshold, dtype=boxes.dtype)), scores.dtype)
   worse = _greater(relative_scores)
   same_later = _and(_same(relative_scores), _greater(relative_order))
   similar_worse_or_same_later = _and(similar, _or(worse, same_later))
   prunable = _reduce_or(similar_worse_or_same_later, axis=-1)
-  remaining = tf.constant(1.) - prunable
-  scores = tf.reshape(tf.exp(scores), [1, 1, batch_size, boxes_size])
-  remaining = tf.reshape(remaining, [1, 1, batch_size, boxes_size])
+  remaining = tf.constant(1, dtype=prunable.dtype) - prunable
+  if scores.shape[0] is None:
+    # Keep most of the tensor shape defined, so that error messages are clear.
+    remaining = tf.reshape(remaining, [tf.shape(scores)[0], *scores.shape[1:]])
+  else:
+    remaining = tf.reshape(remaining, scores.shape)
   # top_k runs on TPU cores, let it happen, TPU tiles implementation is slower.
   top_k = tf.math.top_k(scores * remaining, output_size)
-  indices = (
-      tf.cast(top_k.indices, top_k.values.dtype) * _greater(top_k.values) -
-      _same(top_k.values))
-  return tf.reshape(indices, batch_shape + [output_size])
+  return (tf.cast(top_k.indices, top_k.values.dtype) * _greater(top_k.values) -
+          _same(top_k.values))
 
 
 def concat_and_top_k(
diff --git a/official/vision/serving/detection.py b/official/vision/serving/detection.py
@@ -32,12 +32,13 @@ class DetectionModule(export_base.ExportModule):
 
   def _build_model(self):
 
-    if self._batch_size is None:
-      # Only batched NMS is supported with dynamic batch size.
+    nms_versions_supporting_dynamic_batch_size = {'batched', 'v3'}
+    nms_version = self.params.task.model.detection_generator.nms_version
+    if (self._batch_size is None and
+        nms_version not in nms_versions_supporting_dynamic_batch_size):
+      logging.info('nms_version is set to `batched` because `%s` '
+                   'does not support with dynamic batch size.', nms_version)
       self.params.task.model.detection_generator.nms_version = 'batched'
-      logging.info(
-          'nms_version is set to `batched` because only batched NMS is '
-          'supported with dynamic batch size.')
 
     input_specs = tf.keras.layers.InputSpec(shape=[self._batch_size] +
                                             self._input_image_size + [3])