
Commit 9e9bd39

tensorflower-gardener and fyangf authored and committed
Internal change
PiperOrigin-RevId: 500805358
1 parent d1272f9 commit 9e9bd39

2 files changed: +43, -27 lines


official/vision/modeling/layers/detection_generator.py

Lines changed: 35 additions & 22 deletions

@@ -382,7 +382,8 @@ def _generate_detections_v3(
     scores: tf.Tensor,
     pre_nms_score_threshold: float = 0.05,
     nms_iou_threshold: float = 0.5,
-    max_num_detections: int = 100
+    max_num_detections: int = 100,
+    refinements: int = 2,
 ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]:
   """Generates the detections given the model outputs using NMS for EdgeTPU.
 
@@ -400,6 +401,7 @@ def _generate_detections_v3(
       boxes overlap too much with respect to IOU.
     max_num_detections: A `scalar` representing maximum number of boxes retained
       over all classes.
+    refinements: Quality parameter for NMS algorithm.
 
   Returns:
     nms_boxes: A `float` tf.Tensor of shape [batch_size, max_num_detections, 4]
@@ -434,10 +436,8 @@ def _generate_detections_v3(
 
   # EdgeTPU-friendly class-wise NMS, -1 for invalid.
   indices = edgetpu.non_max_suppression_padded(
-      boxes,
-      scores,
-      max_num_detections,
-      iou_threshold=nms_iou_threshold)
+      boxes, scores, max_num_detections, iou_threshold=nms_iou_threshold,
+      refinements=refinements)
   # Gather NMS-ed boxes and scores.
   safe_indices = tf.nn.relu(indices)  # 0 for invalid
   invalid_detections = safe_indices - indices  # 1 for invalid, 0 for valid
@@ -859,6 +859,7 @@ def __init__(self,
                soft_nms_sigma: Optional[float] = None,
                tflite_post_processing_config: Optional[Dict[str, Any]] = None,
                pre_nms_top_k_sharding_block: Optional[int] = None,
+               nms_v3_refinements: Optional[int] = None,
                **kwargs):
     """Initializes a multi-level detection generator.
 
@@ -882,6 +883,12 @@ def __init__(self,
       pre_nms_top_k_sharding_block: For v3 (edge tpu friendly) NMS, avoids
         creating long axis for pre_nms_top_k. Will do top_k in shards of size
         [num_classes, pre_nms_top_k_sharding_block * boxes_per_location]
+      nms_v3_refinements: For v3 (edge tpu friendly) NMS, sets how close result
+        should be to standard NMS. When None, 2 is used. Here is some
+        experimental deviations for different refinement values:
+        if == 0, AP is reduced 1.0%, AR is reduced 5% on COCO
+        if == 1, AP is reduced 0.2%, AR is reduced 2% on COCO
+        if == 2, AP is reduced <0.1%, AR is reduced <1% on COCO
 
       **kwargs: Additional keyword arguments passed to Layer.
     """
@@ -899,6 +906,9 @@ def __init__(self,
     if pre_nms_top_k_sharding_block is not None:
       self._config_dict[
           'pre_nms_top_k_sharding_block'] = pre_nms_top_k_sharding_block
+    if nms_v3_refinements is not None:
+      self._config_dict[
+          'nms_v3_refinements'] = nms_v3_refinements
 
     if tflite_post_processing_config is not None:
       self._config_dict.update(
@@ -999,22 +1009,26 @@ def _decode_multilevel_outputs_and_pre_nms_top_k(
     levels = list(raw_boxes.keys())
     min_level = int(min(levels))
     max_level = int(max(levels))
+    clip_shape = tf.expand_dims(tf.expand_dims(image_shape, axis=1), axis=1)
     for i in range(max_level, min_level - 1, -1):
       (batch_size, unsharded_h, unsharded_w, num_anchors_per_locations_times_4
       ) = raw_boxes[str(i)].get_shape().as_list()
+      num_anchors_per_locations = num_anchors_per_locations_times_4 // 4
       if batch_size is None:
         batch_size = tf.shape(raw_boxes[str(i)])[0]
       block = max(1, pre_nms_top_k_sharding_block // unsharded_w)
-      anchor_boxes_unsharded = tf.reshape(anchor_boxes[str(i)], [
-          batch_size, unsharded_h, unsharded_w,
-          num_anchors_per_locations_times_4
-      ])
-      for (raw_scores_i, raw_boxes_i, anchor_boxes_i) in edgetpu.shard_tensors(
+      boxes_shape = [
+          batch_size, unsharded_h, unsharded_w * num_anchors_per_locations, 4
+      ]
+      decoded_boxes = box_ops.clip_boxes(
+          box_ops.decode_boxes(
+              tf.reshape(raw_boxes[str(i)], boxes_shape),
+              tf.reshape(anchor_boxes[str(i)], boxes_shape)), clip_shape)
+      for (raw_scores_i, decoded_boxes_i) in edgetpu.shard_tensors(
           1, block,
-          (raw_scores[str(i)], raw_boxes[str(i)], anchor_boxes_unsharded)):
-        (_, feature_h_i, feature_w_i, _) = raw_boxes_i.get_shape().as_list()
+          (raw_scores[str(i)], decoded_boxes)):
+        (_, feature_h_i, feature_w_i, _) = raw_scores_i.get_shape().as_list()
         num_locations = feature_h_i * feature_w_i
-        num_anchors_per_locations = num_anchors_per_locations_times_4 // 4
         num_classes = raw_scores_i.get_shape().as_list(
         )[-1] // num_anchors_per_locations
 
@@ -1029,18 +1043,16 @@ def _decode_multilevel_outputs_and_pre_nms_top_k(
         # Box decoding.
         # The anchor boxes are shared for all data in a batch.
        # One stage detector only supports class agnostic box regression.
-        boxes_shape = [batch_size, num_locations * num_anchors_per_locations, 4]
         boxes_i = tf.tile(
-            tf.expand_dims(
-                box_ops.decode_boxes(
-                    tf.reshape(raw_boxes_i, boxes_shape),
-                    tf.reshape(anchor_boxes_i, boxes_shape)),
-                axis=1), [1, num_classes - 1, 1, 1])
+            tf.reshape(
+                decoded_boxes_i,
+                [batch_size, 1, num_locations * num_anchors_per_locations, 4]),
+            [1, num_classes - 1, 1, 1])
         scores, boxes = edgetpu.concat_and_top_k(pre_nms_top_k,
                                                  (scores, scores_i),
                                                  (boxes, boxes_i))
-    clip_shape = tf.expand_dims(tf.expand_dims(image_shape, axis=1), axis=1)
-    return box_ops.clip_boxes(boxes, clip_shape), tf.sigmoid(scores)
+    boxes: tf.Tensor = boxes  # pytype: disable=annotation-type-mismatch
+    return boxes, tf.sigmoid(scores)
 
   def __call__(
       self,
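The refactored decode path above views each level's raw output of shape [batch, H, W, anchors * 4] as [batch, H, W * anchors, 4], so decoding and clipping happen once per level before sharding. A self-contained sketch of just that shape bookkeeping (illustrative values, not from the commit):

# View [B, H, W, A*4] predictions as [B, H, W*A, 4] boxes; the same reshape is
# applied to the anchors, after which decode/clip run level-wide and
# shard_tensors splits along the height axis.
import tensorflow as tf

B, H, W, A = 2, 4, 4, 9
raw_boxes = tf.random.uniform([B, H, W, A * 4])
boxes_shape = [B, H, W * A, 4]
boxes_view = tf.reshape(raw_boxes, boxes_shape)
print(boxes_view.shape)  # (2, 4, 36, 4)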
@@ -1173,7 +1185,8 @@ def __call__(
                 pre_nms_score_threshold=self
                 ._config_dict['pre_nms_score_threshold'],
                 nms_iou_threshold=self._config_dict['nms_iou_threshold'],
-                max_num_detections=self._config_dict['max_num_detections']))
+                max_num_detections=self._config_dict['max_num_detections'],
+                refinements=self._config_dict.get('nms_v3_refinements', 2)))
       # Set `nmsed_attributes` to None for v3.
       nmsed_attributes = {}
     else:

official/vision/modeling/layers/edgetpu.py

Lines changed: 8 additions & 5 deletions

@@ -198,7 +198,8 @@ def non_max_suppression_padded(boxes: tf.Tensor,
     the selected indices from the boxes tensor and `-1` values for the padding.
   """
   if not boxes.shape.is_fully_defined():
-    return _non_max_suppression_as_is(boxes, scores, output_size, iou_threshold)
+    return _non_max_suppression_as_is(boxes, scores, output_size, iou_threshold,
+                                       refinements)
   # Does partitioning job to help compiler converge with memory.
   batch_shape = boxes.shape[:-2]
   batch_size = np.prod(batch_shape, dtype=np.int32)
@@ -231,14 +232,15 @@ def _refine_nms_graph_to_original_algorithm(better: tf.Tensor) -> tf.Tensor:
   Returns:
     Modification of tensor encoding adjacency matrix of `better` relation.
   """
+  one = tf.constant(1, dtype=better.dtype)
   # good_box: is a tensor with zeros and ones so that
   # [batch dims ..., box_i] represents belonging of a box_i to the `good`
   # subset. `good` subset is defined as exactly those boxes that do not have any
   # `better` boxes.
   # INTUITION: In terms of oriented graph , this is subset of nodes nobody
   # points to as "I'm better than you". These nodes will never be suppressed in
   # the original NMS algorithm.
-  good_box = tf.constant(1.) - _reduce_or(better, axis=-1)
+  good_box = one - _reduce_or(better, axis=-1)
   # good_better: is a tensor with zeros and ones so that
   # [batch dims ..., box_1, box_2] represents the adjacency matrix for the
   # `good_better` relation on all boxes set. `good_better` relation is defined
@@ -253,7 +255,7 @@ def _refine_nms_graph_to_original_algorithm(better: tf.Tensor) -> tf.Tensor:
   # does not have any `good_better` boxes.
   # INTUITION: These nodes are nodes which are not suppressed by `good` boxes
   # in the original NMS algorithm.
-  not_bad_box = tf.constant(1.) - _reduce_or(good_better, axis=-1)
+  not_bad_box = one - _reduce_or(good_better, axis=-1)
   # return: is a tensor with zeros and ones so that
   # [batch dims ..., box_1, box_2] represents the adjacency matrix for the
   # `better` relation on all boxes set which is closer to represent suppression
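The commit only swaps the hard-coded float constant for a dtype-matched `one` here, but the surrounding comments describe the refinement idea, so a toy self-contained walk-through of one refinement pass may help (not from the commit). `reduce_or` stands in for the module's `_reduce_or` helper (assumed to be an OR-reduction over 0/1 tensors), and `good_better` is reconstructed from the comments as `better` edges restricted to good boxes; the exact formula in edgetpu.py may differ:

# better[i, j] == 1 means "box j is better than (i.e. can suppress) box i".
import tensorflow as tf

def reduce_or(x, axis):
  # Assumed stand-in for edgetpu._reduce_or on 0/1 tensors.
  return tf.minimum(tf.reduce_sum(x, axis=axis), 1.0)

better = tf.constant([[0., 0., 0.],   # box 0: nothing better -> `good`
                      [1., 0., 0.],   # box 1: suppressed by good box 0
                      [0., 1., 0.]])  # box 2: only suppressor is box 1
one = tf.constant(1.0)                # dtype-matched, as in the new code
good_box = one - reduce_or(better, axis=-1)           # [1., 0., 0.]
good_better = better * good_box                       # keep edges whose better box is good
not_bad_box = one - reduce_or(good_better, axis=-1)   # [1., 0., 1.]
# Box 2 survives because its only suppressor (box 1) is itself suppressed by a
# good box, matching the sequential NMS outcome the refinement approximates.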
@@ -306,8 +308,9 @@ def _non_max_suppression_as_is(boxes: tf.Tensor,
   remaining = tf.reshape(remaining, scores.shape)
   # top_k runs on TPU cores, let it happen, TPU tiles implementation is slower.
   top_k = tf.math.top_k(scores * remaining, output_size)
-  return (tf.cast(top_k.indices, top_k.values.dtype) * _greater(top_k.values) -
-          _same(top_k.values))
+  valid = _greater(top_k.values)
+  return (tf.cast(top_k.indices, top_k.values.dtype) * valid + valid -
+          tf.constant(1, dtype=top_k.values.dtype))
 
 
 def concat_and_top_k(
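The rewritten return derives the -1 padding directly from the `valid` mask. A quick self-contained check of the arithmetic (not part of the commit):

# indices * valid + valid - 1: kept slots keep their index (including 0),
# padded slots (valid == 0) become -1, matching the "-1 for invalid" contract
# documented for non_max_suppression_padded.
import tensorflow as tf

indices = tf.constant([3., 0., 7.])  # top_k indices cast to the values dtype
valid = tf.constant([1., 1., 0.])    # _greater(top_k.values): 1 = kept, 0 = padding
print((indices * valid + valid - tf.constant(1.)).numpy())  # [ 3.  0. -1.]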

0 commit comments