Skip to content

Commit c2f86f9

Browse files
authored
cherry-pick, move nms2 to contrib, test=release/1.6 (#20710)
1 parent 8fb760d commit c2f86f9

File tree

3 files changed

+139
-138
lines changed

3 files changed

+139
-138
lines changed

python/paddle/fluid/contrib/layers/nn.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
'var_conv_2d',
3131
'match_matrix_tensor',
3232
'tree_conv',
33+
'multiclass_nms2',
3334
]
3435

3536

@@ -427,3 +428,138 @@ def tree_conv(nodes_vector,
427428
else:
428429
pre_activation = out
429430
return helper.append_activation(pre_activation)
431+
432+
433+
def multiclass_nms2(bboxes,
                    scores,
                    score_threshold,
                    nms_top_k,
                    keep_top_k,
                    nms_threshold=0.3,
                    normalized=True,
                    nms_eta=1.,
                    background_label=0,
                    return_index=False,
                    name=None):
    """
    **Multiclass NMS2**

    This operator is to do multi-class non maximum suppression (NMS) on
    boxes and scores.

    In the NMS step, this operator greedily selects a subset of detection
    bounding boxes that have high scores larger than score_threshold, if
    providing this threshold, then selects the largest nms_top_k confidences
    scores if nms_top_k is larger than -1. Then this operator prunes away
    boxes that have high IOU (intersection over union) overlap with already
    selected boxes by adaptive threshold NMS based on parameters of
    nms_threshold and nms_eta.

    After the NMS step, at most keep_top_k number of total bboxes are to be
    kept per image if keep_top_k is larger than -1.

    Args:
        bboxes (Variable): Two types of bboxes are supported:
                           1. (Tensor) A 3-D Tensor with shape
                           [N, M, 4 or 8 16 24 32] represents the
                           predicted locations of M bounding bboxes,
                           N is the batch size. Each bounding box has four
                           coordinate values and the layout is
                           [xmin, ymin, xmax, ymax], when box size equals
                           to 4.
                           2. (LoDTensor) A 3-D Tensor with shape [M, C, 4]
                           M is the number of bounding boxes, C is the
                           class number
        scores (Variable): Two types of scores are supported:
                           1. (Tensor) A 3-D Tensor with shape [N, C, M]
                           represents the predicted confidence predictions.
                           N is the batch size, C is the class number, M is
                           number of bounding boxes. For each category there
                           are total M scores which correspond to M bounding
                           boxes. Please note, M is equal to the 2nd
                           dimension of BBoxes.
                           2. (LoDTensor) A 2-D LoDTensor with shape [M, C].
                           M is the number of bbox, C is the class number.
                           In this case, input BBoxes should be the second
                           case with shape [M, C, 4].
        score_threshold (float): Threshold to filter out bounding boxes with
                                 low confidence score. If not provided,
                                 consider all boxes.
        nms_top_k (int): Maximum number of detections to be kept according to
                         the confidences after filtering detections based
                         on score_threshold.
        keep_top_k (int): Number of total bboxes to be kept per image after
                          NMS step. -1 means keeping all bboxes after NMS
                          step.
        nms_threshold (float): The threshold to be used in NMS. Default: 0.3
        normalized (bool): Whether detections are normalized. Default: True
        nms_eta (float): Parameter of the adaptive threshold NMS described
                         above. Default: 1.0
        background_label (int): The index of background label, the background
                                label will be ignored. If set to -1, then all
                                categories will be considered. Default: 0
        return_index(bool): Whether return selected index. Default: False
        name(str): Name of the multiclass nms op. Default: None.

    Returns:
        A tuple with two Variables: (Out, Index) if return_index is True,
        otherwise only the Variable Out is returned.

        Out: A 2-D LoDTensor with shape [No, 6] represents the detections.
        Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]
        or A 2-D LoDTensor with shape [No, 10] represents the detections.
        Each row has 10 values: [label, confidence, x1, y1, x2, y2, x3, y3,
        x4, y4]. No is the total number of detections.

        If all images have not detected results, all elements in LoD will be
        0, and output tensor is empty (None).

        Index: Only return when return_index is True. A 2-D LoDTensor with
        shape [No, 1] represents the selected index which type is Integer.
        The index is the absolute value cross batches. No is the same number
        as Out. If the index is used to gather other attribute such as age,
        one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where
        N is the batch size and M is the number of boxes.


    Examples:
        .. code-block:: python


            import paddle.fluid as fluid
            boxes = fluid.layers.data(name='bboxes', shape=[81, 4],
                                      dtype='float32', lod_level=1)
            scores = fluid.layers.data(name='scores', shape=[81],
                                      dtype='float32', lod_level=1)
            out, index = fluid.contrib.multiclass_nms2(bboxes=boxes,
                                              scores=scores,
                                              background_label=0,
                                              score_threshold=0.5,
                                              nms_top_k=400,
                                              nms_threshold=0.3,
                                              keep_top_k=200,
                                              normalized=False,
                                              return_index=True)
    """
    # Must stay the first statement: locals() is forwarded to LayerHelper and
    # should contain only the call arguments.
    helper = LayerHelper('multiclass_nms2', **locals())

    output = helper.create_variable_for_type_inference(dtype=bboxes.dtype)
    # NOTE(review): 'int' is resolved by the framework's dtype conversion;
    # confirm it matches the integer type the op writes into Index.
    index = helper.create_variable_for_type_inference(dtype='int')
    helper.append_op(
        type="multiclass_nms2",
        inputs={'BBoxes': bboxes,
                'Scores': scores},
        attrs={
            'background_label': background_label,
            'score_threshold': score_threshold,
            'nms_top_k': nms_top_k,
            'nms_threshold': nms_threshold,
            'nms_eta': nms_eta,
            'keep_top_k': keep_top_k,
            'normalized': normalized
        },
        outputs={'Out': output,
                 'Index': index})
    # Both outputs are excluded from gradient computation.
    output.stop_gradient = True
    index.stop_gradient = True

    if return_index:
        return output, index
    return output

python/paddle/fluid/layers/detection.py

Lines changed: 0 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
'yolo_box',
5454
'box_clip',
5555
'multiclass_nms',
56-
'multiclass_nms2',
5756
'retinanet_detection_output',
5857
'distribute_fpn_proposals',
5958
'box_decoder_and_assign',
@@ -3148,141 +3147,6 @@ class number. The data type is float32 or float64.
31483147
return output
31493148

31503149

3151-
def multiclass_nms2(bboxes,
                    scores,
                    score_threshold,
                    nms_top_k,
                    keep_top_k,
                    nms_threshold=0.3,
                    normalized=True,
                    nms_eta=1.,
                    background_label=0,
                    return_index=False,
                    name=None):
    """
    **Multiclass NMS2**

    This operator is to do multi-class non maximum suppression (NMS) on
    boxes and scores.

    In the NMS step, this operator greedily selects a subset of detection
    bounding boxes that have high scores larger than score_threshold, if
    providing this threshold, then selects the largest nms_top_k confidences
    scores if nms_top_k is larger than -1. Then this operator prunes away
    boxes that have high IOU (intersection over union) overlap with already
    selected boxes by adaptive threshold NMS based on parameters of
    nms_threshold and nms_eta.

    After the NMS step, at most keep_top_k number of total bboxes are to be
    kept per image if keep_top_k is larger than -1.

    Args:
        bboxes (Variable): Two types of bboxes are supported:
                           1. (Tensor) A 3-D Tensor with shape
                           [N, M, 4 or 8 16 24 32] represents the
                           predicted locations of M bounding bboxes,
                           N is the batch size. Each bounding box has four
                           coordinate values and the layout is
                           [xmin, ymin, xmax, ymax], when box size equals
                           to 4.
                           2. (LoDTensor) A 3-D Tensor with shape [M, C, 4]
                           M is the number of bounding boxes, C is the
                           class number
        scores (Variable): Two types of scores are supported:
                           1. (Tensor) A 3-D Tensor with shape [N, C, M]
                           represents the predicted confidence predictions.
                           N is the batch size, C is the class number, M is
                           number of bounding boxes. For each category there
                           are total M scores which correspond to M bounding
                           boxes. Please note, M is equal to the 2nd
                           dimension of BBoxes.
                           2. (LoDTensor) A 2-D LoDTensor with shape [M, C].
                           M is the number of bbox, C is the class number.
                           In this case, input BBoxes should be the second
                           case with shape [M, C, 4].
        score_threshold (float): Threshold to filter out bounding boxes with
                                 low confidence score. If not provided,
                                 consider all boxes.
        nms_top_k (int): Maximum number of detections to be kept according to
                         the confidences after filtering detections based
                         on score_threshold.
        keep_top_k (int): Number of total bboxes to be kept per image after
                          NMS step. -1 means keeping all bboxes after NMS
                          step.
        nms_threshold (float): The threshold to be used in NMS. Default: 0.3
        normalized (bool): Whether detections are normalized. Default: True
        nms_eta (float): Parameter of the adaptive threshold NMS described
                         above. Default: 1.0
        background_label (int): The index of background label, the background
                                label will be ignored. If set to -1, then all
                                categories will be considered. Default: 0
        return_index(bool): Whether return selected index. Default: False
        name(str): Name of the multiclass nms op. Default: None.

    Returns:
        A tuple with two Variables: (Out, Index) if return_index is True,
        otherwise only the Variable Out is returned.

        Out: A 2-D LoDTensor with shape [No, 6] represents the detections.
        Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]
        or A 2-D LoDTensor with shape [No, 10] represents the detections.
        Each row has 10 values: [label, confidence, x1, y1, x2, y2, x3, y3,
        x4, y4]. No is the total number of detections.

        If all images have not detected results, all elements in LoD will be
        0, and output tensor is empty (None).

        Index: Only return when return_index is True. A 2-D LoDTensor with
        shape [No, 1] represents the selected index which type is Integer.
        The index is the absolute value cross batches. No is the same number
        as Out. If the index is used to gather other attribute such as age,
        one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where
        N is the batch size and M is the number of boxes.


    Examples:
        .. code-block:: python


            import paddle.fluid as fluid
            boxes = fluid.layers.data(name='bboxes', shape=[81, 4],
                                      dtype='float32', lod_level=1)
            scores = fluid.layers.data(name='scores', shape=[81],
                                      dtype='float32', lod_level=1)
            out, index = fluid.layers.multiclass_nms2(bboxes=boxes,
                                              scores=scores,
                                              background_label=0,
                                              score_threshold=0.5,
                                              nms_top_k=400,
                                              nms_threshold=0.3,
                                              keep_top_k=200,
                                              normalized=False,
                                              return_index=True)
    """
    # Must stay the first statement: locals() is forwarded to LayerHelper and
    # should contain only the call arguments.
    helper = LayerHelper('multiclass_nms2', **locals())

    output = helper.create_variable_for_type_inference(dtype=bboxes.dtype)
    index = helper.create_variable_for_type_inference(dtype='int')
    helper.append_op(
        type="multiclass_nms2",
        inputs={'BBoxes': bboxes,
                'Scores': scores},
        attrs={
            'background_label': background_label,
            'score_threshold': score_threshold,
            'nms_top_k': nms_top_k,
            'nms_threshold': nms_threshold,
            'nms_eta': nms_eta,
            'keep_top_k': keep_top_k,
            'normalized': normalized
        },
        outputs={'Out': output,
                 'Index': index})
    # Both outputs are excluded from gradient computation.
    output.stop_gradient = True
    index.stop_gradient = True

    if return_index:
        return output, index
    return output
3284-
3285-
32863150
def distribute_fpn_proposals(fpn_rois,
32873151
min_level,
32883152
max_level,

python/paddle/fluid/tests/test_detection.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -557,8 +557,9 @@ def test_multiclass_nms2(self):
557557
bboxes = layers.data(
558558
name='bboxes', shape=[-1, 10, 4], dtype='float32')
559559
scores = layers.data(name='scores', shape=[-1, 10], dtype='float32')
560-
output = layers.multiclass_nms2(bboxes, scores, 0.3, 400, 200, 0.7)
561-
output2, index = layers.multiclass_nms2(
560+
output = fluid.contrib.multiclass_nms2(bboxes, scores, 0.3, 400,
561+
200, 0.7)
562+
output2, index = fluid.contrib.multiclass_nms2(
562563
bboxes, scores, 0.3, 400, 200, 0.7, return_index=True)
563564
self.assertIsNotNone(output)
564565
self.assertIsNotNone(output2)

0 commit comments

Comments
 (0)