Skip to content

Commit b3d26cd

Browse files
authored
Fix bug in detection_output and mAP calculation in SSD. (#8985)
* Clipping bbox in the mAP evaluator calculation.
* Fix bug in detection_output and mAP calculation in SSD.
* Fix bug in detection.py.
* Fix bug in test_detection_map_op.py.
1 parent e4ce479 commit b3d26cd

File tree

5 files changed

+50
-61
lines changed

5 files changed

+50
-61
lines changed

paddle/fluid/operators/detection_map_op.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,6 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
273273
std::map<int, std::vector<std::pair<T, int>>>& true_pos,
274274
std::map<int, std::vector<std::pair<T, int>>>& false_pos,
275275
const int class_num) const {
276-
constexpr T kEPS = static_cast<T>(1e-6);
277276
const int* pos_count_data = input_pos_count.data<int>();
278277
for (int i = 0; i < class_num; ++i) {
279278
label_pos_count[i] = pos_count_data[i];
@@ -282,12 +281,11 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
282281
auto SetData = [](const framework::LoDTensor& pos_tensor,
283282
std::map<int, std::vector<std::pair<T, int>>>& pos) {
284283
const T* pos_data = pos_tensor.data<T>();
285-
auto pos_data_lod = pos_tensor.lod();
286-
for (size_t i = 0; i < pos_data_lod.size(); ++i) {
287-
for (size_t j = pos_data_lod[0][i]; j < pos_data_lod[0][i + 1]; ++j) {
284+
auto pos_data_lod = pos_tensor.lod()[0];
285+
for (size_t i = 0; i < pos_data_lod.size() - 1; ++i) {
286+
for (size_t j = pos_data_lod[i]; j < pos_data_lod[i + 1]; ++j) {
288287
T score = pos_data[j * 2];
289-
int flag = 1;
290-
if (pos_data[j * 2 + 1] < kEPS) flag = 0;
288+
int flag = pos_data[j * 2 + 1];
291289
pos[i].push_back(std::make_pair(score, flag));
292290
}
293291
}

paddle/fluid/operators/prior_box_op.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@ class PriorBoxOpMaker : public framework::OpProtoAndCheckerMaker {
111111
});
112112
AddAttr<std::vector<float>>(
113113
"max_sizes",
114-
"(vector<float>) List of max sizes of generated prior boxes.");
114+
"(vector<float>) List of max sizes of generated prior boxes.")
115+
.SetDefault(std::vector<float>{});
115116
AddAttr<std::vector<float>>(
116117
"aspect_ratios",
117118
"(vector<float>) List of aspect ratios of generated prior boxes.");

paddle/fluid/operators/prior_box_op.h

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,6 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
9797
boxes->mutable_data<T>(ctx.GetPlace());
9898
vars->mutable_data<T>(ctx.GetPlace());
9999

100-
T inv_img_width = 1.0 / img_width;
101-
T inv_img_height = 1.0 / img_height;
102-
103100
auto e_boxes = framework::EigenTensor<T, 4>::From(*boxes);
104101
for (int h = 0; h < feature_height; ++h) {
105102
for (int w = 0; w < feature_width; ++w) {
@@ -110,36 +107,30 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
110107
for (size_t s = 0; s < min_sizes.size(); ++s) {
111108
auto min_size = min_sizes[s];
112109
// first prior: aspect_ratio = 1, size = min_size
113-
box_width = box_height = min_size;
110+
box_width = box_height = min_size / 2.;
114111
// xmin
115-
e_boxes(h, w, idx, 0) = (center_x - box_width * 0.5) * inv_img_width;
112+
e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width;
116113
// ymin
117-
e_boxes(h, w, idx, 1) =
118-
(center_y - box_height * 0.5) * inv_img_height;
114+
e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height;
119115
// xmax
120-
e_boxes(h, w, idx, 2) = (center_x + box_width * 0.5) * inv_img_width;
116+
e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width;
121117
// ymax
122-
e_boxes(h, w, idx, 3) =
123-
(center_y + box_height * 0.5) * inv_img_height;
118+
e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height;
124119

125120
idx++;
126121
if (max_sizes.size() > 0) {
127122
auto max_size = max_sizes[s];
128123
// second prior: aspect_ratio = 1,
129124
// size = sqrt(min_size * max_size)
130-
box_width = box_height = sqrt(min_size * max_size);
125+
box_width = box_height = sqrt(min_size * max_size) / 2.;
131126
// xmin
132-
e_boxes(h, w, idx, 0) =
133-
(center_x - box_width * 0.5) * inv_img_width;
127+
e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width;
134128
// ymin
135-
e_boxes(h, w, idx, 1) =
136-
(center_y - box_height * 0.5) * inv_img_height;
129+
e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height;
137130
// xmax
138-
e_boxes(h, w, idx, 2) =
139-
(center_x + box_width * 0.5) * inv_img_width;
131+
e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width;
140132
// ymax
141-
e_boxes(h, w, idx, 3) =
142-
(center_y + box_height * 0.5) * inv_img_height;
133+
e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height;
143134
idx++;
144135
}
145136

@@ -149,20 +140,16 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
149140
if (fabs(ar - 1.) < 1e-6) {
150141
continue;
151142
}
152-
box_width = min_size * sqrt(ar);
153-
box_height = min_size / sqrt(ar);
143+
box_width = min_size * sqrt(ar) / 2.;
144+
box_height = min_size / sqrt(ar) / 2.;
154145
// xmin
155-
e_boxes(h, w, idx, 0) =
156-
(center_x - box_width * 0.5) * inv_img_width;
146+
e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width;
157147
// ymin
158-
e_boxes(h, w, idx, 1) =
159-
(center_y - box_height * 0.5) * inv_img_height;
148+
e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height;
160149
// xmax
161-
e_boxes(h, w, idx, 2) =
162-
(center_x + box_width * 0.5) * inv_img_width;
150+
e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width;
163151
// ymax
164-
e_boxes(h, w, idx, 3) =
165-
(center_y + box_height * 0.5) * inv_img_height;
152+
e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height;
166153
idx++;
167154
}
168155
}

python/paddle/fluid/layers/detection.py

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,13 @@ class number, M is number of bounding boxes. For each category
130130
target_box=loc,
131131
code_type='decode_center_size')
132132

133-
nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype)
133+
old_shape = scores.shape
134+
scores = ops.reshape(x=scores, shape=(-1, old_shape[-1]))
135+
scores = ops.softmax(x=scores)
136+
scores = ops.reshape(x=scores, shape=old_shape)
134137
scores = nn.transpose(scores, perm=[0, 2, 1])
138+
139+
nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype)
135140
helper.append_op(
136141
type="multiclass_nms",
137142
inputs={'Scores': scores,
@@ -562,16 +567,16 @@ def multi_box_head(inputs,
562567
base_size,
563568
num_classes,
564569
aspect_ratios,
565-
min_ratio,
566-
max_ratio,
570+
min_ratio=None,
571+
max_ratio=None,
567572
min_sizes=None,
568573
max_sizes=None,
569574
steps=None,
570575
step_w=None,
571576
step_h=None,
572577
offset=0.5,
573-
variance=[0.1, 0.1, 0.1, 0.1],
574-
flip=False,
578+
variance=[0.1, 0.1, 0.2, 0.2],
579+
flip=True,
575580
clip=False,
576581
kernel_size=1,
577582
pad=0,
@@ -614,7 +619,7 @@ def multi_box_head(inputs,
614619
the inputs[i] will be automatically calculated. Default: None.
615620
offset(float): Prior boxes center offset. Default: 0.5
616621
variance(list|tuple): the variances to be encoded in prior boxes.
617-
Default:[0.1, 0.1, 0.1, 0.1].
622+
Default:[0.1, 0.1, 0.2, 0.2].
618623
flip(bool): Whether to flip aspect ratios. Default: True.
619624
clip(bool): Whether to clip out-of-boundary boxes. Default: False.
620625
kernel_size(int): The kernel size of conv2d. Default: 1.
@@ -668,6 +673,19 @@ def _prior_box_(input,
668673
helper = LayerHelper("prior_box", **locals())
669674
dtype = helper.input_dtype()
670675

676+
attrs = {
677+
'min_sizes': min_sizes,
678+
'aspect_ratios': aspect_ratios,
679+
'variances': variance,
680+
'flip': flip,
681+
'clip': clip,
682+
'step_w': step_w,
683+
'step_h': step_h,
684+
'offset': offset
685+
}
686+
if len(max_sizes) > 0 and max_sizes[0] > 0:
687+
attrs['max_sizes'] = max_sizes
688+
671689
box = helper.create_tmp_variable(dtype)
672690
var = helper.create_tmp_variable(dtype)
673691
helper.append_op(
@@ -676,17 +694,7 @@ def _prior_box_(input,
676694
"Image": image},
677695
outputs={"Boxes": box,
678696
"Variances": var},
679-
attrs={
680-
'min_sizes': min_sizes,
681-
'max_sizes': max_sizes,
682-
'aspect_ratios': aspect_ratios,
683-
'variances': variance,
684-
'flip': flip,
685-
'clip': clip,
686-
'step_w': step_w,
687-
'step_h': step_h,
688-
'offset': offset
689-
})
697+
attrs=attrs, )
690698
return box, var
691699

692700
def _reshape_with_axis_(input, axis=1):
@@ -714,7 +722,7 @@ def _is_list_or_tuple_and_equal(data, length, err_info):
714722
if num_layer <= 2:
715723
assert min_sizes is not None and max_sizes is not None
716724
assert len(min_sizes) == num_layer and len(max_sizes) == num_layer
717-
else:
725+
elif min_sizes is None and max_sizes is None:
718726
min_sizes = []
719727
max_sizes = []
720728
step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2)))
@@ -759,9 +767,6 @@ def _is_list_or_tuple_and_equal(data, length, err_info):
759767
min_size = [min_size]
760768
if not _is_list_or_tuple_(max_size):
761769
max_size = [max_size]
762-
if not (len(max_size) == len(min_size)):
763-
raise ValueError(
764-
'the length of max_size and min_size should be equal.')
765770

766771
aspect_ratio = []
767772
if aspect_ratios is not None:
@@ -779,7 +784,7 @@ def _is_list_or_tuple_and_equal(data, length, err_info):
779784

780785
num_boxes = box.shape[2]
781786

782-
# get box_loc
787+
# get loc
783788
num_loc_output = num_boxes * 4
784789
mbox_loc = nn.conv2d(
785790
input=input,
@@ -796,7 +801,7 @@ def _is_list_or_tuple_and_equal(data, length, err_info):
796801
mbox_loc_flatten = ops.reshape(mbox_loc, shape=new_shape)
797802
mbox_locs.append(mbox_loc_flatten)
798803

799-
# get conf_loc
804+
# get conf
800805
num_conf_output = num_boxes * num_classes
801806
conf_loc = nn.conv2d(
802807
input=input,

python/paddle/fluid/tests/unittests/test_detection_map_op.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,6 @@ def get_accumulation(pos_list):
166166
elif not difficult:
167167
label_count[label] += 1
168168

169-
true_pos = collections.defaultdict(list)
170-
false_pos = collections.defaultdict(list)
171169
for (label, score, tp, fp) in tf_pos:
172170
true_pos[label].append([score, tp])
173171
false_pos[label].append([score, fp])

0 commit comments

Comments
 (0)