Skip to content

Commit 2a77fc5

Browse files
authored
Enhance detection_map_op and more check in prior_box API. (#10796)
1. If none of the bboxes are difficult ground truth, users do not need to define a data layer for this flag, and the input can be None for the detection_map API. 2. Set a default value for aspect_ratios in the prior_box API. 3. Add more checks in the prior_box API.
1 parent be26b71 commit 2a77fc5

File tree

5 files changed

+68
-27
lines changed

5 files changed

+68
-27
lines changed

paddle/fluid/operators/detection_map_op.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ class DetectionMAPOp : public framework::OperatorWithKernel {
5151
PADDLE_ENFORCE_EQ(label_dims.size(), 2,
5252
"The rank of Input(Label) must be 2, "
5353
"the shape is [N, 6].");
54-
PADDLE_ENFORCE_EQ(label_dims[1], 6, "The shape is of Input(Label) [N, 6].");
54+
PADDLE_ENFORCE(label_dims[1] == 6 || label_dims[1] == 5,
55+
"The shape of Input(Label) is [N, 6] or [N, 5].");
5556

5657
if (ctx->HasInput("PosCount")) {
5758
PADDLE_ENFORCE(ctx->HasInput("TruePos"),
@@ -88,9 +89,10 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
8889
"offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
8990
"no detected data.");
9091
AddInput("Label",
91-
"(LoDTensor) A 2-D LoDTensor with shape[N, 6] represents the"
92+
"(LoDTensor) A 2-D LoDTensor represents the"
9293
"Labeled ground-truth data. Each row has 6 values: "
93-
"[label, is_difficult, xmin, ymin, xmax, ymax], N is the total "
94+
"[label, xmin, ymin, xmax, ymax, is_difficult] or 5 values: "
95+
"[label, xmin, ymin, xmax, ymax], where N is the total "
9496
"number of ground-truth data in this mini-batch. For each "
9597
"instance, the offsets in first dimension are called LoD, "
9698
"the number of offset is N + 1, if LoD[i + 1] - LoD[i] == 0, "

paddle/fluid/operators/detection_map_op.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
7272
auto* out_false_pos = ctx.Output<framework::LoDTensor>("AccumFalsePos");
7373

7474
float overlap_threshold = ctx.Attr<float>("overlap_threshold");
75-
float evaluate_difficult = ctx.Attr<bool>("evaluate_difficult");
75+
bool evaluate_difficult = ctx.Attr<bool>("evaluate_difficult");
7676
auto ap_type = GetAPType(ctx.Attr<std::string>("ap_type"));
7777
int class_num = ctx.Attr<int>("class_num");
7878

@@ -175,14 +175,20 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
175175
for (int n = 0; n < batch_size; ++n) {
176176
std::map<int, std::vector<Box>> boxes;
177177
for (size_t i = label_index[n]; i < label_index[n + 1]; ++i) {
178-
Box box(labels(i, 2), labels(i, 3), labels(i, 4), labels(i, 5));
179178
int label = labels(i, 0);
180-
auto is_difficult = labels(i, 1);
181-
if (std::abs(is_difficult - 0.0) < 1e-6)
182-
box.is_difficult = false;
183-
else
184-
box.is_difficult = true;
185-
boxes[label].push_back(box);
179+
if (input_label.dims()[1] == 6) {
180+
Box box(labels(i, 2), labels(i, 3), labels(i, 4), labels(i, 5));
181+
auto is_difficult = labels(i, 1);
182+
if (std::abs(is_difficult - 0.0) < 1e-6)
183+
box.is_difficult = false;
184+
else
185+
box.is_difficult = true;
186+
boxes[label].push_back(box);
187+
} else {
188+
PADDLE_ENFORCE_EQ(input_label.dims()[1], 5);
189+
Box box(labels(i, 1), labels(i, 2), labels(i, 3), labels(i, 4));
190+
boxes[label].push_back(box);
191+
}
186192
}
187193
gt_boxes->push_back(boxes);
188194
}

python/paddle/fluid/evaluator.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -273,18 +273,20 @@ class DetectionMAP(Evaluator):
273273
[M, 6]. The layout is [label, confidence, xmin, ymin, xmax, ymax].
274274
gt_label (Variable): The ground truth label index, which is a LoDTensor
275275
with shape [N, 1].
276-
gt_difficult (Variable): Whether this ground truth is a difficult
277-
bounding box (bbox), which is a LoDTensor [N, 1].
278276
gt_box (Variable): The ground truth bounding box (bbox), which is a
279277
LoDTensor with shape [N, 4]. The layout is [xmin, ymin, xmax, ymax].
278+
gt_difficult (Variable|None): Whether this ground truth is a difficult
279+
bounding box (bbox), which can be a LoDTensor with shape [N, 1] or None. If None,
280+
it means none of the ground truth bboxes are difficult.
280281
class_num (int): The class number.
281282
background_label (int): The index of background label, the background
282283
label will be ignored. If set to -1, then all categories will be
283284
considered, 0 by default.
284285
overlap_threshold (float): The threshold for deciding true/false
285286
positive, 0.5 by default.
286287
evaluate_difficult (bool): Whether to consider difficult ground truth
287-
for evaluation, True by defalut.
288+
for evaluation, True by default. This argument does not work when
289+
gt_difficult is None.
288290
ap_version (string): The average precision calculation ways, it must be
289291
'integral' or '11point'. Please check
290292
https://sanchom.wordpress.com/tag/average-precision/ for details.
@@ -295,7 +297,7 @@ class DetectionMAP(Evaluator):
295297
296298
exe = fluid.executor(place)
297299
map_evaluator = fluid.Evaluator.DetectionMAP(input,
298-
gt_label, gt_difficult, gt_box)
300+
gt_label, gt_box, gt_difficult)
299301
cur_map, accum_map = map_evaluator.get_map_var()
300302
fetch = [cost, cur_map, accum_map]
301303
for epoch in PASS_NUM:
@@ -313,17 +315,20 @@ def __init__(self,
313315
input,
314316
gt_label,
315317
gt_box,
316-
gt_difficult,
317-
class_num,
318+
gt_difficult=None,
319+
class_num=None,
318320
background_label=0,
319321
overlap_threshold=0.5,
320322
evaluate_difficult=True,
321323
ap_version='integral'):
322324
super(DetectionMAP, self).__init__("map_eval")
323325

324326
gt_label = layers.cast(x=gt_label, dtype=gt_box.dtype)
325-
gt_difficult = layers.cast(x=gt_difficult, dtype=gt_box.dtype)
326-
label = layers.concat([gt_label, gt_difficult, gt_box], axis=1)
327+
if gt_difficult:
328+
gt_difficult = layers.cast(x=gt_difficult, dtype=gt_box.dtype)
329+
label = layers.concat([gt_label, gt_difficult, gt_box], axis=1)
330+
else:
331+
label = layers.concat([gt_label, gt_box], axis=1)
327332

328333
# calculate mean average precision (mAP) of current mini-batch
329334
map = layers.detection_map(

python/paddle/fluid/layers/detection.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,7 @@ def prior_box(input,
569569
image,
570570
min_sizes,
571571
max_sizes=None,
572-
aspect_ratios=None,
572+
aspect_ratios=[1.],
573573
variance=[0.1, 0.1, 0.2, 0.2],
574574
flip=False,
575575
clip=False,
@@ -589,19 +589,19 @@ def prior_box(input,
589589
input(Variable): The Input Variables, the format is NCHW.
590590
image(Variable): The input image data of PriorBoxOp,
591591
the layout is NCHW.
592-
min_sizes(list|tuple): min sizes of generated prior boxes.
592+
min_sizes(list|tuple|float value): min sizes of generated prior boxes.
593593
max_sizes(list|tuple|None): max sizes of generated prior boxes.
594594
Default: None.
595-
aspect_ratios(list|tuple): the aspect ratios of generated prior
596-
boxes. Default: None.
595+
aspect_ratios(list|tuple|float value): the aspect ratios of generated
596+
prior boxes. Default: [1.].
597597
variance(list|tuple): the variances to be encoded in prior boxes.
598598
Default:[0.1, 0.1, 0.2, 0.2].
599599
flip(bool): Whether to flip aspect ratios. Default:False.
600600
clip(bool): Whether to clip out-of-boundary boxes. Default: False.
601-
step(list|turple): Prior boxes step across weight and height, If
601+
steps(list|tuple): Prior boxes step across width and height, If
602602
steps[0] == 0.0/steps[1] == 0.0, the prior boxes step across
603-
height/weight of the input will be automatically calculated.
604-
Default: [0.0]
603+
width/height of the input will be automatically calculated.
604+
Default: [0., 0.]
605605
offset(float): Prior boxes center offset. Default: 0.5
606606
name(str): Name of the prior box op. Default: None.
607607
@@ -630,6 +630,21 @@ def prior_box(input,
630630
helper = LayerHelper("prior_box", **locals())
631631
dtype = helper.input_dtype()
632632

633+
def _is_list_or_tuple_(data):
634+
return (isinstance(data, list) or isinstance(data, tuple))
635+
636+
if not _is_list_or_tuple_(min_sizes):
637+
min_sizes = [min_sizes]
638+
if not _is_list_or_tuple_(aspect_ratios):
639+
aspect_ratios = [aspect_ratios]
640+
if not (_is_list_or_tuple_(steps) and len(steps) == 2):
641+
raise ValueError('steps should be a list or tuple ',
642+
'with length 2, (step_width, step_height).')
643+
644+
min_sizes = list(map(float, min_sizes))
645+
aspect_ratios = list(map(float, aspect_ratios))
646+
steps = list(map(float, steps))
647+
633648
attrs = {
634649
'min_sizes': min_sizes,
635650
'aspect_ratios': aspect_ratios,
@@ -641,6 +656,8 @@ def prior_box(input,
641656
'offset': offset
642657
}
643658
if max_sizes is not None and len(max_sizes) > 0 and max_sizes[0] > 0:
659+
if not _is_list_or_tuple_(max_sizes):
660+
max_sizes = [max_sizes]
644661
attrs['max_sizes'] = max_sizes
645662

646663
box = helper.create_tmp_variable(dtype)

python/paddle/fluid/tests/unittests/test_detection_map_op.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,9 @@ def get_accumulation(pos_list):
160160
label_count, true_pos, false_pos = get_input_pos(
161161
self.class_pos_count, self.true_pos, self.true_pos_lod,
162162
self.false_pos, self.false_pos_lod)
163-
for (label, difficult, xmin, ymin, xmax, ymax) in self.label:
163+
for v in self.label:
164+
label = v[0]
165+
difficult = False if len(v) == 5 else v[1]
164166
if self.evaluate_difficult:
165167
label_count[label] += 1
166168
elif not difficult:
@@ -245,6 +247,15 @@ def init_test_case(self):
245247
[2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]]
246248

247249

250+
class TestDetectionMAPOpWithoutDiff(TestDetectionMAPOp):
251+
def init_test_case(self):
252+
super(TestDetectionMAPOpWithoutDiff, self).init_test_case()
253+
254+
# label xmin ymin xmax ymax
255+
self.label = [[1, 0.1, 0.1, 0.3, 0.3], [1, 0.6, 0.6, 0.8, 0.8],
256+
[2, 0.3, 0.3, 0.6, 0.5], [1, 0.7, 0.1, 0.9, 0.3]]
257+
258+
248259
class TestDetectionMAPOp11Point(TestDetectionMAPOp):
249260
def init_test_case(self):
250261
super(TestDetectionMAPOp11Point, self).init_test_case()

0 commit comments

Comments
 (0)