Skip to content

Commit ea2bdd1

Browse files
committed
Merge branch 'develop' into remove_unused_code
2 parents f7bbcfa + 9cb8738 commit ea2bdd1

File tree

18 files changed

+475
-237
lines changed

18 files changed

+475
-237
lines changed

paddle/fluid/inference/api/demo_ci/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ else
2121
fi
2222

2323
USE_TENSORRT=OFF
24-
if [ [-d"$TENSORRT_INCLUDE_DIR"] -a [-d"$TENSORRT_LIB_DIR"] ]; then
24+
if [ -d "$TENSORRT_INCLUDE_DIR" -a -d "$TENSORRT_LIB_DIR" ]; then
2525
USE_TENSORRT=ON
2626
fi
2727

paddle/fluid/inference/tensorrt/convert/pool2d_op.cc

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,22 @@ class Pool2dOpConverter : public OpConverter {
4242
boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
4343
std::vector<int> paddings =
4444
boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
45+
bool ceil_mode = boost::get<bool>(op_desc.GetAttr("ceil_mode"));
4546

47+
nvinfer1::Dims input_shape = input1->getDimensions();
48+
int nbDims = input_shape.nbDims;
4649
nvinfer1::DimsHW nv_ksize(ksize[0], ksize[1]);
50+
nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
51+
nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
52+
4753
if (global_pooling == true) {
48-
nvinfer1::Dims input_shape = input1->getDimensions();
49-
int nbDims = input_shape.nbDims;
5054
nv_ksize.d[0] = input_shape.d[nbDims - 2];
5155
nv_ksize.d[1] = input_shape.d[nbDims - 1];
56+
nv_strides.h() = 1;
57+
nv_strides.w() = 1;
58+
nv_paddings.h() = 0;
59+
nv_paddings.w() = 0;
5260
}
53-
const nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
54-
const nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
5561

5662
PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL);
5763

@@ -64,6 +70,36 @@ class Pool2dOpConverter : public OpConverter {
6470
PADDLE_THROW("TensorRT unsupported pooling type!");
6571
}
6672

73+
if (ceil_mode) {
74+
nvinfer1::DimsHW pre_pad(0, 0);
75+
nvinfer1::DimsHW post_pad(0, 0);
76+
int input_height = input_shape.d[nbDims - 2];
77+
int input_width = input_shape.d[nbDims - 1];
78+
int floor_h_output_size =
79+
(input_height - ksize[0] + 2 * paddings[0]) / strides[0] + 1;
80+
int ceil_h_output_size =
81+
(input_height - ksize[0] + 2 * paddings[0] + strides[0] - 1) /
82+
strides[0] +
83+
1;
84+
85+
int floor_w_output_size =
86+
(input_width - ksize[1] + 2 * paddings[1]) / strides[1] + 1;
87+
int ceil_w_output_size =
88+
(input_width - ksize[1] + 2 * paddings[1] + strides[1] - 1) /
89+
strides[1] +
90+
1;
91+
if (floor_h_output_size != ceil_h_output_size) {
92+
post_pad.h() = strides[0] - 1;
93+
}
94+
95+
if (floor_w_output_size != ceil_w_output_size) {
96+
post_pad.w() = strides[1] - 1;
97+
}
98+
auto* layer = TRT_ENGINE_ADD_LAYER(
99+
engine_, Padding, *const_cast<nvinfer1::ITensor*>(input1), pre_pad,
100+
post_pad);
101+
input1 = layer->getOutput(0);
102+
}
67103
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling,
68104
*const_cast<nvinfer1::ITensor*>(input1),
69105
nv_pool_type, nv_ksize);

paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,28 @@ namespace paddle {
2020
namespace inference {
2121
namespace tensorrt {
2222

23-
void test_pool2d(bool global_pooling) {
23+
void test_pool2d(bool global_pooling, bool ceil_mode) {
2424
framework::Scope scope;
2525
std::unordered_set<std::string> parameters;
2626
TRTConvertValidation validator(5, parameters, scope, 1 << 15);
2727

2828
// The ITensor's Dims should not contain the batch size.
2929
// So, the ITensor's Dims of input and output should be C * H * W.
30-
validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4));
30+
validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 13, 14));
3131
if (global_pooling)
3232
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 1, 1));
33+
else if (ceil_mode)
34+
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 6, 7));
3335
else
34-
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2));
36+
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 6, 6));
3537

3638
// Prepare Op description
3739
framework::OpDesc desc;
3840
desc.SetType("pool2d");
3941
desc.SetInput("X", {"pool2d-X"});
4042
desc.SetOutput("Out", {"pool2d-Out"});
4143

42-
std::vector<int> ksize({2, 2});
44+
std::vector<int> ksize({3, 3});
4345
std::vector<int> strides({2, 2});
4446
std::vector<int> paddings({0, 0});
4547
std::string pooling_t = "max";
@@ -49,6 +51,7 @@ void test_pool2d(bool global_pooling) {
4951
desc.SetAttr("strides", strides);
5052
desc.SetAttr("paddings", paddings);
5153
desc.SetAttr("global_pooling", global_pooling);
54+
desc.SetAttr("ceil_mode", ceil_mode);
5255

5356
LOG(INFO) << "set OP";
5457
validator.SetOp(*desc.Proto());
@@ -57,9 +60,10 @@ void test_pool2d(bool global_pooling) {
5760
validator.Execute(3);
5861
}
5962

60-
TEST(Pool2dOpConverter, normal) { test_pool2d(false); }
63+
TEST(Pool2dOpConverter, normal) { test_pool2d(false, false); }
64+
TEST(Pool2dOpConverter, test_global_pooling) { test_pool2d(true, false); }
6165

62-
TEST(Pool2dOpConverter, test_global_pooling) { test_pool2d(true); }
66+
TEST(Pool2dOpConverter, test_ceil_mode) { test_pool2d(false, true); }
6367

6468
} // namespace tensorrt
6569
} // namespace inference

paddle/fluid/operators/detection/rpn_target_assign_op.cc

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
5252
PADDLE_ENFORCE(
5353
ctx->HasOutput("TargetBBox"),
5454
"Output(TargetBBox) of RpnTargetAssignOp should not be null");
55+
PADDLE_ENFORCE(
56+
ctx->HasOutput("BBoxInsideWeight"),
57+
"Output(BBoxInsideWeight) of RpnTargetAssignOp should not be null");
5558

5659
auto anchor_dims = ctx->GetInputDim("Anchor");
5760
auto gt_boxes_dims = ctx->GetInputDim("GtBoxes");
@@ -68,6 +71,7 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
6871
ctx->SetOutputDim("ScoreIndex", {-1});
6972
ctx->SetOutputDim("TargetLabel", {-1, 1});
7073
ctx->SetOutputDim("TargetBBox", {-1, 4});
74+
ctx->SetOutputDim("BBoxInsideWeight", {-1, 4});
7175
}
7276

7377
protected:
@@ -169,6 +173,7 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data,
169173
const float rpn_positive_overlap,
170174
const float rpn_negative_overlap, std::vector<int>* fg_inds,
171175
std::vector<int>* bg_inds, std::vector<int>* tgt_lbl,
176+
std::vector<int>* fg_fake, std::vector<T>* bbox_inside_weight,
172177
std::minstd_rand engine, bool use_random) {
173178
float epsilon = 0.00001;
174179
int anchor_num = anchor_to_gt_max.dims()[0];
@@ -201,25 +206,41 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data,
201206
// Reservoir Sampling
202207
int fg_num = static_cast<int>(rpn_fg_fraction * rpn_batch_size_per_im);
203208
ReservoirSampling(fg_num, &fg_inds_fake, engine, use_random);
204-
fg_num = static_cast<int>(fg_inds_fake.size());
205-
for (int64_t i = 0; i < fg_num; ++i) {
209+
int fg_fake_num = static_cast<int>(fg_inds_fake.size());
210+
for (int64_t i = 0; i < fg_fake_num; ++i) {
206211
target_label[fg_inds_fake[i]] = 1;
207212
}
208213

209-
int bg_num = rpn_batch_size_per_im - fg_num;
214+
int bg_num = rpn_batch_size_per_im - fg_fake_num;
210215
for (int64_t i = 0; i < anchor_num; ++i) {
211216
if (anchor_to_gt_max_data[i] < rpn_negative_overlap) {
212217
bg_inds_fake.push_back(i);
213218
}
214219
}
215220
ReservoirSampling(bg_num, &bg_inds_fake, engine, use_random);
216221
bg_num = static_cast<int>(bg_inds_fake.size());
222+
int fake_num = 0;
217223
for (int64_t i = 0; i < bg_num; ++i) {
224+
// fg fake found
225+
if (target_label[bg_inds_fake[i]] == 1) {
226+
fake_num++;
227+
fg_fake->emplace_back(fg_inds_fake[0]);
228+
for (int j = 0; j < 4; ++j) {
229+
bbox_inside_weight->emplace_back(T(0.));
230+
}
231+
}
218232
target_label[bg_inds_fake[i]] = 0;
219233
}
220234

235+
for (int64_t i = 0; i < (fg_fake_num - fake_num) * 4; ++i) {
236+
bbox_inside_weight->emplace_back(T(1.));
237+
}
238+
221239
for (int64_t i = 0; i < anchor_num; ++i) {
222-
if (target_label[i] == 1) fg_inds->emplace_back(i);
240+
if (target_label[i] == 1) {
241+
fg_inds->emplace_back(i);
242+
fg_fake->emplace_back(i);
243+
}
223244
if (target_label[i] == 0) bg_inds->emplace_back(i);
224245
}
225246
fg_num = fg_inds->size();
@@ -248,7 +269,8 @@ std::vector<Tensor> SampleRpnFgBgGt(const platform::CPUDeviceContext& ctx,
248269
std::vector<int> bg_inds;
249270
std::vector<int> gt_inds;
250271
std::vector<int> tgt_lbl;
251-
272+
std::vector<int> fg_fake;
273+
std::vector<T> bbox_inside_weight;
252274
// Calculate the max IoU between anchors and gt boxes
253275
// Map from anchor to gt box that has highest overlap
254276
auto place = ctx.GetPlace();
@@ -275,32 +297,37 @@ std::vector<Tensor> SampleRpnFgBgGt(const platform::CPUDeviceContext& ctx,
275297
// Follow the Faster RCNN's implementation
276298
ScoreAssign(anchor_by_gt_overlap_data, anchor_to_gt_max, gt_to_anchor_max,
277299
rpn_batch_size_per_im, rpn_fg_fraction, rpn_positive_overlap,
278-
rpn_negative_overlap, &fg_inds, &bg_inds, &tgt_lbl, engine,
279-
use_random);
300+
rpn_negative_overlap, &fg_inds, &bg_inds, &tgt_lbl, &fg_fake,
301+
&bbox_inside_weight, engine, use_random);
280302

281303
int fg_num = fg_inds.size();
282304
int bg_num = bg_inds.size();
283-
gt_inds.reserve(fg_num);
284-
for (int i = 0; i < fg_num; ++i) {
285-
gt_inds.emplace_back(argmax[fg_inds[i]]);
305+
int fg_fake_num = fg_fake.size();
306+
gt_inds.reserve(fg_fake_num);
307+
for (int i = 0; i < fg_fake_num; ++i) {
308+
gt_inds.emplace_back(argmax[fg_fake[i]]);
286309
}
287-
288-
Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t;
289-
int* loc_index_data = loc_index_t.mutable_data<int>({fg_num}, place);
310+
Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, bbox_inside_weight_t;
311+
int* loc_index_data = loc_index_t.mutable_data<int>({fg_fake_num}, place);
290312
int* score_index_data =
291313
score_index_t.mutable_data<int>({fg_num + bg_num}, place);
292314
int* tgt_lbl_data = tgt_lbl_t.mutable_data<int>({fg_num + bg_num}, place);
293-
int* gt_inds_data = gt_inds_t.mutable_data<int>({fg_num}, place);
294-
std::copy(fg_inds.begin(), fg_inds.end(), loc_index_data);
315+
int* gt_inds_data = gt_inds_t.mutable_data<int>({fg_fake_num}, place);
316+
T* bbox_inside_weight_data =
317+
bbox_inside_weight_t.mutable_data<T>({fg_fake_num, 4}, place);
318+
std::copy(fg_fake.begin(), fg_fake.end(), loc_index_data);
295319
std::copy(fg_inds.begin(), fg_inds.end(), score_index_data);
296320
std::copy(bg_inds.begin(), bg_inds.end(), score_index_data + fg_num);
297321
std::copy(tgt_lbl.begin(), tgt_lbl.end(), tgt_lbl_data);
298322
std::copy(gt_inds.begin(), gt_inds.end(), gt_inds_data);
323+
std::copy(bbox_inside_weight.begin(), bbox_inside_weight.end(),
324+
bbox_inside_weight_data);
299325
std::vector<Tensor> loc_score_tgtlbl_gt;
300326
loc_score_tgtlbl_gt.emplace_back(loc_index_t);
301327
loc_score_tgtlbl_gt.emplace_back(score_index_t);
302328
loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t);
303329
loc_score_tgtlbl_gt.emplace_back(gt_inds_t);
330+
loc_score_tgtlbl_gt.emplace_back(bbox_inside_weight_t);
304331

305332
return loc_score_tgtlbl_gt;
306333
}
@@ -318,6 +345,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
318345
auto* score_index = context.Output<LoDTensor>("ScoreIndex");
319346
auto* tgt_bbox = context.Output<LoDTensor>("TargetBBox");
320347
auto* tgt_lbl = context.Output<LoDTensor>("TargetLabel");
348+
auto* bbox_inside_weight = context.Output<LoDTensor>("BBoxInsideWeight");
321349

322350
PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), 1UL,
323351
"RpnTargetAssignOp gt_boxes needs 1 level of LoD");
@@ -340,7 +368,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
340368
score_index->mutable_data<int>({max_num}, place);
341369
tgt_bbox->mutable_data<T>({max_num, 4}, place);
342370
tgt_lbl->mutable_data<int>({max_num, 1}, place);
343-
371+
bbox_inside_weight->mutable_data<T>({max_num, 4}, place);
344372
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>();
345373

346374
std::random_device rnd;
@@ -394,6 +422,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
394422
Tensor sampled_score_index = loc_score_tgtlbl_gt[1];
395423
Tensor sampled_tgtlbl = loc_score_tgtlbl_gt[2];
396424
Tensor sampled_gt_index = loc_score_tgtlbl_gt[3];
425+
Tensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4];
397426

398427
int loc_num = sampled_loc_index.dims()[0];
399428
int score_num = sampled_score_index.dims()[0];
@@ -432,6 +461,8 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
432461
AppendRpns<int>(score_index, total_score_num, &sampled_score_index_unmap);
433462
AppendRpns<T>(tgt_bbox, total_loc_num * 4, &sampled_tgt_bbox);
434463
AppendRpns<int>(tgt_lbl, total_score_num, &sampled_tgtlbl);
464+
AppendRpns<T>(bbox_inside_weight, total_loc_num * 4,
465+
&sampled_bbox_inside_weight);
435466
total_loc_num += loc_num;
436467

437468
total_score_num += score_num;
@@ -448,10 +479,12 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
448479
score_index->set_lod(loc_score);
449480
tgt_bbox->set_lod(lod_loc);
450481
tgt_lbl->set_lod(loc_score);
482+
bbox_inside_weight->set_lod(lod_loc);
451483
loc_index->Resize({total_loc_num});
452484
score_index->Resize({total_score_num});
453485
tgt_bbox->Resize({total_loc_num, 4});
454486
tgt_lbl->Resize({total_score_num, 1});
487+
bbox_inside_weight->Resize({total_loc_num, 4});
455488
}
456489
};
457490

@@ -514,6 +547,9 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
514547
"TargetLabel",
515548
"(Tensor<int>), The target labels of each anchor with shape "
516549
"[F + B, 1], F and B are sampled foreground and backgroud number.");
550+
AddOutput("BBoxInsideWeight",
551+
"(Tensor), The bbox inside weight with shape "
552+
"[F, 4], F is the sampled foreground number.");
517553
AddComment(R"DOC(
518554
This operator can be, for a given set of ground truth bboxes and the
519555
anchors, to assign classification and regression targets to each prediction.

0 commit comments

Comments
 (0)