@@ -52,6 +52,9 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
52
52
PADDLE_ENFORCE (
53
53
ctx->HasOutput (" TargetBBox" ),
54
54
" Output(TargetBBox) of RpnTargetAssignOp should not be null" );
55
+ PADDLE_ENFORCE (
56
+ ctx->HasOutput (" BBoxInsideWeight" ),
57
+ " Output(BBoxInsideWeight) of RpnTargetAssignOp should not be null" );
55
58
56
59
auto anchor_dims = ctx->GetInputDim (" Anchor" );
57
60
auto gt_boxes_dims = ctx->GetInputDim (" GtBoxes" );
@@ -68,6 +71,7 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
68
71
ctx->SetOutputDim (" ScoreIndex" , {-1 });
69
72
ctx->SetOutputDim (" TargetLabel" , {-1 , 1 });
70
73
ctx->SetOutputDim (" TargetBBox" , {-1 , 4 });
74
+ ctx->SetOutputDim (" BBoxInsideWeight" , {-1 , 4 });
71
75
}
72
76
73
77
protected:
@@ -169,6 +173,7 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data,
169
173
const float rpn_positive_overlap,
170
174
const float rpn_negative_overlap, std::vector<int >* fg_inds,
171
175
std::vector<int >* bg_inds, std::vector<int >* tgt_lbl,
176
+ std::vector<int >* fg_fake, std::vector<T>* bbox_inside_weight,
172
177
std::minstd_rand engine, bool use_random) {
173
178
float epsilon = 0.00001 ;
174
179
int anchor_num = anchor_to_gt_max.dims ()[0 ];
@@ -201,25 +206,41 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data,
201
206
// Reservoir Sampling
202
207
int fg_num = static_cast <int >(rpn_fg_fraction * rpn_batch_size_per_im);
203
208
ReservoirSampling (fg_num, &fg_inds_fake, engine, use_random);
204
- fg_num = static_cast <int >(fg_inds_fake.size ());
205
- for (int64_t i = 0 ; i < fg_num ; ++i) {
209
+ int fg_fake_num = static_cast <int >(fg_inds_fake.size ());
210
+ for (int64_t i = 0 ; i < fg_fake_num ; ++i) {
206
211
target_label[fg_inds_fake[i]] = 1 ;
207
212
}
208
213
209
- int bg_num = rpn_batch_size_per_im - fg_num ;
214
+ int bg_num = rpn_batch_size_per_im - fg_fake_num ;
210
215
for (int64_t i = 0 ; i < anchor_num; ++i) {
211
216
if (anchor_to_gt_max_data[i] < rpn_negative_overlap) {
212
217
bg_inds_fake.push_back (i);
213
218
}
214
219
}
215
220
ReservoirSampling (bg_num, &bg_inds_fake, engine, use_random);
216
221
bg_num = static_cast <int >(bg_inds_fake.size ());
222
+ int fake_num = 0 ;
217
223
for (int64_t i = 0 ; i < bg_num; ++i) {
224
+ // fg fake found
225
+ if (target_label[bg_inds_fake[i]] == 1 ) {
226
+ fake_num++;
227
+ fg_fake->emplace_back (fg_inds_fake[0 ]);
228
+ for (int j = 0 ; j < 4 ; ++j) {
229
+ bbox_inside_weight->emplace_back (T (0 .));
230
+ }
231
+ }
218
232
target_label[bg_inds_fake[i]] = 0 ;
219
233
}
220
234
235
+ for (int64_t i = 0 ; i < (fg_fake_num - fake_num) * 4 ; ++i) {
236
+ bbox_inside_weight->emplace_back (T (1 .));
237
+ }
238
+
221
239
for (int64_t i = 0 ; i < anchor_num; ++i) {
222
- if (target_label[i] == 1 ) fg_inds->emplace_back (i);
240
+ if (target_label[i] == 1 ) {
241
+ fg_inds->emplace_back (i);
242
+ fg_fake->emplace_back (i);
243
+ }
223
244
if (target_label[i] == 0 ) bg_inds->emplace_back (i);
224
245
}
225
246
fg_num = fg_inds->size ();
@@ -248,7 +269,8 @@ std::vector<Tensor> SampleRpnFgBgGt(const platform::CPUDeviceContext& ctx,
248
269
std::vector<int > bg_inds;
249
270
std::vector<int > gt_inds;
250
271
std::vector<int > tgt_lbl;
251
-
272
+ std::vector<int > fg_fake;
273
+ std::vector<T> bbox_inside_weight;
252
274
// Calculate the max IoU between anchors and gt boxes
253
275
// Map from anchor to gt box that has highest overlap
254
276
auto place = ctx.GetPlace ();
@@ -275,32 +297,37 @@ std::vector<Tensor> SampleRpnFgBgGt(const platform::CPUDeviceContext& ctx,
275
297
// Follow the Faster RCNN's implementation
276
298
ScoreAssign (anchor_by_gt_overlap_data, anchor_to_gt_max, gt_to_anchor_max,
277
299
rpn_batch_size_per_im, rpn_fg_fraction, rpn_positive_overlap,
278
- rpn_negative_overlap, &fg_inds, &bg_inds, &tgt_lbl, engine ,
279
- use_random);
300
+ rpn_negative_overlap, &fg_inds, &bg_inds, &tgt_lbl, &fg_fake ,
301
+ &bbox_inside_weight, engine, use_random);
280
302
281
303
int fg_num = fg_inds.size ();
282
304
int bg_num = bg_inds.size ();
283
- gt_inds.reserve (fg_num);
284
- for (int i = 0 ; i < fg_num; ++i) {
285
- gt_inds.emplace_back (argmax[fg_inds[i]]);
305
+ int fg_fake_num = fg_fake.size ();
306
+ gt_inds.reserve (fg_fake_num);
307
+ for (int i = 0 ; i < fg_fake_num; ++i) {
308
+ gt_inds.emplace_back (argmax[fg_fake[i]]);
286
309
}
287
-
288
- Tensor loc_index_t , score_index_t , tgt_lbl_t , gt_inds_t ;
289
- int * loc_index_data = loc_index_t .mutable_data <int >({fg_num}, place);
310
+ Tensor loc_index_t , score_index_t , tgt_lbl_t , gt_inds_t , bbox_inside_weight_t ;
311
+ int * loc_index_data = loc_index_t .mutable_data <int >({fg_fake_num}, place);
290
312
int * score_index_data =
291
313
score_index_t .mutable_data <int >({fg_num + bg_num}, place);
292
314
int * tgt_lbl_data = tgt_lbl_t .mutable_data <int >({fg_num + bg_num}, place);
293
- int * gt_inds_data = gt_inds_t .mutable_data <int >({fg_num}, place);
294
- std::copy (fg_inds.begin (), fg_inds.end (), loc_index_data);
315
+ int * gt_inds_data = gt_inds_t .mutable_data <int >({fg_fake_num}, place);
316
+ T* bbox_inside_weight_data =
317
+ bbox_inside_weight_t .mutable_data <T>({fg_fake_num, 4 }, place);
318
+ std::copy (fg_fake.begin (), fg_fake.end (), loc_index_data);
295
319
std::copy (fg_inds.begin (), fg_inds.end (), score_index_data);
296
320
std::copy (bg_inds.begin (), bg_inds.end (), score_index_data + fg_num);
297
321
std::copy (tgt_lbl.begin (), tgt_lbl.end (), tgt_lbl_data);
298
322
std::copy (gt_inds.begin (), gt_inds.end (), gt_inds_data);
323
+ std::copy (bbox_inside_weight.begin (), bbox_inside_weight.end (),
324
+ bbox_inside_weight_data);
299
325
std::vector<Tensor> loc_score_tgtlbl_gt;
300
326
loc_score_tgtlbl_gt.emplace_back (loc_index_t );
301
327
loc_score_tgtlbl_gt.emplace_back (score_index_t );
302
328
loc_score_tgtlbl_gt.emplace_back (tgt_lbl_t );
303
329
loc_score_tgtlbl_gt.emplace_back (gt_inds_t );
330
+ loc_score_tgtlbl_gt.emplace_back (bbox_inside_weight_t );
304
331
305
332
return loc_score_tgtlbl_gt;
306
333
}
@@ -318,6 +345,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
318
345
auto * score_index = context.Output <LoDTensor>(" ScoreIndex" );
319
346
auto * tgt_bbox = context.Output <LoDTensor>(" TargetBBox" );
320
347
auto * tgt_lbl = context.Output <LoDTensor>(" TargetLabel" );
348
+ auto * bbox_inside_weight = context.Output <LoDTensor>(" BBoxInsideWeight" );
321
349
322
350
PADDLE_ENFORCE_EQ (gt_boxes->lod ().size (), 1UL ,
323
351
" RpnTargetAssignOp gt_boxes needs 1 level of LoD" );
@@ -340,7 +368,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
340
368
score_index->mutable_data <int >({max_num}, place);
341
369
tgt_bbox->mutable_data <T>({max_num, 4 }, place);
342
370
tgt_lbl->mutable_data <int >({max_num, 1 }, place);
343
-
371
+ bbox_inside_weight-> mutable_data <T>({max_num, 4 }, place);
344
372
auto & dev_ctx = context.device_context <platform::CPUDeviceContext>();
345
373
346
374
std::random_device rnd;
@@ -394,6 +422,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
394
422
Tensor sampled_score_index = loc_score_tgtlbl_gt[1 ];
395
423
Tensor sampled_tgtlbl = loc_score_tgtlbl_gt[2 ];
396
424
Tensor sampled_gt_index = loc_score_tgtlbl_gt[3 ];
425
+ Tensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4 ];
397
426
398
427
int loc_num = sampled_loc_index.dims ()[0 ];
399
428
int score_num = sampled_score_index.dims ()[0 ];
@@ -432,6 +461,8 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
432
461
AppendRpns<int >(score_index, total_score_num, &sampled_score_index_unmap);
433
462
AppendRpns<T>(tgt_bbox, total_loc_num * 4 , &sampled_tgt_bbox);
434
463
AppendRpns<int >(tgt_lbl, total_score_num, &sampled_tgtlbl);
464
+ AppendRpns<T>(bbox_inside_weight, total_loc_num * 4 ,
465
+ &sampled_bbox_inside_weight);
435
466
total_loc_num += loc_num;
436
467
437
468
total_score_num += score_num;
@@ -448,10 +479,12 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
448
479
score_index->set_lod (loc_score);
449
480
tgt_bbox->set_lod (lod_loc);
450
481
tgt_lbl->set_lod (loc_score);
482
+ bbox_inside_weight->set_lod (lod_loc);
451
483
loc_index->Resize ({total_loc_num});
452
484
score_index->Resize ({total_score_num});
453
485
tgt_bbox->Resize ({total_loc_num, 4 });
454
486
tgt_lbl->Resize ({total_score_num, 1 });
487
+ bbox_inside_weight->Resize ({total_loc_num, 4 });
455
488
}
456
489
};
457
490
@@ -514,6 +547,9 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
514
547
" TargetLabel" ,
515
548
" (Tensor<int>), The target labels of each anchor with shape "
516
549
" [F + B, 1], F and B are sampled foreground and backgroud number." );
550
+ AddOutput (" BBoxInsideWeight" ,
551
+ " (Tensor), The bbox inside weight with shape "
552
+ " [F, 4], F is the sampled foreground number." );
517
553
AddComment (R"DOC(
518
554
This operator can be, for a given set of ground truth bboxes and the
519
555
anchors, to assign classification and regression targets to each prediction.
0 commit comments