@@ -12,10 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

+ #include <cmath>
+ #include <cstring>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
- #include "paddle/fluid/framework/var_type.h"
+ #include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/math/math_function.h"

@@ -25,21 +27,17 @@ namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;

- struct AppendProposalsFunctor {
-   LoDTensor *out_;
-   int64_t offset_;
-   Tensor *to_add_;
-
-   AppendProposalsFunctor(LoDTensor *out, int64_t offset, Tensor *to_add)
-       : out_(out), offset_(offset), to_add_(to_add) {}
-
-   template <typename T>
-   void apply() const {
-     auto *out_data = out_->data<T>();
-     auto *to_add_data = to_add_->data<T>();
-     memcpy(out_data + offset_, to_add_data, to_add_->numel() * sizeof(T));
-   }
- };
+ static const double kBBoxClipDefault = std::log(1000.0 / 16.0);
+
+ static void AppendProposals(Tensor *dst, int64_t offset, const Tensor &src) {
+   auto *out_data = dst->data<void>();
+   auto *to_add_data = src.data<void>();
+   size_t size_of_t = framework::SizeOfType(src.type());
+   offset *= size_of_t;
+   std::memcpy(
+       reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(out_data) + offset),
+       to_add_data, src.numel() * size_of_t);
+ }

class GenerateProposalsOp : public framework::OperatorWithKernel {
 public:
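A note on the refactor above: the old functor needed a framework::VisitDataType dispatch because memcpy was templated on the element type, whereas the new AppendProposals copies raw bytes and turns the element offset into a byte offset using the runtime element size. A minimal standalone sketch of the same idea on plain buffers (illustrative names only, no framework::Tensor):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Append `count` elements of `elem_size` bytes from `src` into `dst`,
    // starting at element offset `offset` (scaled to a byte offset).
    static void AppendBytes(void *dst, std::int64_t offset, const void *src,
                            std::int64_t count, std::size_t elem_size) {
      auto *dst_bytes = static_cast<unsigned char *>(dst) + offset * elem_size;
      std::memcpy(dst_bytes, src, static_cast<std::size_t>(count) * elem_size);
    }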
@@ -75,8 +73,9 @@ class GenerateProposalsOp : public framework::OperatorWithKernel {
};

template <class T>
- void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
-               Tensor *bbox_deltas, Tensor *variances, Tensor *proposals) {
+ static inline void BoxCoder(const platform::DeviceContext &ctx,
+                             Tensor *all_anchors, Tensor *bbox_deltas,
+                             Tensor *variances, Tensor *proposals) {
  T *proposals_data = proposals->mutable_data<T>(ctx.GetPlace());

  int64_t row = all_anchors->dims()[0];
@@ -108,22 +107,22 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
                      anchor_center_y;
      bbox_width = std::exp(std::min<T>(variances_data[i * len + 2] *
                                            bbox_deltas_data[i * len + 2],
-                                         std::log(1000.0 / 16.0))) *
+                                         kBBoxClipDefault)) *
                   anchor_width;
      bbox_height = std::exp(std::min<T>(variances_data[i * len + 3] *
                                             bbox_deltas_data[i * len + 3],
-                                          std::log(1000.0 / 16.0))) *
+                                          kBBoxClipDefault)) *
                    anchor_height;
    } else {
      bbox_center_x =
          bbox_deltas_data[i * len] * anchor_width + anchor_center_x;
      bbox_center_y =
          bbox_deltas_data[i * len + 1] * anchor_height + anchor_center_y;
      bbox_width = std::exp(std::min<T>(bbox_deltas_data[i * len + 2],
-                                        std::log(1000.0 / 16.0))) *
+                                        kBBoxClipDefault)) *
                   anchor_width;
      bbox_height = std::exp(std::min<T>(bbox_deltas_data[i * len + 3],
-                                         std::log(1000.0 / 16.0))) *
+                                         kBBoxClipDefault)) *
                    anchor_height;
    }
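The new constant also documents why the clip value is log(1000/16): the regressed dw/dh deltas are exponentiated, so clamping them at std::log(1000.0 / 16.0) (about 4.135) caps the predicted width/height at 62.5 times the anchor size and keeps std::exp from overflowing on badly behaved regression outputs. A small hedged sketch of the decode step for a single box (standalone, not the operator's exact code):

    #include <algorithm>
    #include <cmath>

    // Decode one (dx, dy, dw, dh) delta against an anchor given by its center
    // and size; the scale deltas are clamped to log(1000/16) ~= 4.135 before exp.
    static void DecodeBox(double cx, double cy, double w, double h,
                          double dx, double dy, double dw, double dh,
                          double *out_cx, double *out_cy,
                          double *out_w, double *out_h) {
      const double kClip = std::log(1000.0 / 16.0);
      *out_cx = dx * w + cx;
      *out_cy = dy * h + cy;
      *out_w = std::exp(std::min(dw, kClip)) * w;  // at most 62.5 * w
      *out_h = std::exp(std::min(dh, kClip)) * h;  // at most 62.5 * h
    }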
@@ -136,30 +135,32 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
}

template <class T>
- void ClipTiledBoxes(const platform::DeviceContext &ctx, const Tensor &im_info,
-                     Tensor *boxes) {
+ static inline void ClipTiledBoxes(const platform::DeviceContext &ctx,
+                                   const Tensor &im_info, Tensor *boxes) {
  T *boxes_data = boxes->mutable_data<T>(ctx.GetPlace());
  const T *im_info_data = im_info.data<T>();
+   T zero(0);
  for (int64_t i = 0; i < boxes->numel(); ++i) {
    if (i % 4 == 0) {
      boxes_data[i] =
-           std::max(std::min(boxes_data[i], im_info_data[1] - 1), 0.0f);
+           std::max(std::min(boxes_data[i], im_info_data[1] - 1), zero);
    } else if (i % 4 == 1) {
      boxes_data[i] =
-           std::max(std::min(boxes_data[i], im_info_data[0] - 1), 0.0f);
+           std::max(std::min(boxes_data[i], im_info_data[0] - 1), zero);
    } else if (i % 4 == 2) {
      boxes_data[i] =
-           std::max(std::min(boxes_data[i], im_info_data[1] - 1), 0.0f);
+           std::max(std::min(boxes_data[i], im_info_data[1] - 1), zero);
    } else {
      boxes_data[i] =
-           std::max(std::min(boxes_data[i], im_info_data[0] - 1), 0.0f);
+           std::max(std::min(boxes_data[i], im_info_data[0] - 1), zero);
    }
  }
}

template <class T>
- void FilterBoxes(const platform::DeviceContext &ctx, Tensor *boxes,
-                  float min_size, const Tensor &im_info, Tensor *keep) {
+ static inline void FilterBoxes(const platform::DeviceContext &ctx,
+                                Tensor *boxes, float min_size,
+                                const Tensor &im_info, Tensor *keep) {
  const T *im_info_data = im_info.data<T>();
  T *boxes_data = boxes->mutable_data<T>(ctx.GetPlace());
  T im_scale = im_info_data[2];
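The `T zero(0)` change above matters once a double kernel exists: with T = double, `std::min(boxes_data[i], im_info_data[1] - 1)` yields a double, and comparing it against the literal `0.0f` inside std::max cannot deduce a single template argument. A tiny illustration of the fix (hypothetical helper, not part of the operator):

    #include <algorithm>

    template <class T>
    T ClampToImage(T v, T im_extent) {
      // std::max(std::min(v, im_extent - 1), 0.0f) would not compile for
      // T = double: std::max(double, float) cannot deduce a single type.
      T zero(0);
      return std::max(std::min(v, im_extent - 1), zero);
    }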
@@ -185,24 +186,24 @@ void FilterBoxes(const platform::DeviceContext &ctx, Tensor *boxes,
  keep->Resize({keep_len});
}

- bool SortScorePairDescend(const std::pair<float, int> &pair1,
-                           const std::pair<float, int> &pair2) {
-   return pair1.first > pair2.first;
- }
-
template <class T>
- void GetMaxScoreIndex(const std::vector<T> &scores,
-                       std::vector<std::pair<T, int>> *sorted_indices) {
+ static inline std::vector<std::pair<T, int>> GetSortedScoreIndex(
+     const std::vector<T> &scores) {
+   std::vector<std::pair<T, int>> sorted_indices;
+   sorted_indices.reserve(scores.size());
  for (size_t i = 0; i < scores.size(); ++i) {
-     sorted_indices->push_back(std::make_pair(scores[i], i));
+     sorted_indices.emplace_back(scores[i], i);
  }
  // Sort the score pair according to the scores in descending order
-   std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
-                    SortScorePairDescend);
+   std::stable_sort(sorted_indices.begin(), sorted_indices.end(),
+                    [](const std::pair<T, int> &a, const std::pair<T, int> &b) {
+                      return a.first < b.first;
+                    });
+   return sorted_indices;
}

template <class T>
- T BBoxArea(const T *box, const bool normalized) {
+ static inline T BBoxArea(const T *box, bool normalized) {
  if (box[2] < box[0] || box[3] < box[1]) {
    // If coordinate values are invalid
    // (e.g. xmax < xmin or ymax < ymin), return 0.
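GetSortedScoreIndex deliberately stores the pairs in ascending order (the inherited comment still speaks of descending order, which is the order in which NMS consumes them): NMS takes the current best candidate from the back of the vector and erases it there, an O(1) step, instead of erasing the front of a descending list, which shifts every remaining element. A short sketch of that consumption pattern (standalone, illustrative names):

    #include <algorithm>
    #include <utility>
    #include <vector>

    // Consume (score, index) pairs from highest to lowest score when the
    // vector is sorted ascending: the best element is always at the back.
    static std::vector<int> ConsumeByScore(std::vector<std::pair<float, int>> v) {
      std::stable_sort(v.begin(), v.end(),
                       [](const std::pair<float, int> &a,
                          const std::pair<float, int> &b) { return a.first < b.first; });
      std::vector<int> order;
      while (!v.empty()) {
        order.push_back(v.back().second);  // highest remaining score
        v.pop_back();                      // O(1), no element shifting
      }
      return order;
    }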
@@ -220,7 +221,7 @@ T BBoxArea(const T *box, const bool normalized) {
}

template <class T>
- T JaccardOverlap(const T *box1, const T *box2, const bool normalized) {
+ static inline T JaccardOverlap(const T *box1, const T *box2, bool normalized) {
  if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
      box2[3] < box1[1]) {
    return static_cast<T>(0.);
@@ -229,39 +230,49 @@ T JaccardOverlap(const T *box1, const T *box2, const bool normalized) {
  const T inter_ymin = std::max(box1[1], box2[1]);
  const T inter_xmax = std::min(box1[2], box2[2]);
  const T inter_ymax = std::min(box1[3], box2[3]);
-   const T inter_w = std::max(0.0f, inter_xmax - inter_xmin + 1);
-   const T inter_h = std::max(0.0f, inter_ymax - inter_ymin + 1);
+   const T inter_w = std::max(T(0), inter_xmax - inter_xmin + 1);
+   const T inter_h = std::max(T(0), inter_ymax - inter_ymin + 1);
  const T inter_area = inter_w * inter_h;
  const T bbox1_area = BBoxArea<T>(box1, normalized);
  const T bbox2_area = BBoxArea<T>(box2, normalized);
  return inter_area / (bbox1_area + bbox2_area - inter_area);
}

+ template <typename T>
+ static inline Tensor VectorToTensor(const std::vector<T> &selected_indices,
+                                     int selected_num) {
+   Tensor keep_nms;
+   keep_nms.Resize({selected_num});
+   auto *keep_data = keep_nms.mutable_data<T>(platform::CPUPlace());
+   for (int i = 0; i < selected_num; ++i) {
+     keep_data[i] = selected_indices[i];
+   }
+   return keep_nms;
+ }
+
template <class T>
- Tensor NMS(const platform::DeviceContext &ctx, Tensor *bbox, Tensor *scores,
-            const T nms_threshold, const float eta) {
+ static inline Tensor NMS(const platform::DeviceContext &ctx, Tensor *bbox,
+                          Tensor *scores, T nms_threshold, float eta) {
  PADDLE_ENFORCE_NOT_NULL(bbox);
  int64_t num_boxes = bbox->dims()[0];
  // 4: [xmin ymin xmax ymax]
  int64_t box_size = bbox->dims()[1];

  std::vector<T> scores_data(num_boxes);
  std::copy_n(scores->data<T>(), num_boxes, scores_data.begin());
-   std::vector<std::pair<T, int>> sorted_indices;
-   GetMaxScoreIndex<T>(scores_data, &sorted_indices);
+   std::vector<std::pair<T, int>> sorted_indices =
+       GetSortedScoreIndex<T>(scores_data);

  std::vector<int> selected_indices;
  int selected_num = 0;
  T adaptive_threshold = nms_threshold;
  const T *bbox_data = bbox->data<T>();
-   bool flag;
  while (sorted_indices.size() != 0) {
-     int idx = sorted_indices.front().second;
-     flag = true;
-     for (size_t k = 0; k < selected_indices.size(); ++k) {
+     int idx = sorted_indices.back().second;
+     bool flag = true;
+     for (int kept_idx : selected_indices) {
      if (flag) {
-         const int kept_idx = selected_indices[k];
        T overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
                                      bbox_data + kept_idx * box_size, false);
        flag = (overlap <= adaptive_threshold);
@@ -271,32 +282,29 @@ Tensor NMS(const platform::DeviceContext &ctx, Tensor *bbox, Tensor *scores,
    }
    if (flag) {
      selected_indices.push_back(idx);
-       selected_num++;
+       ++selected_num;
    }
-     sorted_indices.erase(sorted_indices.begin());
+     sorted_indices.erase(sorted_indices.end() - 1);
    if (flag && eta < 1 && adaptive_threshold > 0.5) {
      adaptive_threshold *= eta;
    }
  }
-   Tensor keep_nms;
-   keep_nms.Resize({selected_num});
-   int *keep_data = keep_nms.mutable_data<int>(ctx.GetPlace());
-   for (int i = 0; i < selected_num; ++i) {
-     keep_data[i] = selected_indices[i];
-   }
-
-   return keep_nms;
+   return VectorToTensor(selected_indices, selected_num);
}

- template <typename DeviceContext, typename T>
+ template <typename T>
class GenerateProposalsKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
    auto *scores = context.Input<Tensor>("Scores");
    auto *bbox_deltas = context.Input<Tensor>("BboxDeltas");
    auto *im_info = context.Input<Tensor>("ImInfo");
-     auto *anchors = context.Input<Tensor>("Anchors");
-     auto *variances = context.Input<Tensor>("Variances");
+     auto anchors = detail::Ref(context.Input<Tensor>("Anchors"),
+                                "Cannot find input Anchors(%s) in scope",
+                                context.Inputs("Anchors")[0]);
+     auto variances = detail::Ref(context.Input<Tensor>("Variances"),
+                                  "Cannot find input Variances(%s) in scope",
+                                  context.Inputs("Variances")[0]);

    auto *rpn_rois = context.Output<LoDTensor>("RpnRois");
    auto *rpn_roi_probs = context.Output<LoDTensor>("RpnRoiProbs");
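For reference, the loop above is the standard greedy NMS recipe: visit candidates from best score to worst, keep a candidate only if its IoU with every already kept box stays at or below the threshold, and, when eta < 1, tighten the threshold adaptively as boxes are kept. A compact hedged sketch on plain arrays (not the operator's exact code; any IoU routine can stand in for JaccardOverlap):

    #include <vector>

    // boxes: N * 4 floats (xmin, ymin, xmax, ymax); order: candidate indices
    // sorted by descending score; iou(a, b) returns the overlap of two boxes.
    template <class IoUFn>
    static std::vector<int> GreedyNMS(const float *boxes,
                                      const std::vector<int> &order,
                                      float threshold, float eta, IoUFn iou) {
      std::vector<int> kept;
      float adaptive = threshold;
      for (int idx : order) {
        bool keep = true;
        for (int k : kept) {
          if (iou(boxes + idx * 4, boxes + k * 4) > adaptive) {
            keep = false;
            break;
          }
        }
        if (keep) {
          kept.push_back(idx);
          if (eta < 1 && adaptive > 0.5f) adaptive *= eta;  // adaptive tightening
        }
      }
      return kept;
    }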
@@ -307,15 +315,16 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
    float min_size = context.Attr<float>("min_size");
    float eta = context.Attr<float>("eta");

-     auto &dev_ctx = context.template device_context<DeviceContext>();
+     auto &dev_ctx =
+         context.template device_context<platform::CPUDeviceContext>();

-     auto scores_dim = scores->dims();
+     auto &scores_dim = scores->dims();
    int64_t num = scores_dim[0];
    int64_t c_score = scores_dim[1];
    int64_t h_score = scores_dim[2];
    int64_t w_score = scores_dim[3];

-     auto bbox_dim = bbox_deltas->dims();
+     auto &bbox_dim = bbox_deltas->dims();
    int64_t c_bbox = bbox_dim[1];
    int64_t h_bbox = bbox_dim[2];
    int64_t w_bbox = bbox_dim[3];
@@ -330,17 +339,17 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
    scores_swap.mutable_data<T>({num, h_score, w_score, c_score},
                                dev_ctx.GetPlace());

-     math::Transpose<DeviceContext, T, 4> trans;
+     math::Transpose<platform::CPUDeviceContext, T, 4> trans;
    std::vector<int> axis = {0, 2, 3, 1};
    trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis);
    trans(dev_ctx, *scores, &scores_swap, axis);

    framework::LoD lod;
-     std::vector<size_t> lod0(1, 0);
-     Tensor *anchor = const_cast<framework::Tensor *>(anchors);
-     anchor->Resize({anchors->numel() / 4, 4});
-     Tensor *var = const_cast<framework::Tensor *>(variances);
-     var->Resize({var->numel() / 4, 4});
+     lod.resize(1);
+     auto &lod0 = lod[0];
+     lod0.push_back(0);
+     anchors.Resize({anchors.numel() / 4, 4});
+     variances.Resize({variances.numel() / 4, 4});

    int64_t num_proposals = 0;
    for (int64_t i = 0; i < num; ++i) {
@@ -352,32 +361,25 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
      scores_slice.Resize({h_score * w_score * c_score, 1});

      std::pair<Tensor, Tensor> tensor_pair =
-           ProposalForOneImage(dev_ctx, im_info_slice, *anchor, *var,
+           ProposalForOneImage(dev_ctx, im_info_slice, anchors, variances,
                              bbox_deltas_slice, scores_slice, pre_nms_top_n,
                              post_nms_top_n, nms_thresh, min_size, eta);
-       Tensor proposals = tensor_pair.first;
-       Tensor scores = tensor_pair.second;
-
-       framework::VisitDataType(
-           framework::ToDataType(rpn_rois->type()),
-           AppendProposalsFunctor(rpn_rois, 4 * num_proposals, &proposals));
-       framework::VisitDataType(
-           framework::ToDataType(rpn_roi_probs->type()),
-           AppendProposalsFunctor(rpn_roi_probs, num_proposals, &scores));
+       Tensor &proposals = tensor_pair.first;
+       Tensor &scores = tensor_pair.second;

+       AppendProposals(rpn_rois, 4 * num_proposals, proposals);
+       AppendProposals(rpn_roi_probs, num_proposals, scores);
      num_proposals += proposals.dims()[0];
-       lod0.emplace_back(num_proposals);
+       lod0.push_back(num_proposals);
    }
-
-     lod.emplace_back(lod0);
    rpn_rois->set_lod(lod);
    rpn_roi_probs->set_lod(lod);
    rpn_rois->Resize({num_proposals, 4});
    rpn_roi_probs->Resize({num_proposals, 1});
  }

  std::pair<Tensor, Tensor> ProposalForOneImage(
-       const DeviceContext &ctx, const Tensor &im_info_slice,
+       const platform::CPUDeviceContext &ctx, const Tensor &im_info_slice,
      const Tensor &anchors, const Tensor &variances,
      const Tensor &bbox_deltas_slice,  // [M, 4]
      const Tensor &scores_slice,       // [N, 1]
@@ -392,10 +394,9 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
    for (int i = 0; i < scores_slice.numel(); ++i) {
      index[i] = i;
    }
-     std::function<bool(const int64_t &, const int64_t &)> compare =
-         [scores_data](const int64_t &i, const int64_t &j) {
-           return scores_data[i] > scores_data[j];
-         };
+     auto compare = [scores_data](const int64_t &i, const int64_t &j) {
+       return scores_data[i] > scores_data[j];
+     };

    if (pre_nms_top_n <= 0 || pre_nms_top_n >= scores_slice.numel()) {
      std::sort(index, index + scores_slice.numel(), compare);
@@ -469,12 +470,12 @@ class GenerateProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
Generate Proposals OP

This operator proposes rois according to each box with their probability to be a foreground object and
- the box can be calculated by anchors. Bbox_deltais and scores are the output of RPN. Final proposals
+ the box can be calculated by anchors. Bbox_deltas and scores are the output of RPN. Final proposals
could be used to train detection net.

Scores is the probability for each box to be an object. In format of (N, A, H, W) where N is batch size, A is number
of anchors, H and W are height and width of the feature map.
- BboxDeltas is the differece between predicted box locatoin and anchor location. In format of (N, 4*A, H, W)
+ BboxDeltas is the difference between predicted box location and anchor location. In format of (N, 4*A, H, W)

For generating proposals, this operator transposes and resizes scores and bbox_deltas in size of (H*W*A, 1) and (H*W*A, 4) and
calculate box locations as proposals candidates. Then clip boxes to image and remove predicted boxes with small area.
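The layout juggling described here is plain index bookkeeping: for one image, scores of shape (A, H, W) are transposed to (H, W, A) and flattened to (H*W*A, 1), so row r pairs one score with the four deltas in row r of the (H*W*A, 4) tensor. A small sketch of that mapping (standalone, row-major storage assumed):

    // Map a flattened row index back to (h, w, a) after the (A, H, W) -> (H, W, A)
    // transpose; row = h * W * A + w * A + a under row-major storage.
    struct AnchorCell { int h, w, a; };

    static AnchorCell RowToCell(int row, int W, int A) {
      AnchorCell c;
      c.a = row % A;
      c.w = (row / A) % W;
      c.h = row / (A * W);
      return c;
    }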
@@ -490,6 +491,5 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(generate_proposals, ops::GenerateProposalsOp,
                  ops::GenerateProposalsOpMaker,
                  paddle::framework::EmptyGradOpMaker);
- REGISTER_OP_CPU_KERNEL(
-     generate_proposals,
-     ops::GenerateProposalsKernel<paddle::platform::CPUDeviceContext, float>);
+ REGISTER_OP_CPU_KERNEL(generate_proposals, ops::GenerateProposalsKernel<float>,
+                        ops::GenerateProposalsKernel<double>);