Skip to content

Commit 50aa6ba

Browse files
Noplzqingqing01
authored and committed
add rpn target assign op (#11449)
* Add region proposal network (RPN) target assign operator and Python API for Faster-RCNN.
1 parent 6a749d1 commit 50aa6ba

File tree

4 files changed

+517
-2
lines changed

4 files changed

+517
-2
lines changed

paddle/fluid/operators/detection/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ anchor_generator_op.cu)
2727
detection_library(target_assign_op SRCS target_assign_op.cc
2828
target_assign_op.cu)
2929
detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
30-
polygon_box_transform_op.cu)
30+
polygon_box_transform_op.cu)
31+
detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
3132

3233
# Export local libraries to parent
3334
set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE)
Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include <algorithm>
#include <cmath>
#include <cstring>
#include <random>
#include <vector>

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
19+
namespace paddle {
20+
namespace operators {
21+
22+
using Tensor = framework::Tensor;
23+
using LoDTensor = framework::LoDTensor;
24+
template <typename T, int MajorType = Eigen::RowMajor,
25+
typename IndexType = Eigen::DenseIndex>
26+
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
27+
28+
class RpnTargetAssignOp : public framework::OperatorWithKernel {
29+
public:
30+
using framework::OperatorWithKernel::OperatorWithKernel;
31+
32+
void InferShape(framework::InferShapeContext* ctx) const override {
33+
PADDLE_ENFORCE(ctx->HasInput("DistMat"),
34+
"Input(DistMat) of RpnTargetAssignOp should not be null");
35+
36+
PADDLE_ENFORCE(
37+
ctx->HasOutput("LocationIndex"),
38+
"Output(LocationIndex) of RpnTargetAssignOp should not be null");
39+
PADDLE_ENFORCE(
40+
ctx->HasOutput("ScoreIndex"),
41+
"Output(ScoreIndex) of RpnTargetAssignOp should not be null");
42+
PADDLE_ENFORCE(
43+
ctx->HasOutput("TargetLabel"),
44+
"Output(TargetLabel) of RpnTargetAssignOp should not be null");
45+
46+
auto in_dims = ctx->GetInputDim("DistMat");
47+
PADDLE_ENFORCE_EQ(in_dims.size(), 2,
48+
"The rank of Input(DistMat) must be 2.");
49+
}
50+
};
51+
52+
template <typename T>
53+
class RpnTargetAssignKernel : public framework::OpKernel<T> {
54+
public:
55+
void ScoreAssign(const T* dist_data, const Tensor& anchor_to_gt_max,
56+
const int row, const int col, const float pos_threshold,
57+
const float neg_threshold, int64_t* target_label_data,
58+
std::vector<int>* fg_inds, std::vector<int>* bg_inds) const {
59+
int fg_offset = fg_inds->size();
60+
int bg_offset = bg_inds->size();
61+
for (int64_t i = 0; i < row; ++i) {
62+
const T* v = dist_data + i * col;
63+
T max_dist = *std::max_element(v, v + col);
64+
for (int64_t j = 0; j < col; ++j) {
65+
T val = dist_data[i * col + j];
66+
if (val == max_dist) target_label_data[j] = 1;
67+
}
68+
}
69+
70+
// Pick the fg/bg and count the number
71+
for (int64_t j = 0; j < col; ++j) {
72+
if (anchor_to_gt_max.data<T>()[j] > pos_threshold) {
73+
target_label_data[j] = 1;
74+
} else if (anchor_to_gt_max.data<T>()[j] < neg_threshold) {
75+
target_label_data[j] = 0;
76+
}
77+
if (target_label_data[j] == 1) {
78+
fg_inds->push_back(fg_offset + j);
79+
} else if (target_label_data[j] == 0) {
80+
bg_inds->push_back(bg_offset + j);
81+
}
82+
}
83+
}
84+
85+
void ReservoirSampling(const int num, const int offset,
86+
std::minstd_rand engine,
87+
std::vector<int>* inds) const {
88+
std::uniform_real_distribution<float> uniform(0, 1);
89+
if (inds->size() > num) {
90+
for (int i = num; i < inds->size(); ++i) {
91+
int rng_ind = std::floor(uniform(engine) * i);
92+
if (rng_ind < num)
93+
std::iter_swap(inds->begin() + rng_ind + offset,
94+
inds->begin() + i + offset);
95+
}
96+
}
97+
}
98+
99+
void RpnTargetAssign(const framework::ExecutionContext& ctx,
100+
const Tensor& dist, const float pos_threshold,
101+
const float neg_threshold, const int rpn_batch_size,
102+
const int fg_num, std::minstd_rand engine,
103+
std::vector<int>* fg_inds, std::vector<int>* bg_inds,
104+
int64_t* target_label_data) const {
105+
auto* dist_data = dist.data<T>();
106+
int64_t row = dist.dims()[0];
107+
int64_t col = dist.dims()[1];
108+
int fg_offset = fg_inds->size();
109+
int bg_offset = bg_inds->size();
110+
111+
// Calculate the max IoU between anchors and gt boxes
112+
Tensor anchor_to_gt_max;
113+
anchor_to_gt_max.mutable_data<T>(
114+
framework::make_ddim({static_cast<int64_t>(col), 1}),
115+
platform::CPUPlace());
116+
auto& place = *ctx.template device_context<platform::CPUDeviceContext>()
117+
.eigen_device();
118+
auto x = EigenMatrix<T>::From(dist);
119+
auto x_col_max = EigenMatrix<T>::From(anchor_to_gt_max);
120+
x_col_max.device(place) =
121+
x.maximum(Eigen::DSizes<int, 1>(0))
122+
.reshape(Eigen::DSizes<int, 2>(static_cast<int64_t>(col), 1));
123+
// Follow the Faster RCNN's implementation
124+
ScoreAssign(dist_data, anchor_to_gt_max, row, col, pos_threshold,
125+
neg_threshold, target_label_data, fg_inds, bg_inds);
126+
// Reservoir Sampling
127+
ReservoirSampling(fg_num, fg_offset, engine, fg_inds);
128+
int bg_num = rpn_batch_size - fg_inds->size();
129+
ReservoirSampling(bg_num, bg_offset, engine, bg_inds);
130+
}
131+
132+
void Compute(const framework::ExecutionContext& context) const override {
133+
auto* dist = context.Input<LoDTensor>("DistMat");
134+
auto* loc_index = context.Output<Tensor>("LocationIndex");
135+
auto* score_index = context.Output<Tensor>("ScoreIndex");
136+
auto* tgt_lbl = context.Output<Tensor>("TargetLabel");
137+
138+
auto col = dist->dims()[1];
139+
int64_t n = dist->lod().size() == 0UL
140+
? 1
141+
: static_cast<int64_t>(dist->lod().back().size() - 1);
142+
if (dist->lod().size()) {
143+
PADDLE_ENFORCE_EQ(dist->lod().size(), 1UL,
144+
"Only support 1 level of LoD.");
145+
}
146+
int rpn_batch_size = context.Attr<int>("rpn_batch_size_per_im");
147+
float pos_threshold = context.Attr<float>("rpn_positive_overlap");
148+
float neg_threshold = context.Attr<float>("rpn_negative_overlap");
149+
float fg_fraction = context.Attr<float>("fg_fraction");
150+
151+
int fg_num = static_cast<int>(rpn_batch_size * fg_fraction);
152+
153+
int64_t* target_label_data =
154+
tgt_lbl->mutable_data<int64_t>({n * col, 1}, context.GetPlace());
155+
156+
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>();
157+
math::SetConstant<platform::CPUDeviceContext, int64_t> iset;
158+
iset(dev_ctx, tgt_lbl, static_cast<int>(-1));
159+
160+
std::vector<int> fg_inds;
161+
std::vector<int> bg_inds;
162+
std::random_device rnd;
163+
std::minstd_rand engine;
164+
int seed =
165+
context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
166+
engine.seed(seed);
167+
168+
if (n == 1) {
169+
RpnTargetAssign(context, *dist, pos_threshold, neg_threshold,
170+
rpn_batch_size, fg_num, engine, &fg_inds, &bg_inds,
171+
target_label_data);
172+
} else {
173+
auto lod = dist->lod().back();
174+
for (size_t i = 0; i < lod.size() - 1; ++i) {
175+
Tensor one_ins = dist->Slice(lod[i], lod[i + 1]);
176+
RpnTargetAssign(context, one_ins, pos_threshold, neg_threshold,
177+
rpn_batch_size, fg_num, engine, &fg_inds, &bg_inds,
178+
target_label_data + i * col);
179+
}
180+
}
181+
int* loc_index_data = loc_index->mutable_data<int>(
182+
{static_cast<int>(fg_inds.size())}, context.GetPlace());
183+
int* score_index_data = score_index->mutable_data<int>(
184+
{static_cast<int>(fg_inds.size() + bg_inds.size())},
185+
context.GetPlace());
186+
memcpy(loc_index_data, reinterpret_cast<int*>(&fg_inds[0]),
187+
fg_inds.size() * sizeof(int));
188+
memcpy(score_index_data, reinterpret_cast<int*>(&fg_inds[0]),
189+
fg_inds.size() * sizeof(int));
190+
memcpy(score_index_data + fg_inds.size(),
191+
reinterpret_cast<int*>(&bg_inds[0]), bg_inds.size() * sizeof(int));
192+
}
193+
};
194+
195+
class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
196+
public:
197+
void Make() override {
198+
AddInput(
199+
"DistMat",
200+
"(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape "
201+
"[K, M]. It is pair-wise distance matrix between the entities "
202+
"represented by each row and each column. For example, assumed one "
203+
"entity is A with shape [K], another entity is B with shape [M]. The "
204+
"DistMat[i][j] is the distance between A[i] and B[j]. The bigger "
205+
"the distance is, the better macthing the pairs are. Please note, "
206+
"This tensor can contain LoD information to represent a batch of "
207+
"inputs. One instance of this batch can contain different numbers of "
208+
"entities.");
209+
AddAttr<float>(
210+
"rpn_positive_overlap",
211+
"Minimum overlap required between an anchor and ground-truth "
212+
"box for the (anchor, gt box) pair to be a positive example.")
213+
.SetDefault(0.7);
214+
AddAttr<float>(
215+
"rpn_negative_overlap",
216+
"Maximum overlap allowed between an anchor and ground-truth "
217+
"box for the (anchor, gt box) pair to be a negative examples.")
218+
.SetDefault(0.3);
219+
AddAttr<float>(
220+
"fg_fraction",
221+
"Target fraction of RoI minibatch that "
222+
"is labeled foreground (i.e. class > 0), 0-th class is background.")
223+
.SetDefault(0.25);
224+
AddAttr<int>("rpn_batch_size_per_im",
225+
"Total number of RPN examples per image.")
226+
.SetDefault(256);
227+
AddAttr<bool>("fix_seed",
228+
"A flag indicating whether to use a fixed seed to generate "
229+
"random mask. NOTE: DO NOT set this flag to true in "
230+
"training. Setting this flag to true is only useful in "
231+
"unittest.")
232+
.SetDefault(false);
233+
AddAttr<int>("seed", "RpnTargetAssign random seed.").SetDefault(0);
234+
AddOutput(
235+
"LocationIndex",
236+
"(Tensor), The indexes of foreground anchors in all RPN anchors, the "
237+
"shape of the LocationIndex is [F], F depends on the value of input "
238+
"tensor and attributes.");
239+
AddOutput(
240+
"ScoreIndex",
241+
"(Tensor), The indexes of foreground and background anchors in all "
242+
"RPN anchors(The rest anchors are ignored). The shape of the "
243+
"ScoreIndex is [F + B], F and B depend on the value of input "
244+
"tensor and attributes.");
245+
AddOutput("TargetLabel",
246+
"(Tensor<int64_t>), The target labels of each anchor with shape "
247+
"[K * M, 1], "
248+
"K and M is the same as they are in DistMat.");
249+
AddComment(R"DOC(
250+
This operator can be, for given the IoU between the ground truth bboxes and the
251+
anchors, to assign classification and regression targets to each prediction.
252+
The Score index and LocationIndex will be generated according to the DistMat.
253+
The rest anchors would not contibute to the RPN training loss
254+
255+
ScoreIndex is composed of foreground anchor indexes(positive labels) and
256+
background anchor indexes(negative labels). LocationIndex is exactly same
257+
as the foreground anchor indexes since we can not assign regression target to
258+
the background anchors.
259+
260+
The classification targets(TargetLabel) is a binary class label (of being
261+
an object or not). Following the paper of Faster-RCNN, the positive labels
262+
are two kinds of anchors: (i) the anchor/anchors with the highest IoU
263+
overlap with a ground-truth box, or (ii) an anchor that has an IoU overlap
264+
higher than rpn_positive_overlap(0.7) with any ground-truth box. Note that
265+
a single ground-truth box may assign positive labels to multiple anchors.
266+
A non-positive anchor is when its IoU ratio is lower than rpn_negative_overlap
267+
(0.3) for all ground-truth boxes. Anchors that are neither positive nor
268+
negative do not contribute to the training objective.
269+
270+
)DOC");
271+
}
272+
};
273+
274+
} // namespace operators
275+
} // namespace paddle
276+
277+
namespace ops = paddle::operators;
278+
REGISTER_OPERATOR(rpn_target_assign, ops::RpnTargetAssignOp,
279+
ops::RpnTargetAssignOpMaker,
280+
paddle::framework::EmptyGradOpMaker);
281+
REGISTER_OP_CPU_KERNEL(rpn_target_assign, ops::RpnTargetAssignKernel<float>,
282+
ops::RpnTargetAssignKernel<double>);

0 commit comments

Comments
 (0)