Skip to content

Commit 666c94e

Browse files
Noplz authored and qingqing01 committed
Add default prior box var for box_coder_op (#11164)
* add normalize switch to box_coder_op
* add default prior box var
* update according to the review
1 parent c12c041 commit 666c94e

File tree

4 files changed

+127
-62
lines changed

4 files changed

+127
-62
lines changed

paddle/fluid/operators/detection/box_coder_op.cc

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -22,21 +22,21 @@ class BoxCoderOp : public framework::OperatorWithKernel {
2222
void InferShape(framework::InferShapeContext *ctx) const override {
2323
PADDLE_ENFORCE(ctx->HasInput("PriorBox"),
2424
"Input(PriorBox) of BoxCoderOp should not be null.");
25-
PADDLE_ENFORCE(ctx->HasInput("PriorBoxVar"),
26-
"Input(PriorBoxVar) of BoxCoderOp should not be null.");
2725
PADDLE_ENFORCE(ctx->HasInput("TargetBox"),
2826
"Input(TargetBox) of BoxCoderOp should not be null.");
2927
PADDLE_ENFORCE(ctx->HasOutput("OutputBox"),
3028
"Output(OutputBox) of BoxCoderOp should not be null.");
3129

3230
auto prior_box_dims = ctx->GetInputDim("PriorBox");
33-
auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar");
3431
auto target_box_dims = ctx->GetInputDim("TargetBox");
3532

3633
PADDLE_ENFORCE_EQ(prior_box_dims.size(), 2,
3734
"The rank of Input of PriorBoxVar must be 2");
3835
PADDLE_ENFORCE_EQ(prior_box_dims[1], 4, "The shape of PriorBox is [N, 4]");
39-
PADDLE_ENFORCE_EQ(prior_box_dims, prior_box_var_dims);
36+
if (ctx->HasInput("PriorBoxVar")) {
37+
auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar");
38+
PADDLE_ENFORCE_EQ(prior_box_dims, prior_box_var_dims);
39+
}
4040

4141
auto code_type = GetBoxCodeType(ctx->Attrs().Get<std::string>("code_type"));
4242
if (code_type == BoxCodeType::kEncodeCenterSize) {
@@ -71,9 +71,11 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
7171
"of the coordinate system. [xmax, ymax] is the right bottom "
7272
"coordinate of the anchor box.");
7373
AddInput("PriorBoxVar",
74-
"(Tensor, default Tensor<float>) "
74+
"(Tensor, default Tensor<float>, optional) "
7575
"PriorBoxVar is a 2-D Tensor with shape [M, 4] holds M group "
76-
"of variance.");
76+
"of variance. PriorBoxVar will set all elements to 1 by "
77+
"default.")
78+
.AsDispensable();
7779
AddInput(
7880
"TargetBox",
7981
"(LoDTensor or Tensor) This input can be a 2-D LoDTensor with shape "
@@ -131,5 +133,6 @@ width and height.
131133
namespace ops = paddle::operators;
132134
REGISTER_OPERATOR(box_coder, ops::BoxCoderOp, ops::BoxCoderOpMaker,
133135
paddle::framework::EmptyGradOpMaker);
134-
REGISTER_OP_CPU_KERNEL(box_coder, ops::BoxCoderKernel<float>,
135-
ops::BoxCoderKernel<double>);
136+
REGISTER_OP_CPU_KERNEL(
137+
box_coder, ops::BoxCoderKernel<paddle::platform::CPUDeviceContext, float>,
138+
ops::BoxCoderKernel<paddle::platform::CPUDeviceContext, double>);

paddle/fluid/operators/detection/box_coder_op.cu

Lines changed: 38 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -48,15 +48,18 @@ __global__ void EncodeCenterSizeKernel(const T* prior_box_data,
4848
target_box_data[row_idx * len + 1] +
4949
(normalized == false);
5050

51-
output[idx * len] = (target_box_center_x - prior_box_center_x) /
52-
prior_box_width / prior_box_var_data[col_idx * len];
53-
output[idx * len + 1] = (target_box_center_y - prior_box_center_y) /
54-
prior_box_height /
55-
prior_box_var_data[col_idx * len + 1];
56-
output[idx * len + 2] = log(fabs(target_box_width / prior_box_width)) /
57-
prior_box_var_data[col_idx * len + 2];
58-
output[idx * len + 3] = log(fabs(target_box_height / prior_box_height)) /
59-
prior_box_var_data[col_idx * len + 3];
51+
output[idx * len] =
52+
(target_box_center_x - prior_box_center_x) / prior_box_width;
53+
output[idx * len + 1] =
54+
(target_box_center_y - prior_box_center_y) / prior_box_height;
55+
output[idx * len + 2] = log(fabs(target_box_width / prior_box_width));
56+
output[idx * len + 3] = log(fabs(target_box_height / prior_box_height));
57+
if (prior_box_var_data) {
58+
output[idx * len] /= prior_box_var_data[col_idx * len];
59+
output[idx * len + 1] /= prior_box_var_data[col_idx * len + 1];
60+
output[idx * len + 2] /= prior_box_var_data[col_idx * len + 2];
61+
output[idx * len + 3] /= prior_box_var_data[col_idx * len + 3];
62+
}
6063
}
6164
}
6265

@@ -79,20 +82,31 @@ __global__ void DecodeCenterSizeKernel(const T* prior_box_data,
7982
T prior_box_center_y = (prior_box_data[col_idx * len + 3] +
8083
prior_box_data[col_idx * len + 1]) /
8184
2;
82-
83-
T target_box_width = exp(prior_box_var_data[col_idx * len + 2] *
85+
T target_box_width, target_box_height;
86+
T target_box_center_x, target_box_center_y;
87+
if (prior_box_var_data) {
88+
target_box_width = exp(prior_box_var_data[col_idx * len + 2] *
8489
target_box_data[idx * len + 2]) *
8590
prior_box_width;
86-
T target_box_height = exp(prior_box_var_data[col_idx * len + 3] *
91+
target_box_height = exp(prior_box_var_data[col_idx * len + 3] *
8792
target_box_data[idx * len + 3]) *
8893
prior_box_height;
89-
T target_box_center_x = prior_box_var_data[col_idx * len] *
94+
target_box_center_x = prior_box_var_data[col_idx * len] *
9095
target_box_data[idx * len] * prior_box_width +
9196
prior_box_center_x;
92-
T target_box_center_y = prior_box_var_data[col_idx * len + 1] *
97+
target_box_center_y = prior_box_var_data[col_idx * len + 1] *
9398
target_box_data[idx * len + 1] *
9499
prior_box_height +
95100
prior_box_center_y;
101+
} else {
102+
target_box_width = exp(target_box_data[idx * len + 2]) * prior_box_width;
103+
target_box_height =
104+
exp(target_box_data[idx * len + 3]) * prior_box_height;
105+
target_box_center_x =
106+
target_box_data[idx * len] * prior_box_width + prior_box_center_x;
107+
target_box_center_y = target_box_data[idx * len + 1] * prior_box_height +
108+
prior_box_center_y;
109+
}
96110

97111
output[idx * len] = target_box_center_x - target_box_width / 2;
98112
output[idx * len + 1] = target_box_center_y - target_box_height / 2;
@@ -103,7 +117,7 @@ __global__ void DecodeCenterSizeKernel(const T* prior_box_data,
103117
}
104118
}
105119

106-
template <typename T>
120+
template <typename DeviceContext, typename T>
107121
class BoxCoderCUDAKernel : public framework::OpKernel<T> {
108122
public:
109123
void Compute(const framework::ExecutionContext& context) const override {
@@ -114,6 +128,11 @@ class BoxCoderCUDAKernel : public framework::OpKernel<T> {
114128
auto* target_box = context.Input<framework::LoDTensor>("TargetBox");
115129
auto* output_box = context.Output<framework::Tensor>("OutputBox");
116130

131+
const T* prior_box_data = prior_box->data<T>();
132+
const T* target_box_data = target_box->data<T>();
133+
const T* prior_box_var_data = nullptr;
134+
if (prior_box_var) prior_box_var_data = prior_box_var->data<T>();
135+
117136
if (target_box->lod().size()) {
118137
PADDLE_ENFORCE_EQ(target_box->lod().size(), 1,
119138
"Only support 1 level of LoD.");
@@ -125,10 +144,6 @@ class BoxCoderCUDAKernel : public framework::OpKernel<T> {
125144
int grid = (row * col + block - 1) / block;
126145
auto& device_ctx = context.cuda_device_context();
127146

128-
const T* prior_box_data = prior_box->data<T>();
129-
const T* prior_box_var_data = prior_box_var->data<T>();
130-
const T* target_box_data = target_box->data<T>();
131-
132147
output_box->mutable_data<T>({row, col, len}, context.GetPlace());
133148
T* output = output_box->data<T>();
134149

@@ -150,5 +165,7 @@ class BoxCoderCUDAKernel : public framework::OpKernel<T> {
150165
} // namespace paddle
151166

152167
namespace ops = paddle::operators;
153-
REGISTER_OP_CUDA_KERNEL(box_coder, ops::BoxCoderCUDAKernel<float>,
154-
ops::BoxCoderCUDAKernel<double>);
168+
REGISTER_OP_CUDA_KERNEL(
169+
box_coder,
170+
ops::BoxCoderCUDAKernel<paddle::platform::CUDADeviceContext, float>,
171+
ops::BoxCoderCUDAKernel<paddle::platform::CUDADeviceContext, double>);

paddle/fluid/operators/detection/box_coder_op.h

Lines changed: 52 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -28,19 +28,20 @@ inline BoxCodeType GetBoxCodeType(const std::string& type) {
2828
PADDLE_THROW("Not support type %s.", type);
2929
}
3030

31-
template <typename T>
31+
template <typename DeviceContext, typename T>
3232
class BoxCoderKernel : public framework::OpKernel<T> {
3333
public:
34-
void EncodeCenterSize(const framework::Tensor& target_box,
35-
const framework::Tensor& prior_box,
36-
const framework::Tensor& prior_box_var,
34+
void EncodeCenterSize(const framework::Tensor* target_box,
35+
const framework::Tensor* prior_box,
36+
const framework::Tensor* prior_box_var,
3737
const bool normalized, T* output) const {
38-
int64_t row = target_box.dims()[0];
39-
int64_t col = prior_box.dims()[0];
40-
int64_t len = prior_box.dims()[1];
41-
auto* target_box_data = target_box.data<T>();
42-
auto* prior_box_data = prior_box.data<T>();
43-
auto* prior_box_var_data = prior_box_var.data<T>();
38+
int64_t row = target_box->dims()[0];
39+
int64_t col = prior_box->dims()[0];
40+
int64_t len = prior_box->dims()[1];
41+
auto* target_box_data = target_box->data<T>();
42+
auto* prior_box_data = prior_box->data<T>();
43+
const T* prior_box_var_data = nullptr;
44+
if (prior_box_var) prior_box_var_data = prior_box_var->data<T>();
4445

4546
for (int64_t i = 0; i < row; ++i) {
4647
for (int64_t j = 0; j < col; ++j) {
@@ -65,30 +66,35 @@ class BoxCoderKernel : public framework::OpKernel<T> {
6566
(normalized == false);
6667

6768
size_t offset = i * col * len + j * len;
68-
output[offset] = (target_box_center_x - prior_box_center_x) /
69-
prior_box_width / prior_box_var_data[j * len];
70-
output[offset + 1] = (target_box_center_y - prior_box_center_y) /
71-
prior_box_height / prior_box_var_data[j * len + 1];
69+
output[offset] =
70+
(target_box_center_x - prior_box_center_x) / prior_box_width;
71+
output[offset + 1] =
72+
(target_box_center_y - prior_box_center_y) / prior_box_height;
7273
output[offset + 2] =
73-
std::log(std::fabs(target_box_width / prior_box_width)) /
74-
prior_box_var_data[j * len + 2];
74+
std::log(std::fabs(target_box_width / prior_box_width));
7575
output[offset + 3] =
76-
std::log(std::fabs(target_box_height / prior_box_height)) /
77-
prior_box_var_data[j * len + 3];
76+
std::log(std::fabs(target_box_height / prior_box_height));
77+
if (prior_box_var) {
78+
output[offset] /= prior_box_var_data[j * len];
79+
output[offset + 1] /= prior_box_var_data[j * len + 1];
80+
output[offset + 2] /= prior_box_var_data[j * len + 2];
81+
output[offset + 3] /= prior_box_var_data[j * len + 3];
82+
}
7883
}
7984
}
8085
}
81-
void DecodeCenterSize(const framework::Tensor& target_box,
82-
const framework::Tensor& prior_box,
83-
const framework::Tensor& prior_box_var,
86+
void DecodeCenterSize(const framework::Tensor* target_box,
87+
const framework::Tensor* prior_box,
88+
const framework::Tensor* prior_box_var,
8489
const bool normalized, T* output) const {
85-
int64_t row = target_box.dims()[0];
86-
int64_t col = prior_box.dims()[0];
87-
int64_t len = prior_box.dims()[1];
90+
int64_t row = target_box->dims()[0];
91+
int64_t col = prior_box->dims()[0];
92+
int64_t len = prior_box->dims()[1];
8893

89-
auto* target_box_data = target_box.data<T>();
90-
auto* prior_box_data = prior_box.data<T>();
91-
auto* prior_box_var_data = prior_box_var.data<T>();
94+
auto* target_box_data = target_box->data<T>();
95+
auto* prior_box_data = prior_box->data<T>();
96+
const T* prior_box_var_data = nullptr;
97+
if (prior_box_var) prior_box_var_data = prior_box_var->data<T>();
9298

9399
for (int64_t i = 0; i < row; ++i) {
94100
for (int64_t j = 0; j < col; ++j) {
@@ -103,19 +109,32 @@ class BoxCoderKernel : public framework::OpKernel<T> {
103109
T prior_box_center_y =
104110
(prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2;
105111

106-
T target_box_center_x = prior_box_var_data[j * len] *
112+
T target_box_center_x = 0, target_box_center_y = 0;
113+
T target_box_width = 0, target_box_height = 0;
114+
if (prior_box_var) {
115+
target_box_center_x = prior_box_var_data[j * len] *
107116
target_box_data[offset] * prior_box_width +
108117
prior_box_center_x;
109-
T target_box_center_y = prior_box_var_data[j * len + 1] *
118+
target_box_center_y = prior_box_var_data[j * len + 1] *
110119
target_box_data[offset + 1] *
111120
prior_box_height +
112121
prior_box_center_y;
113-
T target_box_width = std::exp(prior_box_var_data[j * len + 2] *
122+
target_box_width = std::exp(prior_box_var_data[j * len + 2] *
114123
target_box_data[offset + 2]) *
115124
prior_box_width;
116-
T target_box_height = std::exp(prior_box_var_data[j * len + 3] *
125+
target_box_height = std::exp(prior_box_var_data[j * len + 3] *
117126
target_box_data[offset + 3]) *
118127
prior_box_height;
128+
} else {
129+
target_box_center_x =
130+
target_box_data[offset] * prior_box_width + prior_box_center_x;
131+
target_box_center_y = target_box_data[offset + 1] * prior_box_height +
132+
prior_box_center_y;
133+
target_box_width =
134+
std::exp(target_box_data[offset + 2]) * prior_box_width;
135+
target_box_height =
136+
std::exp(target_box_data[offset + 3]) * prior_box_height;
137+
}
119138

120139
output[offset] = target_box_center_x - target_box_width / 2;
121140
output[offset + 1] = target_box_center_y - target_box_height / 2;
@@ -147,10 +166,10 @@ class BoxCoderKernel : public framework::OpKernel<T> {
147166
bool normalized = context.Attr<bool>("box_normalized");
148167
T* output = output_box->data<T>();
149168
if (code_type == BoxCodeType::kEncodeCenterSize) {
150-
EncodeCenterSize(*target_box, *prior_box, *prior_box_var, normalized,
169+
EncodeCenterSize(target_box, prior_box, prior_box_var, normalized,
151170
output);
152171
} else if (code_type == BoxCodeType::kDecodeCenterSize) {
153-
DecodeCenterSize(*target_box, *prior_box, *prior_box_var, normalized,
172+
DecodeCenterSize(target_box, prior_box, prior_box_var, normalized,
154173
output);
155174
}
156175
}

python/paddle/fluid/tests/unittests/test_box_coder_op.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,32 @@ def setUp(self):
120120
self.outputs = {'OutputBox': output_box}
121121

122122

123+
class TestBoxCoderOpWithoutBoxVar(OpTest):
124+
def test_check_output(self):
125+
self.check_output()
126+
127+
def setUp(self):
128+
self.op_type = "box_coder"
129+
lod = [[0, 1, 2, 3, 4, 5]]
130+
prior_box = np.random.random((10, 4)).astype('float32')
131+
prior_box_var = np.ones((10, 4)).astype('float32')
132+
target_box = np.random.random((5, 10, 4)).astype('float32')
133+
code_type = "DecodeCenterSize"
134+
box_normalized = False
135+
output_box = batch_box_coder(prior_box, prior_box_var, target_box,
136+
lod[0], code_type, box_normalized)
137+
138+
self.inputs = {
139+
'PriorBox': prior_box,
140+
'TargetBox': target_box,
141+
}
142+
self.attrs = {
143+
'code_type': 'decode_center_size',
144+
'box_normalized': False
145+
}
146+
self.outputs = {'OutputBox': output_box}
147+
148+
123149
class TestBoxCoderOpWithLoD(OpTest):
124150
def test_check_output(self):
125151
self.check_output()

0 commit comments

Comments
 (0)