
Commit 6d2cfe9

Merge pull request #4866 from gongweibao/blockexpand
Add im2sequence op.
2 parents 23f5c18 + 09544bc commit 6d2cfe9

4 files changed: +484 -0 lines changed

paddle/operators/im2sequence_op.cc

Lines changed: 157 additions & 0 deletions
@@ -0,0 +1,157 @@

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/im2sequence_op.h"

namespace paddle {
namespace operators {

class Im2SequenceOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of Im2SequenceOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of Im2SequenceOp should not be null.");

    auto in_dim = ctx->GetInputDim("X");
    PADDLE_ENFORCE_EQ(in_dim.size(), 4,
                      "Input(X) format must be a 4D tensor, e.g., NCHW.");

    auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
    auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
    auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");

    int batch_size = in_dim[0];
    int img_channels = in_dim[1];
    int img_height = in_dim[2];
    int img_width = in_dim[3];

    int output_height = OutputSize(img_height, kernels[0], paddings[0],
                                   paddings[2], strides[0]);
    int output_width =
        OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);

    ctx->SetOutputDim("Out", {batch_size * output_height * output_width,
                              img_channels * kernels[0] * kernels[1]});
  }
};

class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  Im2SequenceOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
             "(Tensor) The input tensor has NCHW format. "
             "N: batch size, "
             "C: channels, "
             "H: height, "
             "W: width.");
    AddOutput("Out", "(LoDTensor) The output data of the im2sequence op.");
    AddAttr<std::vector<int>>("kernels",
                              "(vector<int>), the "
                              "kernels(kernel_height, kernel_width)");
    AddAttr<std::vector<int>>("strides",
                              "(vector<int> default:{1, 1}), the "
                              "strides(h_stride, w_stride)")
        .SetDefault({1, 1});
    AddAttr<std::vector<int>>("paddings",
                              "(vector<int> default:{0, 0, 0, 0}), the "
                              "paddings(up_pad, left_pad, down_pad, right_pad)")
        .SetDefault({0, 0, 0, 0});
    AddComment(R"DOC(
This op uses kernels to scan images and converts these images to sequences.
After expanding, the number of time steps is output_height * output_width
and the dimension of each time step is kernel_height * kernel_width * channels,
in which:

output_height =
    1 + (padding_up + padding_down + img_height - kernel_height + stride_height - 1) /
            stride_height;
output_width =
    1 + (padding_left + padding_right + img_width - kernel_width + stride_width - 1) /
            stride_width;

This op can be used after a convolutional neural network and before a recurrent neural network.

Given:

x = [[[[ 6.  2.  1.]
       [ 8.  3.  5.]
       [ 0.  2.  6.]]

      [[ 2.  4.  4.]
       [ 6.  3.  0.]
       [ 6.  4.  7.]]]

     [[[ 6.  7.  1.]
       [ 5.  7.  9.]
       [ 2.  4.  8.]]

      [[ 1.  2.  1.]
       [ 1.  3.  5.]
       [ 9.  0.  8.]]]]
x.dims = {2, 2, 3, 3}

And:

kernels = [2, 2]
strides = [1, 1]
paddings = [0, 0, 0, 0]

Then:

output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
               [ 2.  1.  3.  5.  4.  4.  3.  0.]
               [ 8.  3.  0.  2.  6.  3.  6.  4.]
               [ 3.  5.  2.  6.  3.  0.  4.  7.]
               [ 6.  7.  5.  7.  1.  2.  1.  3.]
               [ 7.  1.  7.  9.  2.  1.  3.  5.]
               [ 5.  7.  2.  4.  1.  3.  9.  0.]
               [ 7.  9.  4.  8.  3.  5.  0.  8.]]
output.dims = {8, 8}
output.lod = [[0, 4, 8]]

)DOC");
  }
};

class Im2SequenceGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) shouldn't be null.");
    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker,
            im2sequence_grad, ops::Im2SequenceGradOp);
REGISTER_OP_CPU_KERNEL(
    im2sequence,
    ops::Im2SequenceKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(
    im2sequence_grad,
    ops::Im2SequenceGradKernel<paddle::platform::CPUDeviceContext, float>);
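
The worked example in the DOC comment can be checked against the shape logic in InferShape above. The snippet below is a small standalone sketch, not part of the commit: plain C++ with no Paddle dependency, where the local helper OutputSize simply restates the inline function declared in im2sequence_op.h.

#include <cstdio>

// Same arithmetic as the OutputSize helper in im2sequence_op.h.
static int OutputSize(int input_size, int filter_size, int padding_0,
                      int padding_1, int stride) {
  return (input_size + padding_0 + padding_1 - filter_size) / stride + 1;
}

int main() {
  // Values from the DOC example: x.dims = {2, 2, 3, 3},
  // kernels = {2, 2}, strides = {1, 1}, paddings = {0, 0, 0, 0}.
  const int batch_size = 2, img_channels = 2, img_height = 3, img_width = 3;
  const int kernels[2] = {2, 2}, strides[2] = {1, 1};
  const int paddings[4] = {0, 0, 0, 0};

  int output_height = OutputSize(img_height, kernels[0], paddings[0],
                                 paddings[2], strides[0]);
  int output_width = OutputSize(img_width, kernels[1], paddings[1],
                                paddings[3], strides[1]);

  // One output row per sliding-window position per image.
  printf("Out dims = {%d, %d}\n", batch_size * output_height * output_width,
         img_channels * kernels[0] * kernels[1]);
  // Each image contributes output_height * output_width time steps.
  printf("LoD = [0, %d, %d]\n", output_height * output_width,
         2 * output_height * output_width);
  return 0;
}

Running it prints Out dims = {8, 8} and LoD = [0, 4, 8], matching the example output above.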

paddle/operators/im2sequence_op.cu

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#define EIGEN_USE_GPU
#include "paddle/operators/im2sequence_op.h"

namespace ops = paddle::operators;

REGISTER_OP_CUDA_KERNEL(
    im2sequence,
    ops::Im2SequenceKernel<paddle::platform::CUDADeviceContext, float>);
REGISTER_OP_CUDA_KERNEL(
    im2sequence_grad,
    ops::Im2SequenceGradKernel<paddle::platform::CUDADeviceContext, float>);

paddle/operators/im2sequence_op.h

Lines changed: 135 additions & 0 deletions
@@ -0,0 +1,135 @@

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/framework/data_layout.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/im2col.h"
#include "paddle/operators/math/math_function.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;

inline int OutputSize(int input_size, int filter_size, int padding_0,
                      int padding_1, int stride) {
  const int output_size =
      (input_size + padding_0 + padding_1 - filter_size) / stride + 1;
  return output_size;
}

template <typename DeviceContext, typename T>
class Im2SequenceKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const Tensor* in = ctx.Input<Tensor>("X");
    LoDTensor* out = ctx.Output<LoDTensor>("Out");
    out->mutable_data<T>(ctx.GetPlace());
    // TODO(wanghaoshuang): Add layout checker after 'set_layout'
    // being available for python API
    // PADDLE_ENFORCE_EQ(in->layout(), framework::DataLayout::kNCHW,
    //                   "Input(X) layout must be NCHW");
    auto in_dim = in->dims();
    int batch_size = in_dim[0];
    int img_channels = in_dim[1];
    int img_height = in_dim[2];
    int img_width = in_dim[3];

    auto kernels = ctx.Attr<std::vector<int>>("kernels");
    auto strides = ctx.Attr<std::vector<int>>("strides");
    auto paddings = ctx.Attr<std::vector<int>>("paddings");
    int output_height = OutputSize(img_height, kernels[0], paddings[0],
                                   paddings[2], strides[0]);
    int output_width =
        OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);

    const std::vector<int> dilations({1, 1});

    auto out_dims = out->dims();
    out->Resize({batch_size, out->numel() / batch_size});
    for (int i = 0; i < batch_size; i++) {
      // Each image is expanded independently: one row of dst per
      // sliding-window position, laid out in (output, channel, filter) order.
      const Tensor src =
          in->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
      Tensor dst = out->Slice(i, i + 1).Resize(
          {output_height, output_width, img_channels, kernels[0], kernels[1]});

      math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
      auto& dev_ctx = ctx.template device_context<DeviceContext>();
      f(dev_ctx, src, dilations, strides, paddings, &dst);
    }
    out->Resize(out_dims);

    // Set LoD information: each image contributes
    // output_height * output_width time steps to the batched sequence.
    // TODO(wanghaoshuang): Move this to InferShape
    framework::LoD lod(1);
    lod[0].reserve(batch_size + 1);
    for (int i = 0, offset = 0; i < batch_size + 1; ++i) {
      // reserve() only allocates capacity, so append rather than index.
      lod[0].push_back(offset);
      offset += output_height * output_width;
    }
    out->set_lod(lod);
  }
};

template <typename DeviceContext, typename T>
class Im2SequenceGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<Tensor>("X");
    Tensor* d_out =
        const_cast<Tensor*>(ctx.Input<Tensor>(framework::GradVarName("Out")));
    auto* d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
    d_x->mutable_data<T>(ctx.GetPlace());

    auto x_v = framework::EigenVector<T>::Flatten(*d_x);
    auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
    x_v.device(place) = x_v.constant(0.0);

    auto in_dim = in->dims();
    int batch_size = in_dim[0];
    int img_channels = in_dim[1];
    int img_height = in_dim[2];
    int img_width = in_dim[3];

    auto kernels = ctx.Attr<std::vector<int>>("kernels");
    auto strides = ctx.Attr<std::vector<int>>("strides");
    auto paddings = ctx.Attr<std::vector<int>>("paddings");
    int output_height = OutputSize(img_height, kernels[0], paddings[0],
                                   paddings[2], strides[0]);
    int output_width =
        OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);

    const std::vector<int> dilations({1, 1});

    auto d_out_dims = d_out->dims();
    d_out->Resize({batch_size, d_out->numel() / batch_size});
    for (int i = 0; i < batch_size; i++) {
      // The backward pass scatters each time step's gradient back onto the
      // corresponding window of the input image via col2im.
      Tensor dst =
          d_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
      const Tensor src = d_out->Slice(i, i + 1).Resize(
          {output_height, output_width, img_channels, kernels[0], kernels[1]});
      math::Col2ImFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
      auto& dev_ctx = ctx.template device_context<DeviceContext>();
      f(dev_ctx, src, dilations, strides, paddings, &dst);
    }
    d_out->Resize(d_out_dims);
  }
};

}  // namespace operators
}  // namespace paddle
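
To make the data layout concrete, here is a framework-free sketch of the forward expansion that Im2SequenceKernel performs through Im2ColFunctor<kOCF>: every sliding-window position becomes one time step, and its features are the window values in channel-major order. The function name Im2SequenceReference and the flat-vector interface are illustrative assumptions, not Paddle API; dilation is fixed at 1 and out-of-image positions read as zero.

#include <vector>

// Input x is a flat NCHW buffer of size N*C*H*W. The result has
// N * out_h * out_w rows (time steps) of C * kh * kw features each,
// laid out in {out_h, out_w, C, kh, kw} order like the kOCF column format.
std::vector<float> Im2SequenceReference(const std::vector<float>& x, int N,
                                        int C, int H, int W, int kh, int kw,
                                        int stride_h, int stride_w, int pad_up,
                                        int pad_left, int pad_down,
                                        int pad_right) {
  int out_h = (H + pad_up + pad_down - kh) / stride_h + 1;
  int out_w = (W + pad_left + pad_right - kw) / stride_w + 1;
  std::vector<float> out;
  out.reserve(static_cast<size_t>(N) * out_h * out_w * C * kh * kw);
  for (int n = 0; n < N; ++n) {
    for (int i = 0; i < out_h; ++i) {    // window row, part of the time step
      for (int j = 0; j < out_w; ++j) {  // window col, part of the time step
        for (int c = 0; c < C; ++c) {    // features are channel-major
          for (int ki = 0; ki < kh; ++ki) {
            for (int kj = 0; kj < kw; ++kj) {
              int h = i * stride_h - pad_up + ki;
              int w = j * stride_w - pad_left + kj;
              bool inside = h >= 0 && h < H && w >= 0 && w < W;
              out.push_back(inside ? x[((n * C + c) * H + h) * W + w] : 0.0f);
            }
          }
        }
      }
    }
  }
  return out;
}

Fed the 2 x 2 x 3 x 3 input from the op's DOC comment with 2 x 2 kernels, unit strides and zero paddings, this reproduces the 8 x 8 output matrix shown there, with four time steps per image (LoD [0, 4, 8]).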
