
Commit 2660107

[cherry-pick] Add Asypadding for conv fusion. (#21041) (#21439)
* Add Asypadding for conv fusion. test=develop reference: pr/20042
* Fix eigen build link error
* Change back file mode
* Use math function & add more checks.
1 parent e06f443 commit 2660107
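
Editor's note: "Asypadding" here is shorthand for asymmetric padding, where each spatial dimension carries a separate (pad_begin, pad_end) pair that may differ, e.g. when the SAME padding algorithm needs an odd total pad. Below is a minimal sketch of the output-size arithmetic this involves; conv_output_size is an illustrative stand-in for the operator's ConvOutputSize helper, not code from this commit.

#include <cassert>

// Illustrative stand-in for ConvOutputSize with separate begin/end pads.
// dkernel is the effective kernel extent once dilation is applied.
int conv_output_size(int in, int ksize, int dilation, int pad_begin,
                     int pad_end, int stride) {
  const int dkernel = dilation * (ksize - 1) + 1;
  return (in + pad_begin + pad_end - dkernel) / stride + 1;
}

int main() {
  // SAME padding, 3x3 kernel, stride 2: a 7-wide input needs a total pad
  // of 2, split evenly as (1, 1); an 8-wide input needs a total pad of 1,
  // which can only be split asymmetrically, e.g. as (0, 1).
  assert(conv_output_size(7, 3, 1, 1, 1, 2) == 4);  // symmetric case
  assert(conv_output_size(8, 3, 1, 0, 1, 2) == 4);  // asymmetric case
  return 0;
}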

3 files changed: +362 -19 lines changed

paddle/fluid/operators/conv_fusion_op.cc

Lines changed: 79 additions & 8 deletions
@@ -73,15 +73,85 @@ class Conv2DFusionOpInferShape : public framework::InferShapeBase {
     std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
     std::vector<int> dilations =
         ctx->Attrs().Get<std::vector<int>>("dilations");
+    std::string padding_algorithm =
+        ctx->Attrs().Get<std::string>("padding_algorithm");
+    int groups = ctx->Attrs().Get<int>("groups");

-    std::vector<int64_t> oshape({in_dims[0], filter_dims[0]});
-    for (size_t i = 0; i < strides.size(); ++i) {
-      oshape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
-                                      dilations[i], paddings[i], strides[i]));
+    framework::DDim in_data_dims;
+    in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
+
+    PADDLE_ENFORCE_EQ(
+        in_dims.size() == 4 || in_dims.size() == 5, true,
+        "ShapeError: Conv_fusion input should be 4-D or 5-D tensor. But "
+        "received: %u-D Tensor, "
+        "the shape of Conv_fusion input is [%s]",
+        in_dims.size(), in_dims);
+
+    PADDLE_ENFORCE_EQ(in_dims.size(), filter_dims.size(),
+                      "ShapeError: Conv_fusion input dimension and filter "
+                      "dimension should be "
+                      "equal. "
+                      "But received: the shape of Conv_fusion input is [%s], "
+                      "the input dimension of Conv_fusion "
+                      "input is [%d], "
+                      "the shape of filter is [%s], the filter dimension of "
+                      "Conv_fusion is [%d]",
+                      in_dims, in_dims.size(), filter_dims, filter_dims.size());
+
+    int in_sub_stride_size = in_dims.size() - strides.size();
+    PADDLE_ENFORCE_EQ(
+        in_dims.size() - strides.size() == 2U, true,
+        "ShapeError: the dimension of input minus the dimension of "
+        "stride must be equal to 2. "
+        "But received: the dimension of input minus the dimension "
+        "of stride is [%d], the "
+        "input dimension of Conv_fusion is [%d], the shape of Conv_fusion "
+        "input "
+        "is [%s], the stride "
+        "dimension of Conv_fusion is [%d]",
+        in_sub_stride_size, in_dims.size(), in_dims, strides.size());
+
+    const auto input_channels = in_dims[1];
+
+    PADDLE_ENFORCE_EQ(
+        input_channels, filter_dims[1] * groups,
+        "ShapeError: The number of input channels should be equal to filter "
+        "channels * groups. But received: the input channels is [%d], the shape "
+        "of input is [%s], the filter channel is [%d], the shape of filter is "
+        "[%s], "
+        "the groups is [%d]",
+        in_dims[1], in_dims, filter_dims[1], filter_dims, groups);
+    PADDLE_ENFORCE_EQ(
+        filter_dims[0] % groups, 0,
+        "ShapeError: The number of output channels should be divisible by groups. "
+        "But received: the output channels is [%d], the shape of filter is [%s] "
+        "(the first dimension of filter is output channel), the groups is [%d]",
+        filter_dims[0], filter_dims, groups);
+
+    framework::DDim filter_data_dims =
+        framework::slice_ddim(filter_dims, 2, filter_dims.size());
+    std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
+    UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
+                             in_data_dims, strides, ksize);
+
+    std::vector<int64_t> output_shape({in_dims[0]});
+    output_shape.push_back(filter_dims[0]);
+
+    for (size_t i = 0; i < in_data_dims.size(); ++i) {
+      if ((!ctx->IsRuntime()) &&
+          (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
+        output_shape.push_back(-1);
+      } else {
+        output_shape.push_back(
+            ConvOutputSize(in_data_dims[i], filter_dims[i + 2], dilations[i],
+                           paddings[2 * i], paddings[2 * i + 1], strides[i]));
+      }
     }
-    PADDLE_ENFORCE(ctx->HasOutput("Output"),
-                   "Output(Output) of ConvOp should not be null.");
-    ctx->SetOutputDim("Output", framework::make_ddim(oshape));
+
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("Output"), true,
+                      "Output(Output) of ConvOp should not be null.");
+    ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
+
     std::vector<int> channels =
         ctx->Attrs().Get<std::vector<int>>("split_channels");
     if (channels.size()) {
@@ -90,7 +160,8 @@ class Conv2DFusionOpInferShape : public framework::InferShapeBase {
       std::vector<framework::DDim> oshapes;
       oshapes.reserve(channels.size());
       for (size_t i = 0; i < channels.size(); ++i) {
-        oshapes.push_back({oshape[0], channels[i], oshape[2], oshape[3]});
+        oshapes.push_back(
+            {output_shape[0], channels[i], output_shape[2], output_shape[3]});
       }
       ctx->SetOutputsDim("Outputs", oshapes);
     }
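
The InferShape path above relies on UpdatePaddingAndDilation leaving one (pad_begin, pad_end) pair per spatial dimension in paddings, which is why the shape loop indexes paddings[2 * i] and paddings[2 * i + 1]. Below is a hypothetical sketch of just the SAME branch of that expansion; the real helper lives in conv_op.h and also handles "VALID" and explicit padding (and resets dilations for SAME), and the function name and signature here are illustrative only.

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical SAME-padding expansion: grow paddings from one value per
// spatial dim to a flat (pad_begin, pad_end) pair per dim. The end pad
// receives the extra pixel when the total pad is odd, producing asymmetry.
void update_padding_same(std::vector<int>* paddings,
                         const std::vector<int>& in_data_dims,
                         const std::vector<int>& strides,
                         const std::vector<int>& ksize) {
  paddings->clear();
  for (std::size_t i = 0; i < in_data_dims.size(); ++i) {
    // SAME keeps out = ceil(in / stride).
    const int out = (in_data_dims[i] + strides[i] - 1) / strides[i];
    const int pad_sum =
        std::max((out - 1) * strides[i] + ksize[i] - in_data_dims[i], 0);
    paddings->push_back(pad_sum / 2);            // pad_begin
    paddings->push_back(pad_sum - pad_sum / 2);  // pad_end
  }
}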

paddle/fluid/operators/conv_fusion_op.cu.cc renamed to paddle/fluid/operators/conv_fusion_op.cu

Lines changed: 98 additions & 8 deletions
@@ -13,7 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/conv_cudnn_helper.h"
 #include "paddle/fluid/operators/conv_cudnn_op_cache.h"
+#include "paddle/fluid/operators/conv_op.h"
+#include "paddle/fluid/operators/math/padding.h"
 #include "paddle/fluid/platform/cudnn_helper.h"

 DECLARE_int64(cudnn_exhaustive_search_times);
@@ -41,9 +44,10 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
     auto* input = ctx.Input<Tensor>("Input");
     auto* filter = ctx.Input<Tensor>("Filter");
     auto* bias = ctx.Input<Tensor>("Bias");
-    PADDLE_ENFORCE(bias, "The bias should not be null.");
+    PADDLE_ENFORCE_NOT_NULL(bias, "The bias should not be null.");
     auto* residual = ctx.Input<Tensor>("ResidualData");
     auto* output = ctx.Output<Tensor>("Output");
+    output->mutable_data<T>(ctx.GetPlace());

     std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
     std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
@@ -55,11 +59,96 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
     bool exhaustive_search =
         FLAGS_cudnn_exhaustive_search || ctx.Attr<bool>("exhaustive_search");

-    const T* input_data = input->data<T>();
+    // const T* input_data = input->data<T>();
     const T* filter_data = filter->data<T>();
     const T* bias_data = bias->data<T>();
-    T* output_data = output->mutable_data<T>(ctx.GetPlace());
+    // T* output_data = output->mutable_data<T>(ctx.GetPlace());
+
+    const std::string padding_algorithm =
+        ctx.Attr<std::string>("padding_algorithm");
+    const std::string data_format = ctx.Attr<std::string>("data_format");
+
+    Tensor transformed_input_channel(input->type());
+    Tensor transformed_output(output->type());
+    T* output_data = nullptr;
+
+    transformed_input_channel = *input;
+    transformed_output = *output;
+    output_data = transformed_output.data<T>();
     const T* residual_data = residual ? residual->data<T>() : output_data;
+    // update padding and dilation
+    auto in_dims = transformed_input_channel.dims();
+    auto filter_dims = filter->dims();
+    framework::DDim in_data_dims;
+    in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
+
+    framework::DDim filter_data_dims =
+        framework::slice_ddim(filter_dims, 2, filter_dims.size());
+    std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
+    UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
+                             in_data_dims, strides, ksize);
+
+    int data_dim = strides.size();  // 2d or 3d
+    bool is_sys_pad = math::IsSymmetricPadding(paddings, data_dim);
+
+    Tensor transformed_input;
+    std::vector<int> padding_common(data_dim, 0);
+    if (!is_sys_pad) {
+      std::vector<int> padding_diff(data_dim);
+      std::vector<int> new_input_shape_vec(data_dim + 2);
+      new_input_shape_vec[0] = transformed_input_channel.dims()[0];
+      new_input_shape_vec[1] = transformed_input_channel.dims()[1];
+
+      std::vector<int> input_pad(transformed_input_channel.dims().size() * 2,
+                                 0);
+      for (size_t i = 0; i < data_dim; ++i) {
+        padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]);
+        padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]);
+        new_input_shape_vec[i + 2] =
+            transformed_input_channel.dims()[i + 2] + padding_diff[i];
+        input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i];
+        input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i];
+      }
+      framework::DDim new_input_shape(
+          framework::make_ddim(new_input_shape_vec));
+      transformed_input.Resize(new_input_shape);
+      auto& dev_ctx =
+          ctx.template device_context<paddle::platform::CUDADeviceContext>();
+
+      transformed_input =
+          ctx.AllocateTmpTensor<T, paddle::platform::CUDADeviceContext>(
+              new_input_shape, dev_ctx);
+      const int rank = transformed_input_channel.dims().size();
+      T pad_value(0.0);
+      switch (rank) {
+        case 4: {
+          math::PadFunction<paddle::platform::CUDADeviceContext, T, 4>(
+              ctx, input_pad, transformed_input_channel, pad_value,
+              &transformed_input);
+        } break;
+        case 5: {
+          math::PadFunction<paddle::platform::CUDADeviceContext, T, 5>(
+              ctx, input_pad, transformed_input_channel, pad_value,
+              &transformed_input);
+        } break;
+        default:
+          PADDLE_THROW("ConvOp only supports tensors with 4 or 5 dimensions.");
+      }

+    } else {
+      transformed_input = transformed_input_channel;
+      if (paddings.size() == data_dim) {
+        for (size_t i = 0; i < data_dim; ++i) {
+          padding_common[i] = paddings[i];
+        }
+      } else {
+        for (size_t i = 0; i < data_dim; ++i) {
+          padding_common[i] = paddings[2 * i];
+        }
+      }
+    }
+
+    const T* input_data = transformed_input.data<T>();

     // ------------------- cudnn descriptors ---------------------
     ScopedTensorDescriptor input_desc;
@@ -74,18 +163,19 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
     }

     cudnnConvolutionDescriptor_t cudnn_conv_desc =
-        conv_desc.descriptor<T>(paddings, strides, dilations);
+        conv_desc.descriptor<T>(padding_common, strides, dilations);
     CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount(
         cudnn_conv_desc, groups));

     cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-        layout, framework::vectorize<int>(input->dims()));
+        layout, framework::vectorize<int>(transformed_input.dims()));
     cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
-        layout, framework::vectorize<int>(output->dims()));
+        layout, framework::vectorize<int>(transformed_output.dims()));
     cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor<T>(
         layout, framework::vectorize<int>(filter->dims()));
     // Now only support NCHW
-    std::vector<int> bias_dim = {1, static_cast<int>(output->dims()[1]), 1, 1};
+    std::vector<int> bias_dim = {
+        1, static_cast<int>(transformed_output.dims()[1]), 1, 1};
     cudnnTensorDescriptor_t cudnn_bias_desc =
         bias_desc.descriptor<T>(layout, bias_dim);
     cudnnActivationDescriptor_t cudnn_act_desc =
@@ -109,7 +199,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
     CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
         cudnn_conv_desc, CUDNN_DEFAULT_MATH));

-    auto x_dims = framework::vectorize(input->dims());
+    auto x_dims = framework::vectorize(transformed_input.dims());
     auto f_dims = framework::vectorize(filter->dims());
     if (!exhaustive_search) {
       CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
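
Since cuDNN convolution descriptors accept only a single pad value per spatial dimension, the kernel above splits an asymmetric pad into a symmetric part, min(pad_begin, pad_end), that is handed to cuDNN, plus a leftover that is baked into the tensor itself by explicitly zero-padding the input (via math::PadFunction). A standalone illustration of that decomposition (plain C++, not Paddle code):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Flat (pad_begin, pad_end) pairs for H and W, e.g. from SAME padding.
  // (The kernel's input_pad also carries zero pads for the N and C dims,
  // which is why it indexes from 2 * i + 4; this sketch keeps only H/W.)
  const std::vector<int> paddings = {0, 1, 0, 1};
  const int data_dim = 2;

  std::vector<int> padding_common(data_dim), extra_pad(2 * data_dim);
  for (int i = 0; i < data_dim; ++i) {
    padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]);
    extra_pad[2 * i] = paddings[2 * i] - padding_common[i];
    extra_pad[2 * i + 1] = paddings[2 * i + 1] - padding_common[i];
  }
  // padding_common goes into the cuDNN conv descriptor; extra_pad is
  // applied to the tensor up front, so an H x W input becomes
  // (H + 1) x (W + 1) here before the convolution runs.
  for (int i = 0; i < data_dim; ++i) {
    std::printf("dim %d: common=%d extra=(%d, %d)\n", i, padding_common[i],
                extra_pad[2 * i], extra_pad[2 * i + 1]);
  }
  return 0;
}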
