Skip to content

Commit ad349e7

Browse files
authored
Merge pull request #14452 from NHZlX/fix_avg_pool_trt_bug
fix avg pool trt bug
2 parents 1d9b2a4 + e62872d commit ad349e7

File tree

8 files changed

+324
-61
lines changed

8 files changed

+324
-61
lines changed

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
1818
nv_test(test_trt_conv_op SRCS test_conv2d_op.cc conv2d_op.cc
1919
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine conv_op conv_transpose_op SERIAL)
2020
nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc
21-
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine pool_op SERIAL)
21+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine pool_op tensorrt_plugin SERIAL)
2222
nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc
2323
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_plugin
2424
elementwise_add_op elementwise_mul_op SERIAL)

paddle/fluid/inference/tensorrt/convert/pool2d_op.cc

Lines changed: 89 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -13,25 +13,57 @@ See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

1515
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
16+
#include "paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h"
1617

1718
namespace paddle {
1819
namespace inference {
1920
namespace tensorrt {
2021

22+
// Decides, per spatial axis, whether extra trailing padding is required and
// records it in *post_pad.
//
// For height (axis 0) and width (axis 1) the pooled output extent is computed
// twice from the same kernel size, stride and padding: once with the plain
// integer division and once with the dividend bumped by (stride - 1). When the
// two results differ, the matching component of *post_pad is overwritten with
// (stride - 1); otherwise that component is left untouched.
//
// input_shape  dimensions of the input tensor; only its last two entries
//              (indices input_dims - 2 and input_dims - 1) are read, as
//              height and width respectively.
// ksize, strides, paddings
//              two-element vectors indexed as [0] = height, [1] = width.
// pre_pad      accepted for symmetry with post_pad; this function neither
//              reads nor writes it.
// post_pad     output; h() and/or w() may be overwritten as described above.
// input_dims   number of dimensions in input_shape.
//
// NOTE(review): strides are used as divisors without validation — presumably
// the caller guarantees they are non-zero; confirm.
void DealCeilMode(const nvinfer1::Dims &input_shape, std::vector<int> ksize,
                  std::vector<int> strides, std::vector<int> paddings,
                  nvinfer1::DimsHW *pre_pad, nvinfer1::DimsHW *post_pad,
                  int input_dims) {
  // Read both extents up front, before anything is written to *post_pad.
  const int in_h = input_shape.d[input_dims - 2];
  const int in_w = input_shape.d[input_dims - 1];

  // True when the "rounded up" output extent differs from the plain one for
  // the given axis (0 = height, 1 = width).
  const auto rounding_differs = [&ksize, &strides, &paddings](int extent,
                                                              int axis) {
    const int span = extent - ksize[axis] + 2 * paddings[axis];
    const int floor_out = span / strides[axis] + 1;
    const int ceil_out = (span + strides[axis] - 1) / strides[axis] + 1;
    return floor_out != ceil_out;
  };

  const bool extend_h = rounding_differs(in_h, 0);
  const bool extend_w = rounding_differs(in_w, 1);

  if (extend_h) {
    post_pad->h() = strides[0] - 1;
  }
  if (extend_w) {
    post_pad->w() = strides[1] - 1;
  }
}
48+
2149
/*
2250
* Pool2dOp, IPoolingLayer in TRT. This layer doesn't have weights.
2351
*/
2452
class Pool2dOpConverter : public OpConverter {
2553
public:
26-
void operator()(const framework::proto::OpDesc& op,
27-
const framework::Scope& scope, bool test_mode) override {
28-
VLOG(3)
54+
void operator()(const framework::proto::OpDesc &op,
55+
const framework::Scope &scope, bool test_mode) override {
56+
VLOG(40)
2957
<< "convert a fluid pool2d op to tensorrt pool2d layer without bias";
3058
framework::OpDesc op_desc(op, nullptr);
3159
// Declare inputs
3260
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
3361
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
34-
auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
62+
auto *input1 = engine_->GetITensor(op_desc.Input("X")[0]);
63+
nvinfer1::Dims input_shape = input1->getDimensions();
64+
int input_dims = input_shape.nbDims;
65+
66+
PADDLE_ENFORCE_EQ(input_dims, 3UL);
3567

3668
bool global_pooling = boost::get<bool>(op_desc.GetAttr("global_pooling"));
3769
std::string pool_type =
@@ -44,23 +76,6 @@ class Pool2dOpConverter : public OpConverter {
4476
boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
4577
bool ceil_mode = boost::get<bool>(op_desc.GetAttr("ceil_mode"));
4678

47-
nvinfer1::Dims input_shape = input1->getDimensions();
48-
int nbDims = input_shape.nbDims;
49-
nvinfer1::DimsHW nv_ksize(ksize[0], ksize[1]);
50-
nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
51-
nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
52-
53-
if (global_pooling == true) {
54-
nv_ksize.d[0] = input_shape.d[nbDims - 2];
55-
nv_ksize.d[1] = input_shape.d[nbDims - 1];
56-
nv_strides.h() = 1;
57-
nv_strides.w() = 1;
58-
nv_paddings.h() = 0;
59-
nv_paddings.w() = 0;
60-
}
61-
62-
PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL);
63-
6479
nvinfer1::PoolingType nv_pool_type = nvinfer1::PoolingType::kMAX;
6580
if (pool_type == "max") {
6681
nv_pool_type = nvinfer1::PoolingType::kMAX;
@@ -70,42 +85,63 @@ class Pool2dOpConverter : public OpConverter {
7085
PADDLE_THROW("TensorRT unsupported pooling type!");
7186
}
7287

73-
if (ceil_mode) {
74-
nvinfer1::DimsHW pre_pad(0, 0);
75-
nvinfer1::DimsHW post_pad(0, 0);
76-
int input_height = input_shape.d[nbDims - 2];
77-
int input_width = input_shape.d[nbDims - 1];
78-
int floor_h_output_size =
79-
(input_height - ksize[0] + 2 * paddings[0]) / strides[0] + 1;
80-
int ceil_h_output_size =
81-
(input_height - ksize[0] + 2 * paddings[0] + strides[0] - 1) /
82-
strides[0] +
83-
1;
84-
85-
int floor_w_output_size =
86-
(input_width - ksize[1] + 2 * paddings[1]) / strides[1] + 1;
87-
int ceil_w_output_size =
88-
(input_width - ksize[1] + 2 * paddings[1] + strides[1] - 1) /
89-
strides[1] +
90-
1;
91-
if (floor_h_output_size != ceil_h_output_size) {
92-
post_pad.h() = strides[0] - 1;
88+
nvinfer1::DimsHW nv_ksize(ksize[0], ksize[1]);
89+
nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
90+
nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
91+
92+
nvinfer1::ILayer *layer = nullptr;
93+
94+
if (global_pooling == true) {
95+
nv_ksize.d[0] = input_shape.d[input_dims - 2];
96+
nv_ksize.d[1] = input_shape.d[input_dims - 1];
97+
auto *layer = TRT_ENGINE_ADD_LAYER(
98+
engine_, Pooling, *const_cast<nvinfer1::ITensor *>(input1),
99+
nv_pool_type, nv_ksize);
100+
PADDLE_ENFORCE_NOT_NULL(layer, "pool layer could not be created.");
101+
auto output_name = op_desc.Output("Out")[0];
102+
layer->setName(("pool2d (Output: " + output_name + ")").c_str());
103+
layer->getOutput(0)->setName(output_name.c_str());
104+
engine_->SetITensor(output_name, layer->getOutput(0));
105+
if (test_mode) {
106+
engine_->DeclareOutput(output_name);
93107
}
108+
return;
109+
}
94110

95-
if (floor_w_output_size != ceil_w_output_size) {
96-
post_pad.w() = strides[1] - 1;
111+
if (pool_type == "max") {
112+
nvinfer1::DimsHW pre_pad(paddings[0], paddings[1]);
113+
nvinfer1::DimsHW post_pad(paddings[0], paddings[1]);
114+
if (ceil_mode) {
115+
// If ceil mode is true, we will pad the appropriate size to the input.
116+
DealCeilMode(input_shape, ksize, strides, paddings, &pre_pad, &post_pad,
117+
input_dims);
118+
auto *pad_layer = TRT_ENGINE_ADD_LAYER(
119+
engine_, Padding, *const_cast<nvinfer1::ITensor *>(input1), pre_pad,
120+
post_pad);
121+
PADDLE_ENFORCE_NOT_NULL(
122+
pad_layer, "pad layer in poolOp converter could not be created.");
123+
input1 = pad_layer->getOutput(0);
124+
}
125+
auto *pool_layer = TRT_ENGINE_ADD_LAYER(
126+
engine_, Pooling, *const_cast<nvinfer1::ITensor *>(input1),
127+
nv_pool_type, nv_ksize);
128+
PADDLE_ENFORCE_NOT_NULL(pool_layer, "pool layer could not be created.");
129+
pool_layer->setStride(nv_strides);
130+
pool_layer->setPadding(nv_paddings);
131+
layer = pool_layer;
132+
} else {
133+
// Average pooling needs to exclude the padding pixels from the average
134+
// mean.
135+
// It is not supported well by TRT, we use a plugin here.
136+
std::vector<int> input_shape_v;
137+
for (int i = 0; i < input_dims; i++) {
138+
input_shape_v.push_back(input_shape.d[i]);
97139
}
98-
auto* layer = TRT_ENGINE_ADD_LAYER(
99-
engine_, Padding, *const_cast<nvinfer1::ITensor*>(input1), pre_pad,
100-
post_pad);
101-
input1 = layer->getOutput(0);
140+
plugin::AvgPoolPlugin *plugin = new plugin::AvgPoolPlugin(
141+
ceil_mode, ksize, strides, paddings, input_shape_v);
142+
auto *avg_pool_layer = engine_->AddPlugin(&input1, 1, plugin);
143+
layer = avg_pool_layer;
102144
}
103-
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling,
104-
*const_cast<nvinfer1::ITensor*>(input1),
105-
nv_pool_type, nv_ksize);
106-
PADDLE_ENFORCE_NOT_NULL(layer, "pool layer could not be created.");
107-
layer->setStride(nv_strides);
108-
layer->setPadding(nv_paddings);
109145

110146
auto output_name = op_desc.Output("Out")[0];
111147
layer->setName(("pool2d (Output: " + output_name + ")").c_str());

paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,31 +20,32 @@ namespace paddle {
2020
namespace inference {
2121
namespace tensorrt {
2222

23-
void test_pool2d(bool global_pooling, bool ceil_mode) {
23+
void test_pool2d(bool global_pooling, bool ceil_mode,
24+
std::string pool_type = "max") {
2425
framework::Scope scope;
2526
std::unordered_set<std::string> parameters;
2627
TRTConvertValidation validator(5, parameters, scope, 1 << 15);
2728

2829
// The ITensor's Dims should not contain the batch size.
2930
// So, the ITensor's Dims of input and output should be C * H * W.
30-
validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 13, 14));
31+
validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 6, 7));
3132
if (global_pooling)
3233
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 1, 1));
3334
else if (ceil_mode)
34-
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 6, 7));
35+
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 3, 4));
3536
else
36-
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 6, 6));
37+
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 3, 3));
3738

3839
// Prepare Op description
3940
framework::OpDesc desc;
4041
desc.SetType("pool2d");
4142
desc.SetInput("X", {"pool2d-X"});
4243
desc.SetOutput("Out", {"pool2d-Out"});
4344

44-
std::vector<int> ksize({3, 3});
45+
std::vector<int> ksize({2, 2});
4546
std::vector<int> strides({2, 2});
4647
std::vector<int> paddings({0, 0});
47-
std::string pooling_t = "max";
48+
std::string pooling_t = pool_type;
4849

4950
desc.SetAttr("pooling_type", pooling_t);
5051
desc.SetAttr("ksize", ksize);
@@ -63,7 +64,8 @@ void test_pool2d(bool global_pooling, bool ceil_mode) {
6364
TEST(Pool2dOpConverter, normal) { test_pool2d(false, false); }
6465
TEST(Pool2dOpConverter, test_global_pooling) { test_pool2d(true, false); }
6566

66-
TEST(Pool2dOpConverter, test_ceil_mode) { test_pool2d(false, true); }
67+
TEST(Pool2dOpConverter, max_ceil_test) { test_pool2d(false, true); }
68+
TEST(Pool2dOpConverter, avg_ceil_test) { test_pool2d(false, true, "avg"); }
6769

6870
} // namespace tensorrt
6971
} // namespace inference
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
nv_library(tensorrt_plugin
22
SRCS trt_plugin.cc split_op_plugin.cu elementwise_op_plugin.cu prelu_op_plugin.cu
3+
avg_pool_op_plugin.cu
34
DEPS enforce tensorrt_engine)
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h"
16+
#include "paddle/fluid/operators/math/pooling.h"
17+
18+
namespace paddle {
19+
namespace inference {
20+
namespace tensorrt {
21+
namespace plugin {
22+
23+
// Reports the dimensions of the plugin's single output.
//
// The result starts as a copy of the first (and only) input's dimensions;
// entries 1 and 2 are then replaced with output_shape_[1] and
// output_shape_[2], so entry 0 is carried over from the input unchanged.
//
// index      output index; asserted to be 0.
// inputDims  array of input dimensions; element 0 is asserted to have 3 dims.
// nbInputs   number of inputs; asserted to be 1.
nvinfer1::Dims AvgPoolPlugin::getOutputDimensions(
    int index, const nvinfer1::Dims* inputDims, int nbInputs) {
  assert(nbInputs == 1);
  assert(index == 0);
  assert(inputDims[0].nbDims == 3);

  nvinfer1::Dims result = inputDims[0];
  result.d[1] = output_shape_[1];
  result.d[2] = output_shape_[2];
  return result;
}
36+
37+
int AvgPoolPlugin::enqueue(int batchSize, const void* const* inputs,
38+
void** outputs, void* workspace,
39+
cudaStream_t stream) {
40+
auto const& input_dims = this->getInputDims(0);
41+
int input_size = 0;
42+
float const* idata = reinterpret_cast<float const*>(inputs[0]);
43+
float** odatas = reinterpret_cast<float**>(outputs);
44+
45+
paddle::operators::math::AvgPool<float> pool_process;
46+
paddle::operators::math::Pool2dDirectCUDAFunctor<
47+
paddle::operators::math::AvgPool<float>, float>
48+
pool2d_forward;
49+
50+
std::vector<int> input_shape = input_shape_;
51+
std::vector<int> output_shape = output_shape_;
52+
input_shape.insert(input_shape.begin(), batchSize);
53+
output_shape.insert(output_shape.begin(), batchSize);
54+
55+
pool2d_forward(idata, input_shape, output_shape, ksize_, strides_, paddings_,
56+
pool_process, true, odatas[0], stream);
57+
58+
return cudaGetLastError() != cudaSuccess;
59+
}
60+
61+
} // namespace plugin
62+
} // namespace tensorrt
63+
} // namespace inference
64+
} // namespace paddle

0 commit comments

Comments
 (0)