Skip to content

Commit 85c4912

Browse files
authored
Merge pull request #12355 from NHZlX/add_tensorrt_pooling_converter
Add tensorrt pooling converter
2 parents 5bea9c1 + 4f71a3b commit 85c4912

File tree

8 files changed

+210
-15
lines changed

8 files changed

+210
-15
lines changed

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Add TRT tests
22
nv_library(tensorrt_converter
3-
SRCS mul_op.cc conv2d_op.cc fc_op.cc
3+
SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc
44
DEPS tensorrt_engine mul_op)
55

66
nv_test(test_op_converter SRCS test_op_converter.cc DEPS
@@ -13,3 +13,6 @@ nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
1313
DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)
1414
nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
1515
DEPS ${FLUID_CORE_MODULES} tensorrt_engine activation_op SERIAL)
16+
17+
nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc
18+
DEPS ${FLUID_CORE_MODULES} tensorrt_engine pool_op SERIAL)
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
16+
17+
namespace paddle {
18+
namespace inference {
19+
namespace tensorrt {
20+
21+
/*
22+
* Pool2dOp, IPoolingLayer in TRT. This Layer doesn't have weights.
23+
*/
24+
class Pool2dOpConverter : public OpConverter {
25+
public:
26+
void operator()(const framework::proto::OpDesc& op,
27+
const framework::Scope& scope, bool test_mode) override {
28+
VLOG(4)
29+
<< "convert a fluid pool2d op to tensorrt pool2d layer without bias";
30+
framework::OpDesc op_desc(op, nullptr);
31+
// Declare inputs
32+
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
33+
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
34+
auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
35+
36+
std::string pool_type =
37+
boost::get<std::string>(op_desc.GetAttr("pooling_type"));
38+
std::vector<int> ksize =
39+
boost::get<std::vector<int>>(op_desc.GetAttr("ksize"));
40+
std::vector<int> strides =
41+
boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
42+
std::vector<int> paddings =
43+
boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
44+
45+
const nvinfer1::DimsHW nv_ksize(ksize[0], ksize[1]);
46+
const nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
47+
const nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
48+
49+
PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL);
50+
51+
nvinfer1::PoolingType nv_pool_type = nvinfer1::PoolingType::kMAX;
52+
if (pool_type == "max") {
53+
nv_pool_type = nvinfer1::PoolingType::kMAX;
54+
} else if (pool_type == "avg") {
55+
nv_pool_type = nvinfer1::PoolingType::kAVERAGE;
56+
} else {
57+
PADDLE_THROW("TensorRT unsupported pooling type!");
58+
}
59+
60+
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling,
61+
*const_cast<nvinfer1::ITensor*>(input1),
62+
nv_pool_type, nv_ksize);
63+
PADDLE_ENFORCE_NOT_NULL(layer, "pool layer could not be created.");
64+
layer->setStride(nv_strides);
65+
layer->setPadding(nv_paddings);
66+
67+
auto output_name = op_desc.Output("Out")[0];
68+
engine_->SetITensor(output_name, layer->getOutput(0));
69+
if (test_mode) {
70+
engine_->DeclareOutput(output_name);
71+
}
72+
}
73+
};
74+
75+
} // namespace tensorrt
76+
} // namespace inference
77+
} // namespace paddle
78+
79+
USE_OP(pool2d);
80+
REGISTER_TRT_OP_CONVERTER(pool2d, Pool2dOpConverter);

paddle/fluid/inference/tensorrt/convert/test_activation_op.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ TEST(ReluOpConverter, main) {
3737
validator.SetOp(*desc.Proto());
3838
LOG(INFO) << "execute";
3939

40-
validator.Execute(1);
40+
validator.Execute(5);
4141
}
4242

4343
} // namespace tensorrt

paddle/fluid/inference/tensorrt/convert/test_fc_op.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@ TEST(fc_op, test) {
2424
std::unordered_set<std::string> parameters({"mul-Y"});
2525
framework::Scope scope;
2626
TRTConvertValidation validator(10, parameters, scope, 1000);
27-
validator.DeclInputVar("mul-X", nvinfer1::Dims4(1, 10, 1, 1));
27+
validator.DeclInputVar("mul-X", nvinfer1::Dims3(10, 1, 1));
2828
validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2));
29-
// validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2));
3029
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2));
3130

3231
// Prepare Op description
@@ -38,7 +37,7 @@ TEST(fc_op, test) {
3837

3938
validator.SetOp(*desc.Proto());
4039

41-
validator.Execute(1);
40+
validator.Execute(10);
4241
}
4342

4443
} // namespace tensorrt

paddle/fluid/inference/tensorrt/convert/test_mul_op.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ namespace tensorrt {
2323
TEST(MulOpConverter, main) {
2424
framework::Scope scope;
2525
std::unordered_set<std::string> parameters;
26-
TRTConvertValidation validator(10, parameters, scope, 1000);
26+
TRTConvertValidation validator(10, parameters, scope, 1000, false);
2727
validator.DeclInputVar("mul-X", nvinfer1::Dims2(10, 6));
2828
validator.DeclInputVar("mul-Y", nvinfer1::Dims2(6, 10));
2929
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(10, 10));
@@ -39,7 +39,7 @@ TEST(MulOpConverter, main) {
3939
validator.SetOp(*desc.Proto());
4040
LOG(INFO) << "execute";
4141

42-
validator.Execute(1);
42+
validator.Execute(2);
4343
}
4444

4545
} // namespace tensorrt
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
#include <gtest/gtest.h>
15+
#include <fstream>
16+
#include "paddle/fluid/framework/op_registry.h"
17+
#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
18+
19+
namespace paddle {
20+
namespace inference {
21+
namespace tensorrt {
22+
23+
TEST(Pool2dOpConverter, main) {
  // Validate the pool2d converter against the fluid reference op.
  framework::Scope scope;
  std::unordered_set<std::string> parameters;
  TRTConvertValidation validator(5, parameters, scope, 1 << 15);

  // The ITensor's Dims should not contain the batch size.
  // So, the ITensor's Dims of input and output should be C * H * W.
  validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4));
  validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2));

  // Build the pool2d op description: 2x2 max pooling, stride 2, no padding.
  framework::OpDesc desc;
  desc.SetType("pool2d");
  desc.SetInput("X", {"pool2d-X"});
  desc.SetOutput("Out", {"pool2d-Out"});

  std::vector<int> kernel_size{2, 2};
  std::vector<int> stride_attr{2, 2};
  std::vector<int> padding_attr{0, 0};
  std::string pool_type_attr = "max";

  desc.SetAttr("pooling_type", pool_type_attr);
  desc.SetAttr("ksize", kernel_size);
  desc.SetAttr("strides", stride_attr);
  desc.SetAttr("paddings", padding_attr);

  LOG(INFO) << "set OP";
  validator.SetOp(*desc.Proto());
  LOG(INFO) << "execute";

  // Run with batch size 3 (must be <= the validator's max batch of 5).
  validator.Execute(3);
}
55+
56+
} // namespace tensorrt
57+
} // namespace inference
58+
} // namespace paddle
59+
60+
USE_OP(pool2d);

paddle/fluid/inference/tensorrt/convert/ut_helper.h

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,16 @@ class TRTConvertValidation {
6363
public:
6464
TRTConvertValidation() = delete;
6565

66-
TRTConvertValidation(int batch_size,
66+
TRTConvertValidation(int max_batch_size,
6767
const std::unordered_set<std::string>& parameters,
6868
framework::Scope& scope, // NOLINT
69-
int workspace_size = 1 << 10)
70-
: parameters_(parameters), scope_(scope) {
69+
int workspace_size = 1 << 10, bool if_add_batch = true)
70+
: parameters_(parameters),
71+
scope_(scope),
72+
if_add_batch_(if_add_batch),
73+
max_batch_size_(max_batch_size) {
7174
// create engine.
72-
engine_.reset(new TensorRTEngine(batch_size, workspace_size, &stream_));
75+
engine_.reset(new TensorRTEngine(max_batch_size, workspace_size, &stream_));
7376
engine_->InitNetwork();
7477

7578
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
@@ -84,20 +87,26 @@ class TRTConvertValidation {
8487

8588
// Declare a parameter variable in the scope.
8689
void DeclParamVar(const std::string& name, const nvinfer1::Dims& dims) {
87-
DeclVar(name, dims);
90+
DeclVar(name, dims, true);
8891
}
8992

9093
void DeclOutputVar(const std::string& name, const nvinfer1::Dims& dims) {
9194
DeclVar(name, dims);
9295
}
9396

9497
// Declare a variable in a fluid Scope.
95-
void DeclVar(const std::string& name, const nvinfer1::Dims& dims) {
98+
void DeclVar(const std::string& name, const nvinfer1::Dims& dims,
99+
bool is_param = false) {
96100
platform::CPUPlace place;
97101
platform::CPUDeviceContext ctx(place);
98102

99103
// Init Fluid tensor.
100104
std::vector<int> dim_vec(dims.d, dims.d + dims.nbDims);
105+
// There is no batchsize in ITensor's shape, but We should add it to
106+
// tensor's shape of fluid. If the variable is not parameter and the
107+
// if_add_batch_ flag is true, add the max batchsize to dim_vec.
108+
if (is_param != true && if_add_batch_ == true)
109+
dim_vec.insert(dim_vec.begin(), max_batch_size_);
101110
auto* x = scope_.Var(name);
102111
auto* x_tensor = x->GetMutable<framework::LoDTensor>();
103112
x_tensor->Resize(framework::make_ddim(dim_vec));
@@ -131,6 +140,7 @@ class TRTConvertValidation {
131140

132141
void Execute(int batch_size) {
133142
// Execute Fluid Op
143+
PADDLE_ENFORCE_LE(batch_size, max_batch_size_);
134144
platform::CPUPlace place;
135145
platform::CPUDeviceContext ctx(place);
136146
op_->Run(scope_, place);
@@ -149,9 +159,15 @@ class TRTConvertValidation {
149159
auto* var = scope_.FindVar(output);
150160
auto tensor = var->GetMutable<framework::LoDTensor>();
151161
framework::TensorToVector(*tensor, ctx, &fluid_out);
162+
163+
size_t fluid_out_size = fluid_out.size();
164+
if (if_add_batch_ == true) {
165+
fluid_out_size =
166+
batch_size * (framework::product(tensor->dims()) / max_batch_size_);
167+
}
152168
// Compare two output
153169
ASSERT_FALSE(fluid_out.empty());
154-
for (size_t i = 0; i < fluid_out.size(); i++) {
170+
for (size_t i = 0; i < fluid_out_size; i++) {
155171
// Loose the threshold for CI in different machine model.
156172
EXPECT_LT(std::abs(fluid_out[i] - trt_out[i]), 2e-5);
157173
}
@@ -167,6 +183,12 @@ class TRTConvertValidation {
167183
std::unique_ptr<framework::OpDesc> op_desc_;
168184
const std::unordered_set<std::string>& parameters_;
169185
framework::Scope& scope_;
186+
// The ITensor of trt does not contain the batch size,
187+
// but, in most cases, we need to set batch size for
188+
// fluid's tensor shape. This variable indicates
189+
// whether to add batch size to tensor shape of fluid.
190+
bool if_add_batch_;
191+
int max_batch_size_;
170192
};
171193

172194
} // namespace tensorrt

paddle/fluid/inference/tensorrt/test_engine.cc

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
113113
ASSERT_EQ(y_cpu[1], 14.5);
114114
}
115115

116-
TEST_F(TensorRTEngineTest, test_conv2d_temp) {
116+
TEST_F(TensorRTEngineTest, test_conv2d) {
117117
// Weight in CPU memory.
118118
float raw_weight[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
119119
float raw_bias[1] = {0};
@@ -146,6 +146,37 @@ TEST_F(TensorRTEngineTest, test_conv2d_temp) {
146146
ASSERT_EQ(y_cpu[1], 6.0);
147147
}
148148

149+
TEST_F(TensorRTEngineTest, test_pool2d) {
  // 2x2 average pooling over a 1x2x2 input; this layer has no weights.
  auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
                                  nvinfer1::Dims3{1, 2, 2});

  nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kAVERAGE;
  auto* pool_layer =
      TRT_ENGINE_ADD_LAYER(engine_, Pooling, *const_cast<nvinfer1::ITensor*>(x),
                           pool_t, nvinfer1::DimsHW{2, 2});

  PADDLE_ENFORCE(pool_layer != nullptr);
  pool_layer->setStride(nvinfer1::DimsHW{1, 1});
  pool_layer->setPadding(nvinfer1::DimsHW{0, 0});

  engine_->DeclareOutput(pool_layer, 0, "y");
  engine_->FreezeNetwork();
  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);

  // Two batches of one 2x2 feature map each.
  float x_v[8] = {1.0, 2.0, 5.0, 0.0, 2.0, 3.0, 5.0, 10.0};
  engine_->SetInputFromCPU("x", reinterpret_cast<void*>(&x_v),
                           8 * sizeof(float));
  engine_->Execute(2);

  LOG(INFO) << "to get output";
  // Stack buffer instead of `new float[2]` — the original heap
  // allocation was never delete[]-ed and leaked on every run.
  float y_cpu[2] = {0.0f, 0.0f};
  engine_->GetOutputInCPU("y", &y_cpu[0], 2 * sizeof(float));

  // avg(1,2,5,0) == 2 and avg(2,3,5,10) == 5.
  ASSERT_EQ(y_cpu[0], 2.0);
  ASSERT_EQ(y_cpu[1], 5.0);
}
179+
149180
} // namespace tensorrt
150181
} // namespace inference
151182
} // namespace paddle

0 commit comments

Comments
 (0)