Skip to content

Commit 2f27c04

Browse files
authored
Merge pull request #14440 from hjchen2/develop
Add PRelu tensorRT plugin and Conv2d transpose op converter
2 parents d971d5b + 6a7b995 commit 2f27c04

File tree

14 files changed

+541
-79
lines changed

14 files changed

+541
-79
lines changed

paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ void IrAnalysisComposePass::InitTensorRTAttrs(Argument *argument) {
4545
std::unordered_set<std::string> teller_set(
4646
{"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid",
4747
"depthwise_conv2d", "batch_norm", "concat", "tanh", "pad",
48-
"elementwise_add", "dropout", "split"});
48+
"elementwise_add", "dropout", "split", "prelu", "conv2d_transpose"});
4949
if (!node->IsOp()) return false;
5050

5151
if (teller_set.count(node->Op()->Type())) {

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,4 +549,6 @@ USE_TRT_CONVERTER(concat);
549549
USE_TRT_CONVERTER(dropout);
550550
USE_TRT_CONVERTER(pad);
551551
USE_TRT_CONVERTER(split);
552+
USE_TRT_CONVERTER(prelu);
553+
USE_TRT_CONVERTER(conv2d_transpose);
552554
#endif

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
nv_library(tensorrt_converter
33
SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
44
batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc
5-
pad_op.cc split_op.cc
5+
pad_op.cc split_op.cc prelu_op.cc
66
DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
77

88
nv_test(test_op_converter SRCS test_op_converter.cc DEPS
@@ -16,7 +16,7 @@ nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
1616
nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
1717
DEPS ${FLUID_CORE_MODULES} tensorrt_engine activation_op SERIAL)
1818
nv_test(test_trt_conv_op SRCS test_conv2d_op.cc conv2d_op.cc
19-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine conv_op SERIAL)
19+
DEPS ${FLUID_CORE_MODULES} tensorrt_engine conv_op conv_transpose_op SERIAL)
2020
nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc
2121
DEPS ${FLUID_CORE_MODULES} tensorrt_engine pool_op SERIAL)
2222
nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc
@@ -33,4 +33,7 @@ nv_test(test_trt_pad_op SRCS test_pad_op.cc pad_op.cc
3333
DEPS ${FLUID_CORE_MODULES} tensorrt_engine pad_op SERIAL)
3434
nv_test(test_trt_split_op SRCS test_split_op.cc split_op.cc
3535
DEPS ${FLUID_CORE_MODULES} tensorrt_engine tensorrt_plugin
36-
split_op concat_op SERIAL)
36+
split_op concat_op SERIAL)
37+
nv_test(test_trt_prelu_op SRCS test_prelu_op.cc prelu_op.cc
38+
DEPS ${FLUID_CORE_MODULES} tensorrt_engine tensorrt_plugin
39+
prelu_op SERIAL)

paddle/fluid/inference/tensorrt/convert/conv2d_op.cc

Lines changed: 118 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -18,92 +18,139 @@ namespace paddle {
1818
namespace inference {
1919
namespace tensorrt {
2020

21-
bool to_skip_merging_optimize(TensorRTEngine* engine_,
21+
bool to_skip_merging_optimize(TensorRTEngine* engine,
2222
const std::vector<int>& filters,
2323
const std::vector<int>& strides,
2424
const std::vector<int>& paddings,
2525
std::string input_name) {
26-
if (engine_->itensor_quote_num[input_name] > 0) {
26+
if (engine->itensor_quote_num[input_name] > 0) {
2727
return true;
2828
}
2929
if (filters[0] == 1 && filters[1] == 1 && strides[0] == 1 &&
3030
strides[1] == 1 && paddings[0] == 0 && paddings[1] == 0)
31-
engine_->itensor_quote_num[input_name] += 1;
31+
engine->itensor_quote_num[input_name] += 1;
3232

3333
return false;
3434
}
3535

36+
template <typename RegistFunc, typename SetDilationFunc>
37+
void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
38+
const framework::Scope& scope, bool test_mode,
39+
RegistFunc fadd_layer, SetDilationFunc fset_dilation,
40+
const std::string& name) {
41+
VLOG(3) << "convert a fluid " << name << " op to tensorrt layer without bias";
42+
43+
framework::OpDesc op_desc(op, nullptr);
44+
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1);
45+
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1); // Y is a weight
46+
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);
47+
48+
PADDLE_ENFORCE(engine != nullptr);
49+
auto* X = engine->GetITensor(op_desc.Input("Input").front());
50+
51+
// Declare weights
52+
auto* Y_v = scope.FindVar(op_desc.Input("Filter").front());
53+
PADDLE_ENFORCE_NOT_NULL(Y_v);
54+
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
55+
56+
platform::CPUPlace cpu_place;
57+
std::unique_ptr<framework::LoDTensor> weight_tensor(
58+
new framework::LoDTensor());
59+
weight_tensor->Resize(Y_t->dims());
60+
TensorCopySync((*Y_t), cpu_place, weight_tensor.get());
61+
62+
auto* weight_data = weight_tensor->mutable_data<float>(platform::CPUPlace());
63+
64+
PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
65+
const int n_output = weight_tensor->dims()[0];
66+
const int n_input = weight_tensor->dims()[1];
67+
const int filter_h = weight_tensor->dims()[2];
68+
const int filter_w = weight_tensor->dims()[3];
69+
const int groups = boost::get<int>(op_desc.GetAttr("groups"));
70+
const std::vector<int> dilations =
71+
boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
72+
const std::vector<int> strides =
73+
boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
74+
const std::vector<int> paddings =
75+
boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
76+
77+
nvinfer1::DimsHW nv_ksize(filter_h, filter_w);
78+
nvinfer1::DimsHW nv_dilations(dilations[0], dilations[1]);
79+
nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
80+
nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
81+
82+
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
83+
static_cast<void*>(weight_data),
84+
static_cast<size_t>(weight_tensor->numel())};
85+
86+
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
87+
auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input,
88+
nv_ksize, weight, bias);
89+
PADDLE_ENFORCE(layer != nullptr);
90+
layer->setStride(nv_strides);
91+
layer->setPadding(nv_paddings);
92+
layer->setNbGroups(groups);
93+
// set dilations
94+
fset_dilation(layer, nv_dilations);
95+
96+
auto output_name = op_desc.Output("Output").front();
97+
layer->setName((name + " (Output: " + output_name + ")").c_str());
98+
engine->weight_map[op_desc.Input("Filter").front()] =
99+
std::move(weight_tensor);
100+
layer->getOutput(0)->setName(output_name.c_str());
101+
engine->SetITensor(output_name, layer->getOutput(0));
102+
103+
if (test_mode ||
104+
to_skip_merging_optimize(engine, {filter_h, filter_w}, strides, paddings,
105+
op_desc.Input("Input").front())) {
106+
engine->DeclareOutput(output_name);
107+
}
108+
}
109+
36110
class Conv2dOpConverter : public OpConverter {
37111
public:
38112
void operator()(const framework::proto::OpDesc& op,
39113
const framework::Scope& scope, bool test_mode) override {
40-
VLOG(3) << "convert a fluid conv2d op to tensorrt conv layer without bias";
41-
42-
framework::OpDesc op_desc(op, nullptr);
43-
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1);
44-
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1); // Y is a weight
45-
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);
46-
47-
auto* X = engine_->GetITensor(op_desc.Input("Input").front());
48-
49-
// Declare weights
50-
auto* Y_v = scope.FindVar(op_desc.Input("Filter").front());
51-
PADDLE_ENFORCE_NOT_NULL(Y_v);
52-
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
53-
54-
platform::CPUPlace cpu_place;
55-
std::unique_ptr<framework::LoDTensor> weight_tensor(
56-
new framework::LoDTensor());
57-
weight_tensor->Resize(Y_t->dims());
58-
TensorCopySync((*Y_t), cpu_place, weight_tensor.get());
59-
60-
auto* weight_data =
61-
weight_tensor->mutable_data<float>(platform::CPUPlace());
62-
63-
PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
64-
const int n_output = weight_tensor->dims()[0];
65-
const int filter_h = weight_tensor->dims()[2];
66-
const int filter_w = weight_tensor->dims()[3];
67-
68-
const int groups = boost::get<int>(op_desc.GetAttr("groups"));
69-
const std::vector<int> dilations =
70-
boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
71-
const std::vector<int> strides =
72-
boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
73-
const std::vector<int> paddings =
74-
boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
75-
76-
nvinfer1::DimsHW nv_ksize(filter_h, filter_w);
77-
nvinfer1::DimsHW nv_dilations(dilations[0], dilations[1]);
78-
nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
79-
nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
80-
81-
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
82-
static_cast<void*>(weight_data),
83-
weight_tensor->memory_size() / sizeof(float)};
84-
85-
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
86-
auto* layer = TRT_ENGINE_ADD_LAYER(
87-
engine_, Convolution, *const_cast<nvinfer1::ITensor*>(X), n_output,
88-
nv_ksize, weight.get(), bias.get());
89-
PADDLE_ENFORCE(layer != nullptr);
90-
layer->setStride(nv_strides);
91-
layer->setPadding(nv_paddings);
92-
layer->setDilation(nv_dilations);
93-
layer->setNbGroups(groups);
94-
95-
auto output_name = op_desc.Output("Output").front();
96-
layer->setName(("conv2d (Output: " + output_name + ")").c_str());
97-
engine_->weight_map[op_desc.Input("Filter").front()] =
98-
std::move(weight_tensor);
99-
layer->getOutput(0)->setName(output_name.c_str());
100-
engine_->SetITensor(output_name, layer->getOutput(0));
101-
102-
if (test_mode ||
103-
to_skip_merging_optimize(engine_, {filter_h, filter_w}, strides,
104-
paddings, op_desc.Input("Input").front())) {
105-
engine_->DeclareOutput(output_name);
106-
}
114+
ConvertConv2d(
115+
engine_, op, scope, test_mode,
116+
[&](nvinfer1::ITensor* inputs, int n_output, /* Conv output maps */
117+
int n_input, /* Conv input maps */
118+
nvinfer1::DimsHW& ksize, TensorRTEngine::Weight& weight,
119+
TensorRTEngine::Weight& bias) -> nvinfer1::IConvolutionLayer* {
120+
auto* layer =
121+
TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
122+
ksize, weight.get(), bias.get());
123+
return layer;
124+
},
125+
[](nvinfer1::IConvolutionLayer* layer, nvinfer1::DimsHW& dilations) {
126+
layer->setDilation(dilations);
127+
},
128+
"conv2d");
129+
}
130+
};
131+
132+
class Deconv2dOpConverter : public OpConverter {
133+
public:
134+
void operator()(const framework::proto::OpDesc& op,
135+
const framework::Scope& scope, bool test_mode) override {
136+
ConvertConv2d(
137+
engine_, op, scope, test_mode,
138+
[&](nvinfer1::ITensor* inputs, int n_output, /* Deconv input maps */
139+
int n_input, /* Deconv output maps */
140+
nvinfer1::DimsHW& ksize, TensorRTEngine::Weight& weight,
141+
TensorRTEngine::Weight& bias) -> nvinfer1::IDeconvolutionLayer* {
142+
auto* layer =
143+
TRT_ENGINE_ADD_LAYER(engine_, Deconvolution, *inputs, n_input,
144+
ksize, weight.get(), bias.get());
145+
return layer;
146+
},
147+
[](nvinfer1::IDeconvolutionLayer* layer, nvinfer1::DimsHW& dilations) {
148+
PADDLE_ENFORCE(
149+
dilations.d[0] == 1 && dilations.d[1] == 1,
150+
"Dilations must be (1, 1) for tensorRT, but given (%d, %d)",
151+
dilations.d[0], dilations.d[1]);
152+
},
153+
"conv2d_transpose");
107154
}
108155
};
109156

@@ -112,3 +159,4 @@ class Conv2dOpConverter : public OpConverter {
112159
} // namespace paddle
113160

114161
REGISTER_TRT_OP_CONVERTER(conv2d, Conv2dOpConverter);
162+
REGISTER_TRT_OP_CONVERTER(conv2d_transpose, Deconv2dOpConverter);

paddle/fluid/inference/tensorrt/convert/elementwise_op.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ class ElementwiseWeightOpConverter : public OpConverter {
3434

3535
auto* X = engine_->GetITensor(op_desc.Input("X").front());
3636
nvinfer1::Dims dims_x = X->getDimensions();
37-
PADDLE_ENFORCE(dims_x.nbDims >= 3);
37+
PADDLE_ENFORCE(dims_x.nbDims >= 3, "x dims experts 3, but %d is given.",
38+
dims_x.nbDims);
3839

3940
auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
4041
PADDLE_ENFORCE_NOT_NULL(Y_v);
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
16+
#include "paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h"
17+
18+
namespace paddle {
19+
namespace inference {
20+
namespace tensorrt {
21+
22+
/*
23+
* PRelu converter from fluid to tensorRT.
24+
*/
25+
class PReluOpConverter : public OpConverter {
26+
public:
27+
void operator()(const framework::proto::OpDesc& op,
28+
const framework::Scope& scope, bool test_mode) override {
29+
VLOG(4) << "convert fluid prelu op to tensorrt prelu layer";
30+
31+
framework::OpDesc op_desc(op, nullptr);
32+
// Declare inputs
33+
int input_num = op_desc.Input("X").size();
34+
PADDLE_ENFORCE(input_num == 1);
35+
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
36+
// Get output
37+
size_t output_num = op_desc.Output("Out").size();
38+
PADDLE_ENFORCE(output_num == 1);
39+
// Get attrs
40+
std::string mode = boost::get<std::string>(op_desc.GetAttr("mode"));
41+
//
42+
auto* alpha_var = scope.FindVar(op_desc.Input("Alpha")[0]);
43+
PADDLE_ENFORCE_NOT_NULL(alpha_var);
44+
auto* alpha_tensor = alpha_var->GetMutable<framework::LoDTensor>();
45+
46+
platform::CUDAPlace place;
47+
std::unique_ptr<framework::LoDTensor> alpha_tensor_device(
48+
new framework::LoDTensor());
49+
alpha_tensor_device->Resize(alpha_tensor->dims());
50+
TensorCopySync(*alpha_tensor, place, alpha_tensor_device.get());
51+
float* alpha_data = alpha_tensor_device->mutable_data<float>(place);
52+
53+
// Transform alpha to TensorRTEngine::Weight
54+
TensorRTEngine::Weight alpha_rt(nvinfer1::DataType::kFLOAT,
55+
static_cast<void*>(alpha_data),
56+
alpha_tensor_device->numel());
57+
PReluPlugin* plugin = new PReluPlugin(alpha_rt, mode);
58+
nvinfer1::IPluginLayer* layer =
59+
engine_->AddPlugin(&input, input_num, plugin);
60+
// keep alpha tensor to avoid release it's memory
61+
engine_->weight_map[op_desc.Input("Alpha")[0]] =
62+
std::move(alpha_tensor_device);
63+
64+
std::string layer_name = "prelu (Output: ";
65+
auto output_name = op_desc.Output("Out")[0];
66+
layer->getOutput(0)->setName(output_name.c_str());
67+
engine_->SetITensor(output_name, layer->getOutput(0));
68+
layer_name += output_name;
69+
if (test_mode) {
70+
engine_->DeclareOutput(output_name);
71+
}
72+
layer->setName((layer_name + ")").c_str());
73+
}
74+
};
75+
76+
} // namespace tensorrt
77+
} // namespace inference
78+
} // namespace paddle
79+
80+
REGISTER_TRT_OP_CONVERTER(prelu, PReluOpConverter);

paddle/fluid/inference/tensorrt/convert/split_op.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class SplitOpConverter : public OpConverter {
2626
public:
2727
void operator()(const framework::proto::OpDesc& op,
2828
const framework::Scope& scope, bool test_mode) override {
29-
VLOG(40) << "convert a fluid split op to tensorrt split layer";
29+
VLOG(4) << "convert a fluid split op to tensorrt split layer";
3030

3131
framework::OpDesc op_desc(op, nullptr);
3232
// Declare inputs

0 commit comments

Comments (0)