
Commit ec9eb22 (2 parents: 1ca2cde + 0e40429)

Merge pull request #13039 from NHZlX/release_trt_submit

TensorRT support: MobileNet, ResNet50

29 files changed: +642 −192 lines

paddle/fluid/inference/analysis/analyzer.cc (3 additions, 2 deletions)

@@ -24,7 +24,7 @@
 namespace paddle {

-DEFINE_bool(inference_analysis_enable_tensorrt_subgraph_engine, true,
+DEFINE_bool(inference_analysis_enable_tensorrt_subgraph_engine, false,
             "Enable subgraph to TensorRT engine for acceleration");

 DEFINE_string(inference_analysis_graphviz_log_root, "./",

@@ -44,7 +44,8 @@ class DfgPassManagerImpl final : public DfgPassManager {
     if (FLAGS_inference_analysis_enable_tensorrt_subgraph_engine) {
       auto trt_teller = [&](const Node* node) {
         std::unordered_set<std::string> teller_set(
-            {"elementwise_add", "mul", "conv2d", "pool2d", "relu", "softmax"});
+            {"elementwise_add", "mul", "conv2d", "pool2d", "relu", "softmax",
+             "depthwise_conv2d", "batch_norm", "concat"});
         if (!node->IsFunction()) return false;

         const auto* func = static_cast<const Function*>(node);
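
Note that the flag's default flips from true to false here, so the TensorRT subgraph engine becomes opt-in. TensorRTSubgraphPredictor (below) turns it on programmatically in Init(), and since this is an ordinary gflags flag, a standalone analyzer run could also enable it from the command line:

    --inference_analysis_enable_tensorrt_subgraph_engine=true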

paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc (0 additions, 6 deletions)

@@ -23,9 +23,6 @@
 namespace paddle {
 namespace inference {

-DEFINE_int32(tensorrt_max_batchsize, 3, "TensorRT maximum batch size");
-DEFINE_int32(tensorrt_workspace_size, 2048, "TensorRT workspace size");
-
 namespace analysis {

 using framework::proto::ProgramDesc;

@@ -52,7 +49,6 @@ bool DataFlowGraphToFluidPass::Initialize(Argument *argument) {
 bool DataFlowGraphToFluidPass::Finalize() { return true; }

 void DataFlowGraphToFluidPass::Run(DataFlowGraph *graph) {
-  FilterRedundantOutputOfSubGraph(graph);
   LOG(INFO) << "graph.inputs " << graph->inputs.size();
   for (auto &node : GraphTraits<DataFlowGraph>(graph).nodes_in_TS()) {
     if (node.deleted()) continue;

@@ -191,8 +187,6 @@ void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph,
   // Set attrs
   SetAttr(desc.Proto(), "subgraph", block->SerializeAsString());
   SetAttr(desc.Proto(), "engine_uniq_key", "trt-" + std::to_string(counter++));
-  SetAttr(desc.Proto(), "max_batch", FLAGS_tensorrt_max_batchsize);
-  SetAttr(desc.Proto(), "max_workspace", FLAGS_tensorrt_workspace_size);
   SetAttr(desc.Proto(), "parameters", ExtractParameters(graph.nodes.nodes()));
   SetAttr(desc.Proto(), "output_name_mapping", output_mapping);
   node->SetPbMsg(desc.Proto()->SerializeAsString());

paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h (0 additions, 3 deletions)

@@ -27,9 +27,6 @@
 namespace paddle {
 namespace inference {

-DECLARE_int32(tensorrt_max_batchsize);
-DECLARE_int32(tensorrt_workspace_size);
-
 namespace analysis {
 class DataFlowGraphToFluidPass final : public DataFlowGraphPass {
  public:

paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc (1 addition, 1 deletion)

@@ -92,6 +92,7 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
         auto *in = graph->nodes.GetMutable(var2id.at(in_var.arguments(k)));
         in->outlinks.push_back(o);
         o->inlinks.push_back(in);
+        unique_written_vars.insert(in);
       }
     }
     for (int j = 0; j < op.outputs_size(); j++) {

@@ -112,7 +113,6 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
       }
       out->inlinks.push_back(o);
       o->outlinks.push_back(out);
-      unique_written_vars.insert(out);
     }
   }
 }

paddle/fluid/inference/analysis/subgraph_splitter.cc (1 addition, 0 deletions)

@@ -153,6 +153,7 @@ void SubGraphFuse::ReplaceNodesWithSubGraphs() {
       inlink_or_outlink_cleaner(o->inlinks);
     }
   }
+  FilterRedundantOutputOfSubGraph(graph_);
 }

 }  // namespace analysis

paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc (14 additions, 1 deletion)

@@ -15,6 +15,7 @@
 #include "paddle/fluid/inference/analysis/analyzer.h"
 #include "paddle/fluid/inference/api/api_impl.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/utils/singleton.h"
 #include "paddle/fluid/operators/tensorrt_engine_op.h"

@@ -32,7 +33,9 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {

   bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
     VLOG(3) << "Predictor::init()";
-
+    FLAGS_inference_analysis_enable_tensorrt_subgraph_engine = true;
+    FLAGS_tensorrt_max_batch_size = config_.max_batch_size;
+    FLAGS_tensorrt_workspace_size = config_.workspace_size;
     if (config_.use_gpu) {
       place_ = paddle::platform::CUDAPlace(config_.device);
     } else {

@@ -150,3 +153,13 @@ CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
 }

 }  // namespace paddle
+
+USE_TRT_CONVERTER(elementwise_add_weight);
+USE_TRT_CONVERTER(mul);
+USE_TRT_CONVERTER(conv2d);
+USE_TRT_CONVERTER(relu);
+USE_TRT_CONVERTER(fc);
+USE_TRT_CONVERTER(pool2d);
+USE_TRT_CONVERTER(softmax);
+USE_TRT_CONVERTER(batch_norm);
+USE_TRT_CONVERTER(concat);
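
The USE_TRT_CONVERTER calls pull each converter's registration into this translation unit so the linker does not discard it. As a rough sketch of the usual pattern behind such a macro (the names below are illustrative assumptions, not Paddle's actual definition, which lives in op_converter.h):

// Illustrative sketch only -- see op_converter.h for the real macro.
// Referencing an extern "touch" symbol forces the object file that
// registers the converter to be linked into the final binary.
#define USE_TRT_CONVERTER(op_type__)                               \
  extern int TouchConverterRegister_##op_type__();                 \
  static int use_converter_##op_type__ __attribute__((unused)) =   \
      TouchConverterRegister_##op_type__();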

paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc (1 addition, 0 deletions)

@@ -37,6 +37,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) {
   config1.use_gpu = true;
   config1.fraction_of_gpu_memory = 0.3;
   config1.device = 0;
+  config1.max_batch_size = 10;

   auto predictor0 =
       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config0);

paddle/fluid/inference/api/paddle_inference_api.h (8 additions, 0 deletions)

@@ -137,6 +137,14 @@ struct AnakinConfig : public PaddlePredictor::Config {
 struct TensorRTConfig : public NativeConfig {
   // Determine whether a subgraph will be executed by TRT.
   int min_subgraph_size{1};
+  // While TensorRT allows an engine optimized for a given max batch size
+  // to run at any smaller size, performance for those smaller sizes may
+  // not be as well-optimized. The max batch size is therefore best set
+  // equal to the runtime batch size.
+  int max_batch_size{1};
+  // For workspace_size, see:
+  // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
+  int workspace_size{1 << 30};
 };

 // A factory to help create different predictors.
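
With these fields in place, a caller configures the predictor much as the tester above does. A minimal sketch, assuming the usual NativeConfig base fields (the model_dir value here is a placeholder path):

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::TensorRTConfig config;
  config.model_dir = "./mobilenet";  // placeholder model directory
  config.use_gpu = true;
  config.fraction_of_gpu_memory = 0.3;
  config.device = 0;
  config.max_batch_size = 10;  // best matched to the runtime batch size
  // config.workspace_size keeps its 1 << 30 default here.

  auto predictor = paddle::CreatePaddlePredictor<
      paddle::TensorRTConfig, paddle::PaddleEngineKind::kAutoMixedTensorRT>(
      config);
  return 0;
}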

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt (6 additions, 3 deletions)

@@ -1,7 +1,7 @@
 # Add TRT tests
 nv_library(tensorrt_converter
   SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
-       activation_op.cc softmax_op.cc
+       batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc
   DEPS tensorrt_engine operator scope framework_proto op_registry)

 nv_test(test_op_converter SRCS test_op_converter.cc DEPS

@@ -18,9 +18,12 @@ nv_test(test_trt_conv_op SRCS test_conv2d_op.cc conv2d_op.cc
   DEPS ${FLUID_CORE_MODULES} tensorrt_engine conv_op SERIAL)
 nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc
   DEPS ${FLUID_CORE_MODULES} tensorrt_engine pool_op SERIAL)
-
 nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc
   DEPS ${FLUID_CORE_MODULES} tensorrt_engine elementwise_add_op SERIAL)
-
 nv_test(test_trt_softmax_op SRCS test_softmax_op.cc softmax_op.cc
   DEPS ${FLUID_CORE_MODULES} tensorrt_engine softmax_op SERIAL)
+nv_test(test_trt_batch_norm_op SRCS test_batch_norm_op.cc batch_norm_op.cc
+  DEPS ${FLUID_CORE_MODULES} tensorrt_engine batch_norm_op SERIAL)
+
+nv_test(test_trt_concat_op SRCS test_concat_op.cc concat_op.cc
+  DEPS ${FLUID_CORE_MODULES} tensorrt_engine concat_op SERIAL)
paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc (new file: 136 additions, 0 deletions)

@@ -0,0 +1,136 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <math.h>
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+class BatchNormOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    LOG(INFO) << "convert a fluid batch norm op to tensorrt batch_norm";
+
+    framework::OpDesc op_desc(op, nullptr);
+    PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
+    PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1);   // Bias is a weight
+    PADDLE_ENFORCE_EQ(op_desc.Input("Mean").size(), 1);   // Mean is a weight
+    PADDLE_ENFORCE_EQ(op_desc.Input("Scale").size(), 1);  // Scale is a weight
+    PADDLE_ENFORCE_EQ(op_desc.Input("Variance").size(),
+                      1);  // Variance is a weight
+    PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1);
+
+    auto* X = engine_->GetITensor(op_desc.Input("X").front());
+    // Declare weights
+    auto* Bias_v = scope.FindVar(op_desc.Input("Bias").front());
+    auto* Mean_v = scope.FindVar(op_desc.Input("Mean").front());
+    auto* Scale_v = scope.FindVar(op_desc.Input("Scale").front());
+    auto* Variance_v = scope.FindVar(op_desc.Input("Variance").front());
+    const float eps = boost::get<float>(op_desc.GetAttr("epsilon"));
+
+    PADDLE_ENFORCE_NOT_NULL(Bias_v);
+    PADDLE_ENFORCE_NOT_NULL(Mean_v);
+    PADDLE_ENFORCE_NOT_NULL(Scale_v);
+    PADDLE_ENFORCE_NOT_NULL(Variance_v);
+
+    // get tensor
+    auto* Bias_t = Bias_v->GetMutable<framework::LoDTensor>();
+    auto* Mean_t = Mean_v->GetMutable<framework::LoDTensor>();
+    auto* Scale_t = Scale_v->GetMutable<framework::LoDTensor>();
+    auto* Variance_t = Variance_v->GetMutable<framework::LoDTensor>();
+
+    // create temp tensor for weights
+    framework::LoDTensor bias_tensor;
+    framework::LoDTensor mean_tensor;
+    framework::LoDTensor scale_tensor;
+    framework::LoDTensor variance_tensor;
+
+    bias_tensor.Resize(Bias_t->dims());
+    mean_tensor.Resize(Mean_t->dims());
+    scale_tensor.Resize(Scale_t->dims());
+    variance_tensor.Resize(Variance_t->dims());
+
+    platform::CPUPlace cpu_place;
+    // copy data from gpu to cpu
+    TensorCopySync((*Bias_t), cpu_place, &bias_tensor);
+    TensorCopySync((*Mean_t), cpu_place, &mean_tensor);
+    TensorCopySync((*Scale_t), cpu_place, &scale_tensor);
+    TensorCopySync((*Variance_t), cpu_place, &variance_tensor);
+
+    auto* bias_data = bias_tensor.mutable_data<float>(platform::CPUPlace());
+    auto* mean_data = mean_tensor.mutable_data<float>(platform::CPUPlace());
+    auto* scale_data = scale_tensor.mutable_data<float>(platform::CPUPlace());
+    auto* variance_data =
+        variance_tensor.mutable_data<float>(platform::CPUPlace());
+
+    std::unique_ptr<framework::LoDTensor> combile_scale_tensor(
+        new framework::LoDTensor());
+    std::unique_ptr<framework::LoDTensor> combile_bias_tensor(
+        new framework::LoDTensor());
+
+    combile_scale_tensor->Resize(scale_tensor.dims());
+    combile_bias_tensor->Resize(bias_tensor.dims());
+
+    auto* combile_scale_data =
+        combile_scale_tensor->mutable_data<float>(platform::CPUPlace());
+    auto* combile_bias_data =
+        combile_bias_tensor->mutable_data<float>(platform::CPUPlace());
+
+    size_t ele_num = combile_scale_tensor->memory_size() / sizeof(float);
+
+    for (size_t i = 0; i < ele_num; i++) {
+      float scale = scale_data[i];
+      float bias = bias_data[i];
+      float mean = mean_data[i];
+      float variance = variance_data[i];
+      combile_scale_data[i] = scale / sqrtf(variance + eps);
+      combile_bias_data[i] = bias - mean * combile_scale_data[i];
+    }
+
+    TensorRTEngine::Weight scale_weights{
+        nvinfer1::DataType::kFLOAT, static_cast<void*>(combile_scale_data),
+        combile_scale_tensor->memory_size() / sizeof(float)};
+    TensorRTEngine::Weight shift_weights{
+        nvinfer1::DataType::kFLOAT, static_cast<void*>(combile_bias_data),
+        combile_bias_tensor->memory_size() / sizeof(float)};
+    TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
+                                         0};
+
+    nvinfer1::IScaleLayer* layer = TRT_ENGINE_ADD_LAYER(
+        engine_, Scale, *const_cast<nvinfer1::ITensor*>(X),
+        nvinfer1::ScaleMode::kCHANNEL, shift_weights.get(),
+        scale_weights.get(), power_weights.get());
+
+    auto output_name = op_desc.Output("Y").front();
+    engine_->weight_map[op_desc.Input("Bias").front()] =
+        std::move(combile_bias_tensor);
+    engine_->weight_map[op_desc.Input("Scale").front()] =
+        std::move(combile_scale_tensor);
+
+    engine_->SetITensor(output_name, layer->getOutput(0));
+
+    if (test_mode) {
+      engine_->DeclareOutput(output_name);
+    }
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(batch_norm, BatchNormOpConverter);
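
The combile_* loop above is the standard inference-time batch-norm folding: per channel,

    y = scale * (x - mean) / sqrt(variance + eps) + bias
      = combile_scale * x + combile_bias,

with combile_scale = scale / sqrt(variance + eps) and combile_bias = bias - mean * combile_scale. That affine form is exactly what a single TensorRT IScaleLayer computes in kCHANNEL mode (out = in * scale + shift, with the power term left empty here), which is why no dedicated batch-norm layer is needed.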
