Skip to content

Commit 9d80edd

Browse files
NHZlX and Paddle CI
authored and committed
Refine Batch norm trt converter
Because of the changing trt engine op from cpu to gpu mode. (cherry-pick from commit 145b20c160329428c1ec05c94287708aa1ed6f63)
1 parent 63d066b commit 9d80edd

File tree

3 files changed

+61
-42
lines changed

3 files changed

+61
-42
lines changed

paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc

Lines changed: 42 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

15-
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
1615
#include <math.h>
16+
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
1717

1818
namespace paddle {
1919
namespace inference {
@@ -23,15 +23,15 @@ class BatchNormOpConverter : public OpConverter {
2323
public:
2424
void operator()(const framework::proto::OpDesc& op,
2525
const framework::Scope& scope, bool test_mode) override {
26-
LOG(INFO)
27-
<< "convert a fluid batch norm op to tensorrt batch_norm";
26+
LOG(INFO) << "convert a fluid batch norm op to tensorrt batch_norm";
2827

2928
framework::OpDesc op_desc(op, nullptr);
3029
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
31-
PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1); // Bias is a weight
32-
PADDLE_ENFORCE_EQ(op_desc.Input("Mean").size(), 1); // Mean is a weight
30+
PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1); // Bias is a weight
31+
PADDLE_ENFORCE_EQ(op_desc.Input("Mean").size(), 1); // Mean is a weight
3332
PADDLE_ENFORCE_EQ(op_desc.Input("Scale").size(), 1); // Scale is a weight
34-
PADDLE_ENFORCE_EQ(op_desc.Input("Variance").size(), 1); // Variance is a weight
33+
PADDLE_ENFORCE_EQ(op_desc.Input("Variance").size(),
34+
1); // Variance is a weight
3535
PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1);
3636

3737
auto* X = engine_->GetITensor(op_desc.Input("X").front());
@@ -53,7 +53,6 @@ class BatchNormOpConverter : public OpConverter {
5353
auto* Scale_t = Scale_v->GetMutable<framework::LoDTensor>();
5454
auto* Variance_t = Variance_v->GetMutable<framework::LoDTensor>();
5555

56-
5756
// create temp tensor for weights
5857
framework::LoDTensor bias_tensor;
5958
framework::LoDTensor mean_tensor;
@@ -64,9 +63,9 @@ class BatchNormOpConverter : public OpConverter {
6463
mean_tensor.Resize(Mean_t->dims());
6564
scale_tensor.Resize(Scale_t->dims());
6665
variance_tensor.Resize(Variance_t->dims());
67-
66+
6867
platform::CPUPlace cpu_place;
69-
// copy data from gpu to cpu
68+
// copy data from gpu to cpu
7069
TensorCopySync((*Bias_t), cpu_place, &bias_tensor);
7170
TensorCopySync((*Mean_t), cpu_place, &mean_tensor);
7271
TensorCopySync((*Scale_t), cpu_place, &scale_tensor);
@@ -75,47 +74,53 @@ class BatchNormOpConverter : public OpConverter {
7574
auto* bias_data = bias_tensor.mutable_data<float>(platform::CPUPlace());
7675
auto* mean_data = mean_tensor.mutable_data<float>(platform::CPUPlace());
7776
auto* scale_data = scale_tensor.mutable_data<float>(platform::CPUPlace());
78-
auto* variance_data = variance_tensor.mutable_data<float>(platform::CPUPlace());
79-
80-
framework::LoDTensor *combile_scale_tensor = new framework::LoDTensor();
81-
framework::LoDTensor *combile_bias_tensor = new framework::LoDTensor();
77+
auto* variance_data =
78+
variance_tensor.mutable_data<float>(platform::CPUPlace());
79+
80+
std::unique_ptr<framework::LoDTensor> combile_scale_tensor(
81+
new framework::LoDTensor());
82+
std::unique_ptr<framework::LoDTensor> combile_bias_tensor(
83+
new framework::LoDTensor());
8284

8385
combile_scale_tensor->Resize(scale_tensor.dims());
8486
combile_bias_tensor->Resize(bias_tensor.dims());
8587

86-
auto* combile_scale_data = combile_scale_tensor->mutable_data<float>(platform::CPUPlace());
87-
auto* combile_bias_data = combile_bias_tensor->mutable_data<float>(platform::CPUPlace());
88+
auto* combile_scale_data =
89+
combile_scale_tensor->mutable_data<float>(platform::CPUPlace());
90+
auto* combile_bias_data =
91+
combile_bias_tensor->mutable_data<float>(platform::CPUPlace());
92+
93+
size_t ele_num = combile_scale_tensor->memory_size() / sizeof(float);
8894

89-
engine_->weight_map_[op_desc.Input("Bias").front()] = std::move(std::unique_ptr<framework::Tensor>(combile_bias_tensor));
90-
engine_->weight_map_[op_desc.Input("Scale").front()] = std::move(std::unique_ptr<framework::Tensor>(combile_scale_tensor));
91-
92-
size_t ele_num = combile_scale_tensor->memory_size()/sizeof(float);
93-
9495
for (size_t i = 0; i < ele_num; i++) {
95-
float scale = scale_data[i];
96-
float bias = bias_data[i];
97-
float mean = mean_data[i];
98-
float variance = variance_data[i];
99-
combile_scale_data[i] = scale / sqrtf(variance + eps);
100-
combile_bias_data[i] = bias - mean * combile_scale_data[i];
96+
float scale = scale_data[i];
97+
float bias = bias_data[i];
98+
float mean = mean_data[i];
99+
float variance = variance_data[i];
100+
combile_scale_data[i] = scale / sqrtf(variance + eps);
101+
combile_bias_data[i] = bias - mean * combile_scale_data[i];
101102
}
102103

103-
104-
TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT,
105-
static_cast<void*>(combile_scale_data),
106-
combile_scale_tensor->memory_size() / sizeof(float)};
107-
TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT,
108-
static_cast<void *>(combile_bias_data),
109-
combile_bias_tensor->memory_size()/ sizeof(float)};
104+
TensorRTEngine::Weight scale_weights{
105+
nvinfer1::DataType::kFLOAT, static_cast<void*>(combile_scale_data),
106+
combile_scale_tensor->memory_size() / sizeof(float)};
107+
TensorRTEngine::Weight shift_weights{
108+
nvinfer1::DataType::kFLOAT, static_cast<void*>(combile_bias_data),
109+
combile_bias_tensor->memory_size() / sizeof(float)};
110110
TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
111111
0};
112112

113-
114-
nvinfer1::IScaleLayer* layer = TRT_ENGINE_ADD_LAYER(
115-
engine_, Scale, *const_cast<nvinfer1::ITensor*>(X), nvinfer1::ScaleMode::kCHANNEL,
116-
shift_weights.get(), scale_weights.get(), power_weights.get());
113+
nvinfer1::IScaleLayer* layer =
114+
TRT_ENGINE_ADD_LAYER(engine_, Scale, *const_cast<nvinfer1::ITensor*>(X),
115+
nvinfer1::ScaleMode::kCHANNEL, shift_weights.get(),
116+
scale_weights.get(), power_weights.get());
117117

118118
auto output_name = op_desc.Output("Y").front();
119+
engine_->weight_map[op_desc.Input("Bias").front()] =
120+
std::move(combile_bias_tensor);
121+
engine_->weight_map[op_desc.Input("Scale").front()] =
122+
std::move(combile_scale_tensor);
123+
119124
engine_->SetITensor(output_name, layer->getOutput(0));
120125

121126
if (test_mode) {

paddle/fluid/inference/tensorrt/convert/test_batch_norm_op.cc

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ namespace inference {
2121
namespace tensorrt {
2222

2323
TEST(batch_norm_op, test) {
24-
std::unordered_set<std::string> parameters({"batch_norm_scale",
25-
"batch_norm_bias", "batch_norm_mean", "batch_norm_variance" });
24+
std::unordered_set<std::string> parameters(
25+
{"batch_norm_scale", "batch_norm_bias", "batch_norm_mean",
26+
"batch_norm_variance"});
2627
framework::Scope scope;
2728
TRTConvertValidation validator(5, parameters, scope, 1 << 15);
2829
std::vector<int> param_shape{2};
@@ -38,6 +39,7 @@ TEST(batch_norm_op, test) {
3839

3940
// Prepare Op description
4041
framework::OpDesc desc;
42+
4143
desc.SetType("batch_norm");
4244
desc.SetInput("X", {"batch_norm_X"});
4345
desc.SetInput("Scale", {"batch_norm_scale"});
@@ -54,10 +56,12 @@ TEST(batch_norm_op, test) {
5456
bool is_test = true;
5557
desc.SetAttr("epsilon", eps);
5658
desc.SetAttr("is_test", is_test);
57-
59+
5860
validator.SetOp(*desc.Proto());
5961

60-
std::unordered_set<std::string> neglected_output = {"batch_norm_save_mean", "batch_norm_save_variance", "batch_norm_mean", "batch_norm_variance"};
62+
std::unordered_set<std::string> neglected_output = {
63+
"batch_norm_save_mean", "batch_norm_save_variance", "batch_norm_mean",
64+
"batch_norm_variance"};
6165
validator.Execute(3, neglected_output);
6266
}
6367

paddle/fluid/inference/tensorrt/convert/ut_helper.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,19 @@ class TRTConvertValidation {
9898
engine_->DeclareInput(name, nvinfer1::DataType::kFLOAT, dims);
9999
}
100100

101+
void DeclParamVar(const std::string& name, const std::vector<int> dim_vec) {
102+
DeclVar(name, dim_vec);
103+
}
104+
101105
// Declare a parameter varaible in the scope.
102106
void DeclParamVar(const std::string& name, const nvinfer1::Dims& dims) {
103107
DeclVar(name, dims, true);
104108
}
105109

110+
void DeclOutputVar(const std::string& name, const std::vector<int> dim_vec) {
111+
DeclVar(name, dim_vec);
112+
}
113+
106114
void DeclOutputVar(const std::string& name, const nvinfer1::Dims& dims) {
107115
DeclVar(name, dims);
108116
}
@@ -155,7 +163,8 @@ class TRTConvertValidation {
155163
}
156164
}
157165

158-
void Execute(int batch_size) {
166+
void Execute(int batch_size,
167+
std::unordered_set<std::string> neglected_output = {}) {
159168
// Execute Fluid Op
160169
PADDLE_ENFORCE_LE(batch_size, max_batch_size_);
161170
platform::CUDAPlace place;
@@ -168,6 +177,7 @@ class TRTConvertValidation {
168177
ASSERT_FALSE(op_desc_->OutputArgumentNames().empty());
169178
const size_t output_space_size = 3000;
170179
for (const auto& output : op_desc_->OutputArgumentNames()) {
180+
if (neglected_output.count(output)) continue;
171181
std::vector<float> fluid_out;
172182
std::vector<float> trt_out(output_space_size);
173183
engine_->GetOutputInCPU(output, &trt_out[0], output_space_size);

0 commit comments

Comments (0)