|
| 1 | +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. |
| 2 | +
|
| 3 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +you may not use this file except in compliance with the License. |
| 5 | +You may obtain a copy of the License at |
| 6 | +
|
| 7 | +http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +
|
| 9 | +Unless required by applicable law or agreed to in writing, software |
| 10 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +See the License for the specific language governing permissions and |
| 13 | +limitations under the License. */ |
| 14 | + |
| 15 | +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" |
| 16 | +#include <math.h> |
| 17 | + |
| 18 | +namespace paddle { |
| 19 | +namespace inference { |
| 20 | +namespace tensorrt { |
| 21 | + |
| 22 | +class BatchNormOpConverter : public OpConverter { |
| 23 | + public: |
| 24 | + void operator()(const framework::proto::OpDesc& op, |
| 25 | + const framework::Scope& scope, bool test_mode) override { |
| 26 | + LOG(INFO) |
| 27 | + << "convert a fluid batch norm op to tensorrt batch_norm"; |
| 28 | + |
| 29 | + framework::OpDesc op_desc(op, nullptr); |
| 30 | + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); |
| 31 | + PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1); // Bias is a weight |
| 32 | + PADDLE_ENFORCE_EQ(op_desc.Input("Mean").size(), 1); // Mean is a weight |
| 33 | + PADDLE_ENFORCE_EQ(op_desc.Input("Scale").size(), 1); // Scale is a weight |
| 34 | + PADDLE_ENFORCE_EQ(op_desc.Input("Variance").size(), 1); // Variance is a weight |
| 35 | + PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1); |
| 36 | + |
| 37 | + auto* X = engine_->GetITensor(op_desc.Input("X").front()); |
| 38 | + // Declare weights |
| 39 | + auto* Bias_v = scope.FindVar(op_desc.Input("Bias").front()); |
| 40 | + auto* Mean_v = scope.FindVar(op_desc.Input("Mean").front()); |
| 41 | + auto* Scale_v = scope.FindVar(op_desc.Input("Scale").front()); |
| 42 | + auto* Variance_v = scope.FindVar(op_desc.Input("Variance").front()); |
| 43 | + const float eps = boost::get<float>(op_desc.GetAttr("epsilon")); |
| 44 | + |
| 45 | + PADDLE_ENFORCE_NOT_NULL(Bias_v); |
| 46 | + PADDLE_ENFORCE_NOT_NULL(Mean_v); |
| 47 | + PADDLE_ENFORCE_NOT_NULL(Scale_v); |
| 48 | + PADDLE_ENFORCE_NOT_NULL(Variance_v); |
| 49 | + |
| 50 | + // get tensor |
| 51 | + auto* Bias_t = Bias_v->GetMutable<framework::LoDTensor>(); |
| 52 | + auto* Mean_t = Mean_v->GetMutable<framework::LoDTensor>(); |
| 53 | + auto* Scale_t = Scale_v->GetMutable<framework::LoDTensor>(); |
| 54 | + auto* Variance_t = Variance_v->GetMutable<framework::LoDTensor>(); |
| 55 | + |
| 56 | + |
| 57 | + // create temp tensor for weights |
| 58 | + framework::LoDTensor bias_tensor; |
| 59 | + framework::LoDTensor mean_tensor; |
| 60 | + framework::LoDTensor scale_tensor; |
| 61 | + framework::LoDTensor variance_tensor; |
| 62 | + |
| 63 | + bias_tensor.Resize(Bias_t->dims()); |
| 64 | + mean_tensor.Resize(Mean_t->dims()); |
| 65 | + scale_tensor.Resize(Scale_t->dims()); |
| 66 | + variance_tensor.Resize(Variance_t->dims()); |
| 67 | + |
| 68 | + platform::CPUPlace cpu_place; |
| 69 | + // copy data from gpu to cpu |
| 70 | + TensorCopySync((*Bias_t), cpu_place, &bias_tensor); |
| 71 | + TensorCopySync((*Mean_t), cpu_place, &mean_tensor); |
| 72 | + TensorCopySync((*Scale_t), cpu_place, &scale_tensor); |
| 73 | + TensorCopySync((*Variance_t), cpu_place, &variance_tensor); |
| 74 | + |
| 75 | + auto* bias_data = bias_tensor.mutable_data<float>(platform::CPUPlace()); |
| 76 | + auto* mean_data = mean_tensor.mutable_data<float>(platform::CPUPlace()); |
| 77 | + auto* scale_data = scale_tensor.mutable_data<float>(platform::CPUPlace()); |
| 78 | + auto* variance_data = variance_tensor.mutable_data<float>(platform::CPUPlace()); |
| 79 | + |
| 80 | + framework::LoDTensor *combile_scale_tensor = new framework::LoDTensor(); |
| 81 | + framework::LoDTensor *combile_bias_tensor = new framework::LoDTensor(); |
| 82 | + |
| 83 | + combile_scale_tensor->Resize(scale_tensor.dims()); |
| 84 | + combile_bias_tensor->Resize(bias_tensor.dims()); |
| 85 | + |
| 86 | + auto* combile_scale_data = combile_scale_tensor->mutable_data<float>(platform::CPUPlace()); |
| 87 | + auto* combile_bias_data = combile_bias_tensor->mutable_data<float>(platform::CPUPlace()); |
| 88 | + |
| 89 | + engine_->weight_map_[op_desc.Input("Bias").front()] = std::move(std::unique_ptr<framework::Tensor>(combile_bias_tensor)); |
| 90 | + engine_->weight_map_[op_desc.Input("Scale").front()] = std::move(std::unique_ptr<framework::Tensor>(combile_scale_tensor)); |
| 91 | + |
| 92 | + size_t ele_num = combile_scale_tensor->memory_size()/sizeof(float); |
| 93 | + |
| 94 | + for (size_t i = 0; i < ele_num; i++) { |
| 95 | + float scale = scale_data[i]; |
| 96 | + float bias = bias_data[i]; |
| 97 | + float mean = mean_data[i]; |
| 98 | + float variance = variance_data[i]; |
| 99 | + combile_scale_data[i] = scale / sqrtf(variance + eps); |
| 100 | + combile_bias_data[i] = bias - mean * combile_scale_data[i]; |
| 101 | + } |
| 102 | + |
| 103 | + |
| 104 | + TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT, |
| 105 | + static_cast<void*>(combile_scale_data), |
| 106 | + combile_scale_tensor->memory_size() / sizeof(float)}; |
| 107 | + TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT, |
| 108 | + static_cast<void *>(combile_bias_data), |
| 109 | + combile_bias_tensor->memory_size()/ sizeof(float)}; |
| 110 | + TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr, |
| 111 | + 0}; |
| 112 | + |
| 113 | + |
| 114 | + nvinfer1::IScaleLayer* layer = TRT_ENGINE_ADD_LAYER( |
| 115 | + engine_, Scale, *const_cast<nvinfer1::ITensor*>(X), nvinfer1::ScaleMode::kCHANNEL, |
| 116 | + shift_weights.get(), scale_weights.get(), power_weights.get()); |
| 117 | + |
| 118 | + auto output_name = op_desc.Output("Y").front(); |
| 119 | + engine_->SetITensor(output_name, layer->getOutput(0)); |
| 120 | + |
| 121 | + if (test_mode) { |
| 122 | + engine_->DeclareOutput(output_name); |
| 123 | + } |
| 124 | + } |
| 125 | +}; |
| 126 | + |
| 127 | +} // namespace tensorrt |
| 128 | +} // namespace inference |
| 129 | +} // namespace paddle |
| 130 | + |
// Associates BatchNormOpConverter with the `batch_norm` op name through the
// REGISTER_TRT_OP_CONVERTER macro. The macro is defined outside this file
// (presumably in op_converter.h) — confirm the exact registration semantics
// there.
| 131 | +REGISTER_TRT_OP_CONVERTER(batch_norm, BatchNormOpConverter); |
0 commit comments