Commit beb1245

add relu converter and unit-test

1 parent 9945265 commit beb1245

9 files changed: +197 -26 lines changed
paddle/fluid/inference/tensorrt/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
 nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader)
 nv_test(test_tensorrt_engine SRCS test_engine.cc engine.cc DEPS dynload_cuda)
+set(ENGINE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/engine.cc)
 add_subdirectory(convert)
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
@@ -1,2 +1,3 @@
-file(GLOB TENSORRT_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc")
-nv_test(test_tensorrt_op_converter SRCS test_op_converter.cc ${TENSORRT_OPS} DEPS ${FLUID_CORE_MODULES})
+nv_test(test_tensorrt_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES})
+nv_test(test_tensorrt_activation_op SRCS test_activation_op.cc ${ENGINE_FILE} activation_op.cc
+        DEPS ${FLUID_CORE_MODULES} activation_op)
paddle/fluid/inference/tensorrt/convert/activation_op.cc

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+class ReluOpConverter : public OpConverter {
+ public:
+  ReluOpConverter() {}
+  void operator()(const framework::OpDesc& op) override {
+    LOG(INFO) << "convert a fluid relu op to tensorrt activation layer whose "
+                 "type is Relu";
+    const nvinfer1::ITensor* input_tensor =
+        engine_->GetITensor(op.Input("X")[0]);
+    nvinfer1::IActivationLayer* layer = TRT_ENGINE_ADD_LAYER(
+        engine_, Activation, *const_cast<nvinfer1::ITensor*>(input_tensor),
+        nvinfer1::ActivationType::kRELU);
+    engine_->SetITensor(op.Output("Out")[0], layer->getOutput(0));
+  }
+};
+
+REGISTER_TRT_OP_CONVERTER(relu, ReluOpConverter);
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
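
The TRT_ENGINE_ADD_LAYER macro used above lives in engine.h and is not part of this diff. A minimal sketch of its likely shape (an assumption, not the committed definition): it pastes the layer name onto the INetworkDefinition's add* family, so a converter body stays one line per layer.

// Hedged sketch; engine.h holds the real definition. Under this assumption,
// TRT_ENGINE_ADD_LAYER(engine_, Activation, *tensor, kRELU) expands to
// engine_->network()->addActivation(*tensor, kRELU).
#define TRT_ENGINE_ADD_LAYER(engine__, layer__, ARGS...) \
  engine__->network()->add##layer__(ARGS);

Read this way, ReluOpConverter is just a lookup of the input ITensor by its fluid variable name, one addActivation call, and a registration of the layer's output tensor.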

paddle/fluid/inference/tensorrt/convert/op_converter.h

Lines changed: 21 additions & 19 deletions
@@ -30,13 +30,14 @@ namespace tensorrt {
 class OpConverter {
  public:
   OpConverter() {}
-
   virtual void operator()(const framework::OpDesc& op) {}
-  void Execute(const framework::OpDesc& op) {
+
+  void Execute(const framework::OpDesc& op, TensorRTEngine* engine) {
     std::string type = op.Type();
     auto it = converters_.find(type);
     PADDLE_ENFORCE(it != converters_.end(), "no OpConverter for optype [%s]",
                    type);
+    it->second->SetEngine(engine);
     (*it->second)(op);
   }

@@ -50,18 +51,31 @@ class OpConverter {
     converters_[key] = new T;
   }

+  // convert fluid op to tensorrt layer
+  void ConvertOp(const framework::OpDesc& op, TensorRTEngine* engine) {
+    OpConverter::Global().Execute(op, engine);
+  }
+
+  // convert fluid block to tensorrt network
+  void ConvertBlock(const framework::BlockDesc& block, TensorRTEngine* engine) {
+    for (auto op : block.AllOps()) {
+      OpConverter::Global().Execute(*op, engine);
+    }
+  }
+
+  void SetEngine(TensorRTEngine* engine) { engine_ = engine; }
+
   virtual ~OpConverter() {}

+  // TensorRT engine
+  TensorRTEngine* engine_{nullptr};
+
 private:
  // registered op converter map, whose key is the fluid op type, and value is
  // the pointer position of corresponding OpConverter class.
  std::unordered_map<std::string, OpConverter*> converters_;
-
  // fluid inference scope
-  framework::Scope* scope_;
-  // tensorrt input/output tensor map, whose key is the fluid variable name,
-  // and value is the pointer position of tensorrt tensor
-  std::unordered_map<std::string, nvinfer1::ITensor*> tr_tensors_;
+  framework::Scope* scope_{nullptr};
 };

@@ -72,18 +86,6 @@ class OpConverter {
   };                                                          \
   trt_##op_type__##_converter trt_##op_type__##_converter__;

-class BlockConverter {
- public:
-  BlockConverter() {}
-
-  // convert fluid block to tensorrt network
-  void ConvertBlock(const framework::BlockDesc& block) {
-    for (auto op : block.AllOps()) {
-      OpConverter::Global().Execute(*op);
-    }
-  }
-};
-
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
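
Only the tail of REGISTER_TRT_OP_CONVERTER is visible in the last hunk. A hedged reconstruction of the whole macro, assuming it combines the visible trt_##op_type__##_converter struct with the Register<T>() and Global() members shown above:

// Sketch (assumption): a file-scope object whose constructor runs before
// main() and inserts the converter into converters_, keyed by fluid op type.
#define REGISTER_TRT_OP_CONVERTER(op_type__, Converter__)        \
  struct trt_##op_type__##_converter {                           \
    trt_##op_type__##_converter() {                              \
      OpConverter::Global().Register<Converter__>(#op_type__);   \
    }                                                            \
  };                                                             \
  trt_##op_type__##_converter trt_##op_type__##_converter__;

Under that reading, test_tensorrt_activation_op only needs to link activation_op.cc: the static registrar wires "relu" into the map at load time, and Execute() finds it by op type.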
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/place.h"
+
+USE_OP(relu);
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+void compare(float input, float expect) {
+  framework::Scope scope;
+  platform::CUDAPlace place;
+  platform::CUDADeviceContext ctx(place);
+
+  // init fluid op and variable
+  auto x_var = scope.Var("X");
+  auto x_tensor = x_var->GetMutable<framework::LoDTensor>();
+  x_tensor->Resize({1, 1});
+  std::vector<float> init;
+  init.push_back(input);
+  framework::TensorFromVector(init, ctx, x_tensor);
+
+  auto out_var = scope.Var("Out");
+  auto out_tensor = out_var->GetMutable<framework::LoDTensor>();
+  out_tensor->Resize({1, 1});
+  out_tensor->mutable_data<float>(place);
+
+  framework::OpDesc op_desc;
+  op_desc.SetType("relu");
+  op_desc.SetInput("X", {"X"});
+  op_desc.SetOutput("Out", {"Out"});
+
+  auto relu_op = framework::OpRegistry::CreateOp(op_desc);
+
+  // run fluid op
+  relu_op->Run(scope, place);
+  std::vector<float> out1;
+  framework::TensorToVector(*out_tensor, ctx, &out1);
+
+  // init tensorrt op
+  cudaStream_t stream;
+  ASSERT_EQ(0, cudaStreamCreate(&stream));
+  TensorRTEngine* engine = new TensorRTEngine(1, 1 << 10, &stream);
+  engine->InitNetwork();
+  engine->DeclareInput("X", nvinfer1::DataType::kFLOAT,
+                       nvinfer1::DimsCHW{1, 1, 1});
+
+  OpConverter op_converter;
+  op_converter.ConvertOp(op_desc, engine);
+
+  engine->DeclareOutput("Out");
+  engine->FreezeNetwork();
+  engine->SetInputFromCPU("X", &input, 1 * sizeof(float));
+
+  // run tensorrt op
+  engine->Execute(1);
+
+  float out2;
+  engine->GetOutputInCPU("Out", &out2, 1 * sizeof(float));
+
+  ASSERT_EQ(out1[0], out2);
+  ASSERT_EQ(out1[0], expect);
+
+  delete engine;
+  cudaStreamDestroy(stream);
+}
+
+TEST(OpConverter, ConvertRelu) {
+  compare(1, 1);   // relu(1) = 1
+  compare(-5, 0);  // relu(-5) = 0
+}
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle

paddle/fluid/inference/tensorrt/convert/test_op_converter.cc

Lines changed: 2 additions & 2 deletions
@@ -28,8 +28,8 @@ TEST(BlockConverter, ConvertBlock) {
   auto* conv2d_op = block->AppendOp();
   conv2d_op->SetType("conv2d");

-  BlockConverter converter;
-  converter.ConvertBlock(*block);
+  OpConverter converter;
+  converter.ConvertBlock(*block, nullptr /*TensorRTEngine*/);
 }

 }  // namespace tensorrt

paddle/fluid/inference/tensorrt/engine.cc

Lines changed: 28 additions & 2 deletions
@@ -80,8 +80,8 @@ nvinfer1::ITensor* TensorRTEngine::DeclareInput(const std::string& name,
   PADDLE_ENFORCE(infer_network_ != nullptr, "should initnetwork first");
   auto* input = infer_network_->addInput(name.c_str(), dtype, dim);
   PADDLE_ENFORCE(input, "infer network add input %s failed", name);
-
   buffer_sizes_[name] = kDataTypeSize[static_cast<int>(dtype)] * AccumDims(dim);
+  TensorRTEngine::SetITensor(name, input);
   return input;
 }

@@ -99,6 +99,19 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer* layer, int offset,
   buffer_sizes_[name] = 0;
 }

+void TensorRTEngine::DeclareOutput(const std::string& name) {
+  PADDLE_ENFORCE_EQ(0, buffer_sizes_.count(name), "duplicate output name %s",
+                    name);
+
+  auto* output = TensorRTEngine::GetITensor(name);
+  PADDLE_ENFORCE(output != nullptr);
+  output->setName(name.c_str());
+  infer_network_->markOutput(*output);
+  // the output buffer's size can only be decided later; set zero here to mark
+  // it, and it will be reset later.
+  buffer_sizes_[name] = 0;
+}
+
 void* TensorRTEngine::GetOutputInGPU(const std::string& name) {
   return buffer(name);
 }

@@ -110,7 +123,6 @@ void TensorRTEngine::GetOutputInCPU(const std::string& name, void* dst,
   PADDLE_ENFORCE(it != buffer_sizes_.end());
   PADDLE_ENFORCE_GT(it->second, 0);
   PADDLE_ENFORCE_GE(max_size, it->second);
-
   PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(dst, buffer(name), it->second,
                                        cudaMemcpyDeviceToHost, *stream_));
 }

@@ -126,10 +138,24 @@ void*& TensorRTEngine::buffer(const std::string& name) {
 void TensorRTEngine::SetInputFromCPU(const std::string& name, void* data,
                                      size_t size) {
   void* buf = buffer(name);
+  cudaMemcpyAsync(buf, data, size, cudaMemcpyHostToDevice, *stream_);
   PADDLE_ENFORCE_EQ(
       0, cudaMemcpyAsync(buf, data, size, cudaMemcpyHostToDevice, *stream_));
 }

+void TensorRTEngine::SetITensor(const std::string& name,
+                                nvinfer1::ITensor* tensor) {
+  PADDLE_ENFORCE(tensor != nullptr);
+  PADDLE_ENFORCE_EQ(0, itensor_map_.count(name), "duplicate itensor name %s",
+                    name);
+  itensor_map_[name] = tensor;
+}
+
+nvinfer1::ITensor* TensorRTEngine::GetITensor(const std::string& name) {
+  PADDLE_ENFORCE(itensor_map_.count(name), "no itensor %s", name);
+  return itensor_map_[name];
+}
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
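
Taken together with the converter changes, the new itensor_map_ gives fluid variable names a stable mapping to TensorRT tensors. A minimal sketch of the round-trip, mirroring how test_activation_op.cc drives it (illustrative, not new API):

// DeclareInput registers the network input under its fluid variable name;
// the relu converter looks it up and registers the layer output under "Out";
// DeclareOutput then finds "Out" in the map and marks it as a network output.
engine->InitNetwork();
engine->DeclareInput("X", nvinfer1::DataType::kFLOAT,
                     nvinfer1::DimsCHW{1, 1, 1});  // calls SetITensor("X", ...)
OpConverter converter;
converter.ConvertOp(op_desc, engine);  // GetITensor("X") ... SetITensor("Out", ...)
engine->DeclareOutput("Out");          // GetITensor("Out"), then markOutput
engine->FreezeNetwork();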

paddle/fluid/inference/tensorrt/engine.h

Lines changed: 8 additions & 0 deletions
@@ -80,6 +80,8 @@ class TensorRTEngine : public EngineBase {
   // name.
   void DeclareOutput(const nvinfer1::ILayer* layer, int offset,
                      const std::string& name);
+  // Set the itensor_map_[name] as the network's output, and set its name.
+  void DeclareOutput(const std::string& name);

   // GPU memory address for an ITensor with a specific name. One can operate
   // on this memory directly for acceleration, for example, output the converted

@@ -98,6 +100,10 @@ class TensorRTEngine : public EngineBase {
   // LOW EFFICIENCY! Get output to CPU; this will trigger a memory copy from
   // GPU to CPU.
   void GetOutputInCPU(const std::string& name, void* dst, size_t max_size);
+  // Fill an ITensor into map itensor_map_.
+  void SetITensor(const std::string& name, nvinfer1::ITensor* tensor);
+  // Get an ITensor called name.
+  nvinfer1::ITensor* GetITensor(const std::string& name);

   nvinfer1::ICudaEngine* engine() { return infer_engine_.get(); }
   nvinfer1::INetworkDefinition* network() { return infer_network_.get(); }

@@ -113,6 +119,8 @@ class TensorRTEngine : public EngineBase {
   std::vector<void*> buffers_;
   // max data size for the buffers.
   std::unordered_map<std::string /*name*/, size_t /*max size*/> buffer_sizes_;
+  std::unordered_map<std::string /*name*/, nvinfer1::ITensor* /*ITensor*/>
+      itensor_map_;

   // TensorRT related internal members
   template <typename T>

paddle/fluid/inference/tensorrt/test_engine.cc

Lines changed: 0 additions & 1 deletion
@@ -70,7 +70,6 @@ TEST_F(TensorRTEngineTest, add_layer) {
   engine_->Execute(1);

   LOG(INFO) << "to get output";
-  // void* y_v =
   float y_cpu;
   engine_->GetOutputInCPU("y", &y_cpu, sizeof(float));

