
Commit 6d80dd5

Merge pull request #11222 from luotao1/trt
rewrite unittest of trt_activation_op
2 parents: 50104f1 + f6fb51a

4 files changed: 41 additions & 94 deletions

cmake/configure.cmake

Lines changed: 3 additions & 0 deletions
@@ -92,6 +92,9 @@ if(WITH_GPU)
     if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
       message(FATAL_ERROR "TensorRT needs CUDNN >= 7.0 to compile")
     endif()
+    if(${TENSORRT_MAJOR_VERSION} VERSION_LESS 4)
+      message(FATAL_ERROR "Paddle needs TensorRT >= 4.0 to compile")
+    endif()
     include_directories(${TENSORRT_INCLUDE_DIR})
   endif()
 elseif(WITH_AMD_GPU)

Lines changed: 2 additions & 5 deletions
@@ -1,9 +1,4 @@
 # Add TRT tests
-# This test is not stable
-# See https://paddleci.ngrok.io/viewLog.html?tab=buildLog&buildTypeId=Paddle_PrCi2&buildId=36834&_focus=8828
-#nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
-#        DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine
-#        SERIAL)
 nv_library(tensorrt_converter
   SRCS mul_op.cc conv2d_op.cc fc_op.cc
   DEPS tensorrt_engine mul_op)
@@ -16,3 +11,5 @@ nv_test(test_trt_mul_op SRCS test_mul_op.cc mul_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)
 nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)
+nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
+        DEPS ${FLUID_CORE_MODULES} tensorrt_engine activation_op SERIAL)

paddle/fluid/inference/tensorrt/convert/activation_op.cc

Lines changed: 8 additions & 2 deletions
@@ -22,7 +22,8 @@ namespace tensorrt {
 class ReluOpConverter : public OpConverter {
  public:
   ReluOpConverter() {}
-  void operator()(const framework::proto::OpDesc& op) override {
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
     // Here the two nullptr looks strange, that's because the
     // framework::OpDesc's constructor is strange.
     framework::OpDesc op_desc(op, nullptr);
@@ -33,7 +34,12 @@ class ReluOpConverter : public OpConverter {
     nvinfer1::IActivationLayer* layer = TRT_ENGINE_ADD_LAYER(
         engine_, Activation, *const_cast<nvinfer1::ITensor*>(input_tensor),
         nvinfer1::ActivationType::kRELU);
-    engine_->SetITensor(op_desc.Output("Out")[0], layer->getOutput(0));
+    auto output_name = op_desc.Output("Out")[0];
+    engine_->SetITensor(output_name, layer->getOutput(0));
+    if (test_mode) {  // the test framework can not determine which is the
+                      // output, so place the declaration inside.
+      engine_->DeclareOutput(output_name);
+    }
   }
 };
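Read in isolation, the converter contract after this change looks like the sketch below. This is only an illustration, not part of the commit: PassThroughOpConverter is a hypothetical name, the TensorRT layer construction (the TRT_ENGINE_ADD_LAYER / SetITensor calls shown in the diff above) is elided, and the base-class signature is assumed to match what ReluOpConverter now overrides. The key point is that only the converter knows which ITensor is the op's output, so in test mode it declares the output itself instead of leaving that to the caller (the old unit test called engine->DeclareOutput("Out") by hand).

#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"

namespace paddle {
namespace inference {
namespace tensorrt {

// Hypothetical converter, for illustration only (not part of this commit).
class PassThroughOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    framework::OpDesc op_desc(op, nullptr);
    auto output_name = op_desc.Output("Out")[0];
    // ... build TensorRT layers here and register the result with
    // engine_->SetITensor(output_name, layer->getOutput(0)); ...
    if (test_mode) {
      // The test harness cannot know which variable is the output,
      // so the converter declares it when driven by the unit test.
      engine_->DeclareOutput(output_name);
    }
  }
};

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle

In the real ReluOpConverter, the elided middle is exactly the activation-layer code shown in the diff above.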

Lines changed: 28 additions & 87 deletions
@@ -1,106 +1,47 @@
 /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
 
-http://www.apache.org/licenses/LICENSE-2.0
+http://www.apache.org/licenses/LICENSE-2.0
 
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
 
 #include <gtest/gtest.h>
-#include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
-#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/place.h"
-
-USE_OP(relu);
+#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
 
 namespace paddle {
 namespace inference {
 namespace tensorrt {
 
-void Compare(const std::string op_type, float input, float expect) {
+TEST(ReluOpConverter, main) {
   framework::Scope scope;
-  platform::CUDAPlace place;
-  platform::CUDADeviceContext ctx(place);
-
-  // init fluid op and variable
-  auto x_var = scope.Var("X");
-  auto x_tensor = x_var->GetMutable<framework::LoDTensor>();
-  x_tensor->Resize({1, 1});
-  x_tensor->mutable_data<float>(place);
-  std::vector<float> init;
-  init.push_back(input);
-  framework::TensorFromVector(init, ctx, x_tensor);
-
-  auto out_var = scope.Var("Out");
-  auto out_tensor = out_var->GetMutable<framework::LoDTensor>();
-  out_tensor->Resize({1, 1});
-  out_tensor->mutable_data<float>(place);
-
-  framework::OpDesc op_desc;
-  op_desc.SetType(op_type);
-  op_desc.SetInput("X", {"X"});
-  op_desc.SetOutput("Out", {"Out"});
-
-  auto op = framework::OpRegistry::CreateOp(*op_desc.Proto());
-
-  // run fluid op
-  op->Run(scope, place);
-  // get fluid output
-  std::vector<float> out1;
-  framework::TensorToVector(*out_tensor, ctx, &out1);
-
-  // init tensorrt op
-  cudaStream_t stream;
-  ASSERT_EQ(0, cudaStreamCreate(&stream));
-  TensorRTEngine* engine = new TensorRTEngine(1, 1 << 10, &stream);
-  engine->InitNetwork();
-  engine->DeclareInput("X", nvinfer1::DataType::kFLOAT,
-                       nvinfer1::DimsCHW{1, 1, 1});
-  // convert op
-  OpConverter op_converter;
-  op_converter.ConvertOp(*op_desc.Proto(), engine);
-
-  engine->DeclareOutput("Out");
-  engine->FreezeNetwork();
-
-  // convert LoDTensor to ITensor
-  size_t size = x_tensor->memory_size();
-  EngineIOConverter::ConvertInput(op_type, *x_tensor,
-                                  engine->buffer("X").buffer, size, &stream);
-  // run tensorrt Outp
-  engine->Execute(1);
-  // convert ITensor to LoDTensor
-  EngineIOConverter::ConvertOutput(op_type, engine->buffer("Out").buffer,
-                                   out_tensor, size, &stream);
-  // get tensorrt output
-  std::vector<float> out2;
-  framework::TensorToVector(*out_tensor, ctx, &out2);
-
-  // compare
-  ASSERT_EQ(out1[0], out2[0]);
-  ASSERT_EQ(out1[0], expect);
-
-  delete engine;
-  cudaStreamDestroy(stream);
-}
-
-TEST(OpConverter, ConvertRelu) {
-  Compare("relu", 1, 1);   // relu(1) = 1
-  Compare("relu", -5, 0);  // relu(-5) = 0
+  std::unordered_set<std::string> parameters;
+  TRTConvertValidation validator(10, parameters, scope, 1000);
+  validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6));
+  validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6));
+
+  // Prepare Op description
+  framework::OpDesc desc;
+  desc.SetType("relu");
+  desc.SetInput("X", {"relu-X"});
+  desc.SetOutput("Out", {"relu-Out"});
+
+  LOG(INFO) << "set OP";
+  validator.SetOp(*desc.Proto());
+  LOG(INFO) << "execute";
+
+  validator.Execute(10);
 }
 
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
 
-USE_OP(activation);
+USE_OP(relu);
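For readability, here is the new test body again with comments on what each call appears to do. The comments are interpretations inferred from usage of the ut_helper.h helper, not documentation from this commit; in particular, reading the first constructor argument as a maximum batch size and the last as a workspace-size limit is an assumption.

// Annotated copy of the body of TEST(ReluOpConverter, main); comments are
// interpretations, not part of the committed code.
framework::Scope scope;
// Names of persistable inputs (weights); relu has none.
std::unordered_set<std::string> parameters;
// Assumed meaning: max batch size = 10, workspace-size limit = 1000.
TRTConvertValidation validator(10, parameters, scope, 1000);

// Declare the variables the op reads and writes, with their shapes, so the
// helper can create them in both the fluid scope and the TensorRT network.
validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6));
validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6));

// Describe the fluid op once; the helper builds the fluid operator and the
// corresponding TensorRT layer(s) from this single description.
framework::OpDesc desc;
desc.SetType("relu");
desc.SetInput("X", {"relu-X"});
desc.SetOutput("Out", {"relu-Out"});
validator.SetOp(*desc.Proto());

// Run both paths on the same inputs and compare their outputs.
validator.Execute(10);

Compared with the retired Compare() helper, the engine, stream, and buffer handling now lives in ut_helper.h, so a converter test reduces to declaring variables, describing the op, and calling Execute().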
