Skip to content

Commit 8252769

Browse files
committed
1. we delete mul op, 2. modify fc and activation op, 3. modify the test interface
1 parent 2372daf commit 8252769

File tree

7 files changed

+21
-125
lines changed

7 files changed

+21
-125
lines changed

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
# Add TRT tests
22
nv_library(tensorrt_converter
3-
SRCS mul_op.cc conv2d_op.cc fc_op.cc
3+
SRCS conv2d_op.cc fc_op.cc
44
DEPS tensorrt_engine mul_op)
55

66
nv_test(test_op_converter SRCS test_op_converter.cc DEPS
77
${FLUID_CORE_MODULES} tensorrt_engine tensorrt_converter)
88

99
nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
10-
nv_test(test_trt_mul_op SRCS test_mul_op.cc mul_op.cc
11-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)
1210
nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
1311
DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)
1412
nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc

paddle/fluid/inference/tensorrt/convert/fc_op.cc

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,13 @@ void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides,
3232
for (int h = 0; h < shape.h(); ++h) {
3333
for (int w = 0; w < shape.w(); ++w) {
3434
odata[h * ostrides.h() + w * ostrides.w()] =
35-
idata[h * ostrides.h() + w * ostrides.w()];
35+
idata[h * istrides.h() + w * istrides.w()];
3636
}
3737
}
3838
}
39-
39+
// indata c * k
4040
// Reorder the data layout from CK to KC.
41-
void ReorderCKtoKC(TensorRTEngine::Weight& iweights,
41+
void ReorderCKtoKC(const TensorRTEngine::Weight& iweights,
4242
TensorRTEngine::Weight* oweights) {
4343
int c = iweights.dims[0];
4444
int k = iweights.dims[1];
@@ -79,9 +79,8 @@ class FcOpConverter : public OpConverter {
7979

8080
framework::LoDTensor tmp;
8181
tmp.Resize(Y_t->dims());
82-
memcpy(tmp.mutable_data<float>(platform::CPUPlace()), Y_t->data<float>(),
83-
Y_t->dims()[0] * Y_t->dims()[1]);
84-
82+
memcpy(tmp.mutable_data<float>(platform::CPUPlace()), weight_data,
83+
Y_t->dims()[0] * Y_t->dims()[1] * sizeof(float));
8584
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
8685
static_cast<void*>(weight_data),
8786
Y_t->memory_size() / sizeof(float)};
@@ -93,7 +92,7 @@ class FcOpConverter : public OpConverter {
9392

9493
// The data layout of TRT FC layer's weight is different from fluid's FC,
9594
// need to reorder the elements.
96-
ReorderCKtoKC(tmp_weight, &weight);
95+
ReorderCKtoKC(weight, &tmp_weight);
9796

9897
// Currently, the framework can only handle one fluid op -> one TRT layer,
9998
// but fc fuses `mul` and `bias` (2 fluid ops), so here is a trick, just
@@ -103,7 +102,7 @@ class FcOpConverter : public OpConverter {
103102

104103
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected,
105104
*const_cast<nvinfer1::ITensor*>(X),
106-
n_output, weight.get(), bias.get());
105+
n_output, tmp_weight.get(), bias.get());
107106

108107
auto output_name = op_desc.Output("Out").front();
109108
engine_->SetITensor(output_name, layer->getOutput(0));
@@ -117,5 +116,5 @@ class FcOpConverter : public OpConverter {
117116
} // namespace inference
118117
} // namespace paddle
119118

120-
REGISTER_TRT_OP_CONVERTER(fc, FcOpConverter);
119+
REGISTER_TRT_OP_CONVERTER(mul, FcOpConverter);
121120
USE_OP(mul);

paddle/fluid/inference/tensorrt/convert/mul_op.cc

Lines changed: 0 additions & 53 deletions
This file was deleted.

paddle/fluid/inference/tensorrt/convert/test_activation_op.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ namespace tensorrt {
2323
TEST(ReluOpConverter, main) {
2424
framework::Scope scope;
2525
std::unordered_set<std::string> parameters;
26-
TRTConvertValidation validator(10, parameters, scope, 1000);
26+
TRTConvertValidation validator(1, parameters, scope, 1000);
2727
validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6));
2828
validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6));
2929

@@ -37,7 +37,7 @@ TEST(ReluOpConverter, main) {
3737
validator.SetOp(*desc.Proto());
3838
LOG(INFO) << "execute";
3939

40-
validator.Execute(10);
40+
validator.Execute(1);
4141
}
4242

4343
} // namespace tensorrt

paddle/fluid/inference/tensorrt/convert/test_fc_op.cc

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,12 @@ namespace tensorrt {
2323
TEST(fc_op, test) {
2424
std::unordered_set<std::string> parameters({"mul-Y"});
2525
framework::Scope scope;
26-
TRTConvertValidation validator(20, parameters, scope, 1000);
26+
TRTConvertValidation validator(1, parameters, scope, 1000);
2727

28-
validator.DeclInputVar("mul-X", nvinfer1::Dims4(8, 3, 1, 1));
29-
validator.DeclParamVar("mul-Y", nvinfer1::Dims2(3, 2));
30-
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(8, 2));
28+
validator.DeclInputVar("mul-X", nvinfer1::Dims4(1, 10, 1, 1));
29+
validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2));
30+
// validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2));
31+
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2));
3132

3233
// Prepare Op description
3334
framework::OpDesc desc;
@@ -38,7 +39,7 @@ TEST(fc_op, test) {
3839

3940
validator.SetOp(*desc.Proto());
4041

41-
validator.Execute(10);
42+
validator.Execute(1);
4243
}
4344

4445
} // namespace tensorrt

paddle/fluid/inference/tensorrt/convert/test_mul_op.cc

Lines changed: 0 additions & 49 deletions
This file was deleted.

paddle/fluid/inference/tensorrt/convert/ut_helper.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ namespace tensorrt {
3939
float random(float low, float high) {
4040
static std::random_device rd;
4141
static std::mt19937 mt(rd());
42-
std::uniform_real_distribution<double> dist(1.0, 10.0);
42+
std::uniform_real_distribution<double> dist(low, high);
4343
return dist(mt);
4444
}
4545

@@ -49,6 +49,7 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
4949
size_t num_elements = analysis::AccuDims(dims, dims.size());
5050
PADDLE_ENFORCE_GT(num_elements, 0);
5151
auto* data = tensor->mutable_data<float>(place);
52+
5253
for (size_t i = 0; i < num_elements; i++) {
5354
*(data + i) = random(0., 1.);
5455
}
@@ -68,7 +69,7 @@ class TRTConvertValidation {
6869
int workspace_size = 1 << 10)
6970
: parameters_(parameters), scope_(scope) {
7071
// create engine.
71-
engine_.reset(new TensorRTEngine(10, 1 << 10, &stream_));
72+
engine_.reset(new TensorRTEngine(batch_size, workspace_size, &stream_));
7273
engine_->InitNetwork();
7374

7475
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
@@ -142,8 +143,7 @@ class TRTConvertValidation {
142143
for (const auto& output : op_desc_->OutputArgumentNames()) {
143144
std::vector<float> fluid_out;
144145
std::vector<float> trt_out(output_space_size);
145-
engine_->GetOutputInCPU(output, &trt_out[0],
146-
output_space_size * sizeof(float));
146+
engine_->GetOutputInCPU(output, &trt_out[0]);
147147
cudaStreamSynchronize(*engine_->stream());
148148

149149
auto* var = scope_.FindVar(output);

0 commit comments

Comments
 (0)