Skip to content

Commit bcd67bd

Browse files
committed
add assert for GetOutput
1 parent 7382f98 commit bcd67bd

File tree

5 files changed

+20
-13
lines changed

5 files changed

+20
-13
lines changed

paddle/fluid/inference/tensorrt/convert/ut_helper.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,11 +139,11 @@ class TRTConvertValidation {
139139
cudaStreamSynchronize(*engine_->stream());
140140

141141
ASSERT_FALSE(op_desc_->OutputArgumentNames().empty());
142-
const size_t output_space_size = 200;
142+
const size_t output_space_size = 2000;
143143
for (const auto& output : op_desc_->OutputArgumentNames()) {
144144
std::vector<float> fluid_out;
145145
std::vector<float> trt_out(output_space_size);
146-
engine_->GetOutputInCPU(output, &trt_out[0]);
146+
engine_->GetOutputInCPU(output, &trt_out[0], output_space_size);
147147
cudaStreamSynchronize(*engine_->stream());
148148

149149
auto* var = scope_.FindVar(output);

paddle/fluid/inference/tensorrt/engine.cc

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
22
3-
Licensed under the Apache License, Version 2.0 (the "License");
4-
you may not use this file except in compliance with the License.
3+
Licensed under the Apache License, Version 2.0 (the "License"); you may not use
4+
this file except in compliance with the License.
55
You may obtain a copy of the License at
66
77
http://www.apache.org/licenses/LICENSE-2.0
@@ -149,7 +149,8 @@ void *TensorRTEngine::GetOutputInGPU(const std::string &name) {
149149
return buffer(name).buffer;
150150
}
151151

152-
void TensorRTEngine::GetOutputInGPU(const std::string &name, void *dst) {
152+
void TensorRTEngine::GetOutputInGPU(const std::string &name, void *dst,
153+
size_t max_size) {
153154
// determine data size
154155
auto *output = TensorRTEngine::GetITensor(name);
155156
nvinfer1::Dims dims = output->getDimensions();
@@ -161,14 +162,16 @@ void TensorRTEngine::GetOutputInGPU(const std::string &name, void *dst) {
161162
PADDLE_ENFORCE(it != buffer_sizes_.end());
162163
PADDLE_ENFORCE_GT(it->second, 0);
163164
PADDLE_ENFORCE_LE(dst_size, it->second);
165+
PADDLE_ENFORCE_GE(max_size, dst_size);
164166
auto &buf = buffer(name);
165167
PADDLE_ENFORCE_NOT_NULL(buf.buffer, "buffer should be allocated before");
166168
PADDLE_ENFORCE_EQ(cudaMemcpyAsync(dst, buf.buffer, dst_size,
167169
cudaMemcpyDeviceToDevice, *stream_),
168170
0);
169171
}
170172

171-
void TensorRTEngine::GetOutputInCPU(const std::string &name, void *dst) {
173+
void TensorRTEngine::GetOutputInCPU(const std::string &name, void *dst,
174+
size_t max_size) {
172175
// determine data size
173176

174177
auto *output = TensorRTEngine::GetITensor(name);
@@ -180,6 +183,7 @@ void TensorRTEngine::GetOutputInCPU(const std::string &name, void *dst) {
180183
PADDLE_ENFORCE(it != buffer_sizes_.end());
181184
PADDLE_ENFORCE_GT(it->second, 0);
182185
PADDLE_ENFORCE_LE(dst_size, it->second);
186+
PADDLE_ENFORCE_GE(max_size, dst_size);
183187
auto &buf = buffer(name);
184188
PADDLE_ENFORCE_NOT_NULL(buf.buffer, "buffer should be allocated before");
185189
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(dst, buf.buffer, dst_size,

paddle/fluid/inference/tensorrt/engine.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,10 @@ class TensorRTEngine : public EngineBase {
106106
// Return the output's GPU memory address without copy.
107107
void* GetOutputInGPU(const std::string& name);
108108
// Copy data into dst inside the GPU device.
109-
void GetOutputInGPU(const std::string& name, void* dst);
109+
void GetOutputInGPU(const std::string& name, void* dst, size_t max_size);
110110
// LOW EFFICIENCY! Get output to CPU, this will trigger a memory copy from GPU
111111
// to CPU.
112-
void GetOutputInCPU(const std::string& name, void* dst);
112+
void GetOutputInCPU(const std::string& name, void* dst, size_t max_size);
113113
// Fill an ITensor into map itensor_map_.
114114
void SetITensor(const std::string& name, nvinfer1::ITensor* tensor);
115115
// Get an ITensor called name.

paddle/fluid/inference/tensorrt/test_engine.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ TEST_F(TensorRTEngineTest, add_layer) {
7171

7272
LOG(INFO) << "to get output";
7373
float y_cpu;
74-
engine_->GetOutputInCPU("y", &y_cpu);
74+
engine_->GetOutputInCPU("y", &y_cpu, 1 * sizeof(float));
7575

7676
LOG(INFO) << "to checkout output";
7777
ASSERT_EQ(y_cpu, x_v * 2 + 3);
@@ -108,7 +108,7 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
108108
ASSERT_EQ(dims.nbDims, 3);
109109
ASSERT_EQ(dims.d[0], 2);
110110
ASSERT_EQ(dims.d[1], 1);
111-
engine_->GetOutputInCPU("y", &y_cpu[0]);
111+
engine_->GetOutputInCPU("y", &y_cpu[0], 2 * sizeof(float));
112112
ASSERT_EQ(y_cpu[0], 4.5);
113113
ASSERT_EQ(y_cpu[1], 14.5);
114114
}
@@ -141,7 +141,7 @@ TEST_F(TensorRTEngineTest, test_conv2d_temp) {
141141

142142
LOG(INFO) << "to get output";
143143
float* y_cpu = new float[18];
144-
engine_->GetOutputInCPU("y", &y_cpu[0]);
144+
engine_->GetOutputInCPU("y", &y_cpu[0], 18 * sizeof(float));
145145
ASSERT_EQ(y_cpu[0], 4.0);
146146
ASSERT_EQ(y_cpu[1], 6.0);
147147
}

paddle/fluid/operators/tensorrt_engine_op.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,11 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
100100
// tensor.
101101
// if (platform::is_cpu_place(fluid_t->place())) {
102102
// TODO(Superjomn) change this float to dtype size.
103-
engine->GetOutputInCPU(
104-
y, fluid_t->mutable_data<float>(platform::CPUPlace()));
103+
auto size = inference::analysis::AccuDims(dims.d, dims.nbDims) *
104+
FLAGS_tensorrt_engine_batch_size;
105+
engine->GetOutputInCPU(y,
106+
fluid_t->mutable_data<float>(platform::CPUPlace()),
107+
size * sizeof(float));
105108
//} else {
106109
// engine->GetOutputInGPU(
107110
// y, fluid_t->mutable_data<float>(platform::CUDAPlace()),

0 commit comments

Comments
 (0)