Skip to content

Commit 8e163f9

Browse files
authored
[Inference Tensorrt] Add attr for trt engine and handle the input seq problem for ernie var len. (#33575) (#33622)
1 parent c3807f9 commit 8e163f9

File tree

7 files changed

+253
-7
lines changed

7 files changed

+253
-7
lines changed

paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
3636
framework::OpDesc op_desc(op, nullptr);
3737
auto word_id_name = op_desc.Input("WordId").front();
3838
auto pos_id_name = op_desc.Input("PosId").front();
39+
engine_->Set("ernie_pos_name", new std::string(pos_id_name));
40+
3941
auto sent_id_name = op_desc.Input("SentId").front();
4042
auto word_emb_name = op_desc.Input("WordEmbedding").front();
4143
auto pos_emb_name = op_desc.Input("PosEmbedding").front();

paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -191,9 +191,15 @@ class MultiheadMatMulOpConverter : public OpConverter {
191191
std::vector<nvinfer1::ITensor*> plugin_inputs;
192192
plugin_inputs.emplace_back(fc_layer->getOutput(0));
193193
plugin_inputs.emplace_back(mask_tensor);
194-
plugin_inputs.emplace_back(engine_->GetITensor(
195-
engine_->network()->getInput(2)->getName())); // cu_seqlens,
196-
// eval_placeholder_2
194+
if (engine_->Has("ernie_pos_name")) {
195+
plugin_inputs.emplace_back(
196+
engine_->GetITensor(engine_->Get<std::string>("ernie_pos_name")));
197+
} else {
198+
plugin_inputs.emplace_back(engine_->GetITensor(
199+
engine_->network()
200+
->getInput(2)
201+
->getName())); // cu_seqlens, eval_placeholder_2
202+
}
197203
auto max_seqlen_tensor =
198204
engine_->GetITensor(engine_->network()->getInput(3)->getName());
199205
auto* shuffle_layer = TRT_ENGINE_ADD_LAYER(

paddle/fluid/inference/tensorrt/convert/slice_op.cc

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,16 @@ class SliceOpConverter : public OpConverter {
7676
std::vector<nvinfer1::ITensor*> plugin_inputs;
7777
// plugin_inputs.emplace_back(trans_layer->getOutput(0));
7878
plugin_inputs.emplace_back(input);
79-
plugin_inputs.emplace_back(engine_->GetITensor(
80-
engine_->network()->getInput(2)->getName())); // cu_seqlens,
81-
// eval_placeholder_2
79+
80+
std::string pos_name;
81+
if (engine_->Has("ernie_pos_name")) {
82+
pos_name = engine_->Get<std::string>("ernie_pos_name");
83+
} else {
84+
// hard code for compatibility
85+
pos_name = engine_->network()->getInput(2)->getName();
86+
}
87+
plugin_inputs.emplace_back(
88+
engine_->GetITensor(pos_name)); // cu_seqlens, eval_placeholder_2
8289

8390
// bool ban_fp16 = engine_->disable_trt_plugin_fp16();
8491
plugin::SpecialSlicePluginDynamic* plugin =

paddle/fluid/inference/tensorrt/engine.h

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,15 @@ class TensorRTEngine {
202202
dy::initLibNvInferPlugins(&logger, "");
203203
}
204204

205-
~TensorRTEngine() {}
205+
~TensorRTEngine() {
206+
for (auto& attr : attrs_) {
207+
if (attr_dels_.find(attr.first) != attr_dels_.end()) {
208+
attr_dels_[attr.first]();
209+
}
210+
}
211+
attrs_.clear();
212+
attr_dels_.clear();
213+
}
206214

207215
// Add an input and set its name, data type and dimension.
208216
nvinfer1::ITensor* DeclareInput(const std::string& name,
@@ -386,6 +394,82 @@ class TensorRTEngine {
386394
}
387395
#endif
388396

397+
bool Has(const std::string& attr_name) const {
398+
return attrs_.count(attr_name) > 0;
399+
}
400+
401+
void Erase(const std::string& attr_name) {
402+
if (!Has(attr_name)) {
403+
return;
404+
}
405+
if (attr_dels_.find(attr_name) != attr_dels_.end()) {
406+
attr_dels_[attr_name]();
407+
attr_dels_.erase(attr_name);
408+
}
409+
attrs_.erase(attr_name);
410+
}
411+
412+
// Set a pointer to the attribute. Engine takes ownership of the attribute.
413+
template <typename AttrType>
414+
void Set(const std::string& attr_name, AttrType* attr) {
415+
if (attrs_.count(attr_name) == 0) {
416+
PADDLE_ENFORCE_EQ(
417+
attrs_.count(attr_name), 0,
418+
platform::errors::AlreadyExists(
419+
"Attribute %s already set in trt engine.", attr_name));
420+
} else {
421+
VLOG(3) << "Setting the attribute " << attr_name << " for trt engine "
422+
<< this;
423+
}
424+
attrs_[attr_name] = attr;
425+
attr_dels_[attr_name] = [attr, attr_name]() {
426+
VLOG(3) << "deleting " << attr_name;
427+
delete attr;
428+
};
429+
}
430+
431+
// Set a pointer to the attribute. Engine doesn't take ownership. Caller
432+
// should delete the attribute.
433+
template <typename AttrType>
434+
void SetNotOwned(const std::string& attr_name, AttrType* attr) {
435+
PADDLE_ENFORCE_EQ(
436+
attrs_.count(attr_name), 0,
437+
platform::errors::AlreadyExists(
438+
"Attribute %s already set in trt engine.", attr_name));
439+
attrs_[attr_name] = attr;
440+
}
441+
442+
// Get a reference to the attribute previously set.
443+
template <typename AttrType>
444+
AttrType& Get(const std::string& attr_name) const {
445+
PADDLE_ENFORCE_NE(attrs_.find(attr_name), attrs_.end(),
446+
platform::errors::InvalidArgument(
447+
"Attribute %s not found in trt engine.", attr_name));
448+
try {
449+
return *boost::any_cast<AttrType*>(attrs_.at(attr_name));
450+
} catch (boost::bad_any_cast&) {
451+
auto TypeToString = [](const std::type_info& info) -> std::string {
452+
if (std::type_index(info) == std::type_index(typeid(bool*))) {
453+
return "bool";
454+
} else if (std::type_index(info) == std::type_index(typeid(int*))) {
455+
return "int";
456+
} else if (std::type_index(info) ==
457+
std::type_index(typeid(const int*))) {
458+
return "const int";
459+
} else if (std::type_index(info) ==
460+
std::type_index(typeid(std::string*))) {
461+
return "std::string";
462+
}
463+
return info.name();
464+
};
465+
466+
PADDLE_THROW(platform::errors::InvalidArgument(
467+
"Invalid type for attritube %s, expected: %s, actual: %s.", attr_name,
468+
TypeToString(typeid(AttrType*)),
469+
TypeToString(attrs_.at(attr_name).type())));
470+
}
471+
}
472+
389473
private:
390474
// Each ICudaEngine object is bound to a specific GPU when it is instantiated,
391475
// ensure that the thread is associated with the correct device by calling
@@ -441,6 +525,9 @@ class TensorRTEngine {
441525
infer_ptr<nvinfer1::IHostMemory> ihost_memory_;
442526
std::unordered_map<nvinfer1::ITensor*, float> quant_dynamic_range_;
443527

528+
std::unordered_map<std::string, boost::any> attrs_;
529+
std::unordered_map<std::string, std::function<void(void)>> attr_dels_;
530+
444531
// For dynamic shape
445532
bool with_dynamic_shape_{false};
446533
infer_ptr<nvinfer1::INetworkDefinition> infer_networkv2_;

paddle/fluid/inference/tensorrt/test_engine.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,15 @@ TEST_F(TensorRTEngineTest, add_layer) {
9191
buffers[0] = reinterpret_cast<void *>(x_v_gpu_data);
9292
buffers[1] = reinterpret_cast<void *>(y_gpu_data);
9393

94+
LOG(INFO) << "Set attr";
95+
engine_->Set("test_attr", new std::string("test_attr"));
96+
if (engine_->Has("test_attr")) {
97+
auto attr_val = engine_->Get<std::string>("test_attr");
98+
engine_->Erase("test_attr");
99+
}
100+
std::string *attr_key = new std::string("attr_key");
101+
engine_->SetNotOwned("attr1", attr_key);
102+
94103
LOG(INFO) << "to execute";
95104
engine_->Execute(1, &buffers, ctx_->stream());
96105

@@ -99,6 +108,8 @@ TEST_F(TensorRTEngineTest, add_layer) {
99108

100109
LOG(INFO) << "to checkout output";
101110
ASSERT_EQ(y_cpu[0], x_v[0] * 2 + 3);
111+
112+
delete attr_key;
102113
}
103114

104115
TEST_F(TensorRTEngineTest, add_layer_multi_dim) {

paddle/fluid/inference/tests/api/tester_helper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "paddle/fluid/inference/analysis/ut_helper.h"
3434
#include "paddle/fluid/inference/api/analysis_predictor.h"
3535
#include "paddle/fluid/inference/api/helper.h"
36+
#include "paddle/fluid/inference/api/paddle_inference_api.h"
3637
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
3738
#include "paddle/fluid/inference/tests/api/config_printer.h"
3839
#include "paddle/fluid/inference/tests/test_helper.h"

paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ limitations under the License. */
1616
#include <gtest/gtest.h>
1717
#include "gflags/gflags.h"
1818

19+
#include "paddle/fluid/inference/tensorrt/helper.h"
1920
#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
2021

2122
namespace paddle {
@@ -143,5 +144,136 @@ TEST(AnalysisPredictor, fp16) {
143144
#endif
144145
}
145146

147+
// ernie_varlen
148+
std::shared_ptr<paddle_infer::Predictor> InitPredictor() {
149+
paddle_infer::Config config;
150+
config.SetModel(FLAGS_infer_model);
151+
152+
config.EnableUseGpu(100, 0);
153+
154+
// Open the memory optim.
155+
config.EnableMemoryOptim();
156+
157+
int max_batch = 32;
158+
int max_single_seq_len = 128;
159+
int opt_single_seq_len = 64;
160+
int min_batch_seq_len = 1;
161+
int max_batch_seq_len = 512;
162+
int opt_batch_seq_len = 256;
163+
164+
std::string input_name0 = "read_file_0.tmp_0";
165+
std::string input_name1 = "read_file_0.tmp_1";
166+
std::string input_name2 = "read_file_0.tmp_2";
167+
std::string input_name3 = "read_file_0.tmp_4";
168+
169+
std::vector<int> min_shape = {min_batch_seq_len};
170+
std::vector<int> max_shape = {max_batch_seq_len};
171+
std::vector<int> opt_shape = {opt_batch_seq_len};
172+
// Set the input's min, max, opt shape
173+
std::map<std::string, std::vector<int>> min_input_shape = {
174+
{input_name0, min_shape},
175+
{input_name1, min_shape},
176+
{input_name2, {1}},
177+
{input_name3, {1, 1, 1}}};
178+
std::map<std::string, std::vector<int>> max_input_shape = {
179+
{input_name0, max_shape},
180+
{input_name1, max_shape},
181+
{input_name2, {max_batch + 1}},
182+
{input_name3, {1, max_single_seq_len, 1}}};
183+
std::map<std::string, std::vector<int>> opt_input_shape = {
184+
{input_name0, opt_shape},
185+
{input_name1, opt_shape},
186+
{input_name2, {max_batch + 1}},
187+
{input_name3, {1, opt_single_seq_len, 1}}};
188+
189+
// only kHalf supported
190+
config.EnableTensorRtEngine(
191+
1 << 30, 1, 5, paddle_infer::Config::Precision::kHalf, false, false);
192+
// ernie varlen must be used with dynamic shape
193+
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
194+
opt_input_shape);
195+
// ernie varlen must be used with oss
196+
config.EnableTensorRtOSS();
197+
198+
return paddle_infer::CreatePredictor(config);
199+
}
200+
201+
void run(paddle_infer::Predictor* predictor, std::vector<float>* out_data) {
202+
const int run_batch = 2;
203+
const int run_seq_len = 71;
204+
const int max_seq_len = 128;
205+
206+
int32_t i1[run_seq_len] = {
207+
// sentence 1
208+
1, 3558, 4, 75, 491, 89, 340, 313, 93, 4, 255, 10, 75, 321, 4095, 1902, 4,
209+
134, 49, 75, 311, 14, 44, 178, 543, 15, 12043, 2, 75, 201, 340, 9, 14, 44,
210+
486, 218, 1140, 279, 12043, 2,
211+
// sentence 2
212+
101, 2054, 2234, 2046, 2486, 2044, 1996, 2047, 4552, 2001, 9536, 1029,
213+
102, 2004, 1997, 2008, 2154, 1010, 1996, 2047, 4552, 9536, 2075, 1996,
214+
2117, 3072, 2234, 2046, 2486, 1012, 102,
215+
};
216+
int32_t i2[run_seq_len] = {
217+
// sentence 1
218+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219+
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
220+
// sentence 2
221+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
222+
1, 1, 1, 1, 1, 1};
223+
// shape info of this batch
224+
int32_t i3[3] = {0, 40, 71};
225+
// max_seq_len represents the max sentence length of all the sentences, only
226+
// length of
227+
// input i4 is useful, data means nothing.
228+
int32_t i4[max_seq_len] = {0};
229+
230+
auto input_names = predictor->GetInputNames();
231+
// first input
232+
auto input_t1 = predictor->GetInputHandle(input_names[0]);
233+
input_t1->Reshape({run_seq_len});
234+
input_t1->CopyFromCpu(i1);
235+
236+
// second input
237+
auto input_t2 = predictor->GetInputHandle(input_names[1]);
238+
input_t2->Reshape({run_seq_len});
239+
input_t2->CopyFromCpu(i2);
240+
241+
// third input
242+
auto input_t3 = predictor->GetInputHandle(input_names[2]);
243+
input_t3->Reshape({run_batch + 1});
244+
input_t3->CopyFromCpu(i3);
245+
246+
// fourth input
247+
auto input_t4 = predictor->GetInputHandle(input_names[3]);
248+
input_t4->Reshape({1, max_seq_len, 1});
249+
input_t4->CopyFromCpu(i4);
250+
251+
CHECK(predictor->Run());
252+
253+
auto output_names = predictor->GetOutputNames();
254+
auto output_t = predictor->GetOutputHandle(output_names[0]);
255+
std::vector<int> output_shape = output_t->shape();
256+
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
257+
std::multiplies<int>());
258+
out_data->resize(out_num);
259+
output_t->CopyToCpu(out_data->data());
260+
261+
return;
262+
}
263+
264+
TEST(AnalysisPredictor, ernie_varlen) {
265+
#if IS_TRT_VERSION_GE(7234)
266+
auto predictor = InitPredictor();
267+
std::vector<float> out_data;
268+
run(predictor.get(), &out_data);
269+
std::vector<float> ref_data{0.59814, 0.219882, 0.181978,
270+
0.359796, 0.577414, 0.0627908};
271+
float near_tolerance = 1e-3;
272+
for (size_t i = 0; i < out_data.size(); i++) {
273+
EXPECT_NEAR(ref_data[i], out_data[i], near_tolerance);
274+
}
275+
#endif
276+
}
277+
146278
} // namespace inference
147279
} // namespace paddle

0 commit comments

Comments
 (0)