Skip to content

Commit 179d426

Browse files
authored
Modify MarkTrtEngineOutputs API (#56858)
* Modify MarkTrtEngineOutputs API
1 parent 8aa1772 commit 179d426

File tree

8 files changed

+31
-18
lines changed

8 files changed

+31
-18
lines changed

paddle/fluid/inference/analysis/argument.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ struct Argument {
241241
DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int64_t);
242242
DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
243243
DECL_ARGUMENT_FIELD(trt_mark_output, TRTMarkOutput, bool);
244+
DECL_ARGUMENT_FIELD(trt_mark_output_with_id, TRTMarkOutputWithId, bool);
244245
DECL_ARGUMENT_FIELD(trt_output_tensor_names,
245246
TRTOutputTensorNames,
246247
std::vector<std::string>);

paddle/fluid/inference/analysis/ir_pass_manager.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ void IRPassManager::CreatePasses(Argument *argument,
163163
pass->Set("min_subgraph_size",
164164
new int(argument->tensorrt_min_subgraph_size()));
165165
pass->Set("mark_output", new bool(argument->trt_mark_output()));
166+
pass->Set("mark_output_with_id",
167+
new bool(argument->trt_mark_output_with_id()));
166168
pass->Set(
167169
"output_tensor_names",
168170
new std::vector<std::string>(argument->trt_output_tensor_names()));

paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -376,29 +376,30 @@ std::string TensorRtSubgraphPass::CreateTensorRTOp(
376376
std::vector<int> origin_outputs_dtype;
377377
std::map<std::string, int> map_origin_outputs_dtype;
378378

379-
// Whether to mark Outputs
379+
// Mark TensorRT output nodes as trt outputs
380380
auto mark_output = Get<bool>("mark_output");
381381
auto output_tensor_name =
382382
Get<std::vector<std::string>>("output_tensor_names");
383-
VLOG(1) << "mark Output: " << mark_output;
383+
auto mark_output_with_id = Get<bool>("mark_output_with_id");
384384

385-
if (mark_output == 1) {
385+
if (mark_output) {
386386
VLOG(1) << "begin to mark output ...";
387387
for (auto node : subgraph) {
388388
if (node->NodeType() == Node::Type::kOperation) {
389-
if (node->Op()->Outputs().count("Xshape")) continue;
390389
for (auto *x : node->outputs) {
391390
if (std::count(parameters.begin(), parameters.end(), x->Name()) > 0)
392391
continue;
393-
if (!output_tensor_name.empty() &&
394-
std::count(output_tensor_name.begin(),
395-
output_tensor_name.end(),
396-
x->Name())) {
397-
VLOG(1) << "output " << x->Name() << " has been marked";
398-
std::string output_name_withid =
399-
x->Name() + std::to_string(x->id());
392+
std::string name_with_id = x->Name() + std::to_string(x->id());
393+
if (((!mark_output_with_id && std::count(output_tensor_name.begin(),
394+
output_tensor_name.end(),
395+
x->Name()) > 0) ||
396+
(mark_output_with_id && std::count(output_tensor_name.begin(),
397+
output_tensor_name.end(),
398+
name_with_id) > 0)) &&
399+
!x->outputs.empty()) {
400+
VLOG(3) << "output " << x->Name() << " has been marked";
400401
output_names.insert(x->Name());
401-
output_names_with_id.insert(output_name_withid);
402+
output_names_with_id.insert(name_with_id);
402403
origin_name_output_rank[x->Name()] = x->Var()->GetShape().size();
403404
trt_outputs.insert(x);
404405
map_origin_outputs_dtype[x->Name()] =

paddle/fluid/inference/api/analysis_config.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
461461
CP_MEMBER(tensorrt_min_subgraph_size_);
462462
CP_MEMBER(tensorrt_precision_mode_);
463463
CP_MEMBER(trt_mark_output_);
464+
CP_MEMBER(trt_mark_output_with_id_);
464465
CP_MEMBER(trt_output_tensor_names_);
465466
CP_MEMBER(trt_disabled_ops_);
466467
CP_MEMBER(trt_use_dla_);
@@ -762,8 +763,10 @@ void AnalysisConfig::EnableTensorRtEngine(int64_t workspace_size,
762763
}
763764

764765
void AnalysisConfig::MarkTrtEngineOutputs(
765-
const std::vector<std::string> &output_tensor_names) {
766+
const std::vector<std::string> &output_tensor_names,
767+
const bool mark_output_with_id) {
766768
trt_mark_output_ = true;
769+
trt_mark_output_with_id_ = mark_output_with_id;
767770
trt_output_tensor_names_ = output_tensor_names;
768771
}
769772

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,6 +1392,7 @@ void AnalysisPredictor::PrepareArgument() {
13921392
argument_->SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_);
13931393
argument_->SetTensorRtMinSubgraphSize(config_.tensorrt_min_subgraph_size_);
13941394
argument_->SetTRTMarkOutput(config_.trt_mark_output_);
1395+
argument_->SetTRTMarkOutputWithId(config_.trt_mark_output_with_id_);
13951396
argument_->SetTRTOutputTensorNames(config_.trt_output_tensor_names_);
13961397
argument_->SetTensorRtDisabledOPs(config_.trt_disabled_ops_);
13971398
argument_->SetTensorRtUseDLA(config_.trt_use_dla_);

paddle/fluid/inference/api/paddle_analysis_config.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -695,7 +695,8 @@ struct PD_INFER_DECL AnalysisConfig {
695695
/// \param output_tensor_names The names of the Tensors that need to be marked
696696
///
697697
void MarkTrtEngineOutputs(
698-
const std::vector<std::string>& output_tensor_names = {});
698+
const std::vector<std::string>& output_tensor_names = {},
699+
const bool trt_mark_output_with_id = false);
699700
///
700701
/// \brief Turn on the TensorRT memory optimization.
701702
///
@@ -1237,6 +1238,7 @@ struct PD_INFER_DECL AnalysisConfig {
12371238
bool trt_use_varseqlen_{false};
12381239
bool trt_with_interleaved_{false};
12391240
bool trt_mark_output_{false};
1241+
bool trt_mark_output_with_id_{false};
12401242
std::vector<std::string> trt_output_tensor_names_{};
12411243
std::string tensorrt_transformer_posid_{""};
12421244
std::string tensorrt_transformer_maskid_{""};

paddle/fluid/pybind/inference_api.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -896,7 +896,8 @@ void BindAnalysisConfig(py::module *m) {
896896
&AnalysisConfig::tensorrt_dynamic_shape_enabled)
897897
.def("mark_trt_engine_outputs",
898898
&AnalysisConfig::MarkTrtEngineOutputs,
899-
py::arg("output_tensor_names") = std::vector<std::string>({}))
899+
py::arg("output_tensor_names") = std::vector<std::string>({}),
900+
py::arg("mark_output_with_id") = false)
900901
.def("enable_tensorrt_varseqlen", &AnalysisConfig::EnableVarseqlen)
901902
.def("tensorrt_varseqlen_enabled",
902903
&AnalysisConfig::tensorrt_varseqlen_enabled)

test/cpp/inference/api/trt_mark_trt_engine_outputs_test.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@ TEST(TensorRT, mark_trt_engine_outputs) {
2424
config.EnableUseGpu(100, 0);
2525
config.EnableTensorRtEngine(
2626
1 << 30, 1, 5, AnalysisConfig::Precision::kFloat32, false, false);
27-
// The name of the tensor that needs to be marked, the default is empty (all
28-
// marks)
29-
std::vector<std::string> markOutput = {"fc_0.tmp_0", "fc_0.tmp_1"};
27+
// The name of the tensor that needs to be marked
28+
std::vector<std::string> markOutput = {"pool2d_0.tmp_0",
29+
"elementwise_add_0.tmp_0",
30+
"conv2d_5.tmp_0",
31+
"batch_norm_6.tmp_2"};
3032
config.MarkTrtEngineOutputs(markOutput);
3133

3234
std::vector<std::vector<PaddleTensor>> inputs_all;

0 commit comments

Comments
 (0)