Commit bc9fd1f

Cherry-pick: Inference: fix Mask R-CNN model diff, optimize memory usage, fix memory leak. #18532 (#18547)
Fix the Mask R-CNN model diff; add an interface for setting optim_cache_dir (e.g., when in TRT INT8 mode and loading the model from memory, there must be a way to set the directory for the TRT calibration table data). test=release/1.5
1 parent 7c73a68 commit bc9fd1f

19 files changed: +354 -119 lines
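
To make the new optim_cache_dir interface concrete, here is a minimal usage sketch against the C++ inference API. It is a sketch, not the commit's own example: the buffer arguments and cache path are invented, and the exact EnableTensorRtEngine argument list is an assumption for this release; only SetOptimCacheDir and the model-from-memory constraints come from the diffs below.

#include <memory>
#include <string>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Sketch: TRT INT8 with a model loaded from memory. Without
// SetOptimCacheDir() the new check in ir_pass_manager.cc fails, because the
// calibration table has no model directory to fall back on.
std::unique_ptr<paddle::PaddlePredictor> MakeInt8Predictor(
    const std::string& prog_buf, const std::string& params_buf) {
  paddle::AnalysisConfig config;
  config.SetModelBuffer(prog_buf.data(), prog_buf.size(),
                        params_buf.data(), params_buf.size());
  config.EnableUseGpu(100 /* initial GPU pool, MB */, 0 /* device id */);
  config.EnableTensorRtEngine(1 << 20 /* workspace */, 1 /* max batch */,
                              3 /* min subgraph size */,
                              paddle::AnalysisConfig::Precision::kInt8,
                              false /* use_static: must stay false when the
                                       model comes from memory */);
  config.SetOptimCacheDir("./trt_calib_cache");  // the interface added here
  return paddle::CreatePaddlePredictor(config);
}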

paddle/fluid/framework/ir/graph_pattern_detector.cc

Lines changed: 12 additions & 0 deletions
@@ -504,6 +504,16 @@ PDNode *PDNode::assert_op_has_n_outputs(const std::string &op_type, size_t n) {
   return this;
 }
 
+PDNode *PDNode::assert_has_n_inputs(size_t n) {
+  asserts_.emplace_back([=](Node *x) { return x->inputs.size() == n; });
+  return this;
+}
+
+PDNode *PDNode::assert_has_n_outputs(size_t n) {
+  asserts_.emplace_back([=](Node *x) { return x->outputs.size() == n; });
+  return this;
+}
+
 PDNode *PDNode::assert_more(PDNode::teller_t &&teller) {
   asserts_.emplace_back(std::move(teller));
   return this;

@@ -1444,11 +1454,13 @@ PDNode *patterns::ConvAffineChannel::operator()(
   auto *ac_scale_var = pattern->NewNode(ac_scale_repr())
                            ->AsInput()
                            ->assert_is_persistable_var()
+                           ->assert_has_n_outputs(1)
                            ->assert_is_op_input("affine_channel", "Scale");
   // AC Bias
   auto *ac_bias_var = pattern->NewNode(ac_bias_repr())
                           ->AsInput()
                           ->assert_is_persistable_var()
+                          ->assert_has_n_outputs(1)
                           ->assert_is_op_input("affine_channel", "Bias");
 
   // AC output
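
The two new asserts let a pattern insist on an exact fan-in or fan-out. In the ConvAffineChannel pattern above, assert_has_n_outputs(1) is the likely Mask R-CNN fix: the fuse folds the affine_channel Scale/Bias into the conv weights in place, which is only safe when the persistable var has no other consumer. A hedged sketch of the same guard in a custom pattern (the repr string is illustrative):

// Illustrative: match a persistable var consumed by exactly one op, so an
// in-place rewrite of it cannot corrupt another consumer elsewhere.
auto* scale_var = pattern->NewNode("my_scale_repr")
                      ->AsInput()
                      ->assert_is_persistable_var()
                      ->assert_has_n_outputs(1)  // require a sole consumer
                      ->assert_is_op_input("affine_channel", "Scale");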

paddle/fluid/framework/ir/graph_pattern_detector.h

Lines changed: 3 additions & 0 deletions
@@ -131,6 +131,9 @@ struct PDNode {
       const std::unordered_set<std::string>& op_types,
       const std::string& argument, int nth);
 
+  PDNode* assert_has_n_inputs(size_t n);
+  PDNode* assert_has_n_outputs(size_t n);
+
   template <typename T>
   PDNode* assert_op_attr(const std::string& attr_name, const T& attr) {
     asserts_.emplace_back([=](Node* x) {

paddle/fluid/inference/analysis/argument.h

Lines changed: 1 addition & 2 deletions
@@ -59,7 +59,6 @@ struct Argument {
 
   using unique_ptr_t = std::unique_ptr<void, std::function<void(void*)>>;
   using fusion_statis_t = std::unordered_map<std::string, int>;
-  using engine_opt_info_t = std::map<std::string, std::string>;
   using anakin_max_shape_t = std::map<std::string, std::vector<int>>;
 
   bool Has(const std::string& key) const { return valid_fields_.count(key); }

@@ -130,7 +129,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
   DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
   DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
-  DECL_ARGUMENT_FIELD(engine_opt_info, EngineOptInfo, engine_opt_info_t);
+  DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
 
   // The overall graph to work on.
   DECL_ARGUMENT_UNIQUE_FIELD(main_graph, MainGraph, framework::ir::Graph);
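
For readers unfamiliar with the macro: DECL_ARGUMENT_FIELD(field, Field, type) generates the accessors the rest of this commit relies on (argument->optim_cache_dir(), argument->SetOptimCacheDir(...)). Roughly, as a sketch rather than the exact macro body:

// Approximate expansion of
// DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string):
std::string& optim_cache_dir() { return optim_cache_dir_; }  // getter
void SetOptimCacheDir(const std::string& x) {                // setter
  optim_cache_dir_ = x;
  valid_fields_.insert("optim_cache_dir");  // makes Has("optim_cache_dir") true
}
std::string optim_cache_dir_;  // backing member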

paddle/fluid/inference/analysis/ir_pass_manager.cc

Lines changed: 13 additions & 8 deletions
@@ -94,11 +94,20 @@ void IRPassManager::CreatePasses(Argument *argument,
 
       bool use_static_engine = argument->tensorrt_use_static_engine();
       bool model_from_memory = argument->model_from_memory();
-      bool int8_valid = !(model_from_memory && enable_int8);
+      std::string optim_cache_dir = argument->optim_cache_dir();
+      bool int8_valid =
+          !(model_from_memory && optim_cache_dir.empty() && enable_int8);
       PADDLE_ENFORCE(int8_valid,
-                     "TRT INT8 Now don't support model load from memory.");
-
-      if ((!model_from_memory && use_static_engine) || enable_int8) {
+                     "When you are in TRT INT8 mode, and load model from "
+                     "memory, you should set optim_cache_dir using "
+                     "config.SetOptimCacheDir()");
+      PADDLE_ENFORCE(!(model_from_memory && use_static_engine),
+                     "When you are using Paddle-TRT, and also using load model "
+                     "from memory, you should set the use_static to false.");
+
+      if (!optim_cache_dir.empty()) {
+        pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
+      } else if (use_static_engine || enable_int8) {
         std::string model_opt_cache_dir =
             argument->Has("model_dir")
                 ? argument->model_dir()

@@ -110,8 +119,6 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
       pass->Set("use_static_engine", new bool(use_static_engine));
       pass->Set("model_from_memory", new bool(argument->model_from_memory()));
-      pass->Set("engine_opt_info", new std::map<std::string, std::string>(
-                                       argument->engine_opt_info()));
     }
     if (pass_name == "ngraph_subgraph_pass") {
       pass->Set("program",

@@ -123,8 +130,6 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("use_gpu", new bool(argument->use_gpu()));
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
       pass->Set("model_from_memory", new bool(argument->model_from_memory()));
-      pass->Set("engine_opt_info", new std::map<std::string, std::string>(
-                                       argument->engine_opt_info()));
       pass->Set("predictor_id", new int(argument->predictor_id()));
       pass->Set("max_input_shape", new std::map<std::string, std::vector<int>>(
                                        argument->anakin_max_input_shape()));
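
The net effect in CreatePasses is a precedence rule for where engine artifacts (serialized engines, calibration tables) live. A condensed sketch of that order; DeriveFromModelPath is a hypothetical stand-in for the fallback the real code computes:

// Hypothetical condensation of the cache-dir logic above.
std::string ResolveOptCacheDir(Argument* argument, bool use_static_engine,
                               bool enable_int8) {
  const std::string dir = argument->optim_cache_dir();
  if (!dir.empty()) return dir;  // 1) explicit config.SetOptimCacheDir() wins
  if (use_static_engine || enable_int8) {
    // 2) fall back to a directory derived from the model on disk; impossible
    //    for a model loaded from memory, hence the new PADDLE_ENFORCEs.
    return argument->Has("model_dir")
               ? argument->model_dir()
               : DeriveFromModelPath(argument);  // hypothetical helper
  }
  return "";  // 3) nothing needs caching
}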

paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

Lines changed: 1 addition & 0 deletions
@@ -226,6 +226,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   std::unique_ptr<tensorrt::TRTInt8Calibrator> calibrator;
   if (enable_int8 && calibration_data.size() != 0) {
     calibrator.reset(new tensorrt::TRTInt8Calibrator(calibration_data));
+    LOG(INFO) << "RUN Paddle TRT int8 calibration mode...";
   }
   // When in int8 mode and calibration_mode, the program just produce the
   // calibration table data.

paddle/fluid/inference/analysis/passes/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -4,13 +4,15 @@ cc_library(memory_optim_pass SRCS memory_optimize_pass.cc DEPS analysis_pass zer
 cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_pass.cc DEPS analysis_pass argument ir_pass_manager)
 cc_library(ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass)
 cc_library(adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass)
+cc_library(inference_op_replace_pass SRCS inference_op_replace_pass.cc DEPS analysis_pass graph_to_program_pass)
 
 cc_library(analysis_passes SRCS passes.cc DEPS
   ir_graph_build_pass
   ir_analysis_pass
   ir_params_sync_among_devices_pass
   adjust_cudnn_workspace_size_pass
   memory_optim_pass
+  inference_op_replace_pass
   ir_graph_to_program_pass
 )

paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc

Lines changed: 47 additions & 0 deletions

@@ -0,0 +1,47 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
+#include <unordered_map>
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+void InferenceOpReplacePass::RunImpl(Argument* argument) {
+  if (!argument->use_gpu()) return;
+  std::unordered_map<std::string, std::string> replaced_map{
+      {"conditional_block", "conditional_block_infer"},
+  };
+
+  auto& graph = argument->main_graph();
+  auto nodes = graph.Nodes();
+
+  for (auto& node : nodes) {
+    if (!node->IsOp()) continue;
+    auto* op_desc = node->Op();
+    std::string op_type = op_desc->Type();
+    if (!replaced_map.count(op_type)) continue;
+    op_desc->SetType(replaced_map[op_type]);
+    op_desc->Flush();
+  }
+}
+
+std::string InferenceOpReplacePass::repr() const {
+  return "inference-op-replace-pass";
+}
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
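
The pass is table-driven, so supporting another op that grows a dedicated inference kernel is one map entry. For illustration only (while_infer is hypothetical; this commit maps only conditional_block):

std::unordered_map<std::string, std::string> replaced_map{
    {"conditional_block", "conditional_block_infer"},
    // {"while", "while_infer"},  // hypothetical future entry
};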
paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h

Lines changed: 43 additions & 0 deletions

@@ -0,0 +1,43 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
+#include "paddle/fluid/platform/place.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+/*
+ * Some ops (while, conditional_block, etc.) have different optimization
+ * points under prediction and training conditions, so the corresponding
+ * inference implementations were added for them separately. This pass
+ * replaces such ops with their inference counterparts.
+ */
+class InferenceOpReplacePass : public AnalysisPass {
+ public:
+  void RunImpl(Argument *argument) override;
+  std::string repr() const override;
+};
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle

paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc

Lines changed: 26 additions & 5 deletions
@@ -18,6 +18,7 @@
 #include <functional>
 #include <limits>
 #include <map>
+#include <set>
 #include <string>
 #include <type_traits>
 #include <utility>

@@ -108,11 +109,34 @@ int DataTypeToSpace(framework::proto::VarType_Type type) {
 void MemoryOptimizePass::CollectVarMemorySize(
     space_table_t* space_table) const {
   const int fake_batch_size = 1;
+  auto valid_var = [&](framework::ir::Node* node) -> bool {
+    std::set<std::string> invalid_op = {"while", "conditional_block",
+                                        "tensorrt_engine",
+                                        "conditional_block_infer"};
+    for (auto* tmp : node->inputs) {
+      CHECK(tmp->IsOp());
+      std::string op_type = tmp->Op()->Type();
+      if (std::find(invalid_op.begin(), invalid_op.end(), op_type) !=
+          invalid_op.end()) {
+        return false;
+      }
+    }
+    for (auto* tmp : node->outputs) {
+      CHECK(tmp->IsOp());
+      std::string op_type = tmp->Op()->Type();
+      if (std::find(invalid_op.begin(), invalid_op.end(), op_type) !=
+          invalid_op.end()) {
+        return false;
+      }
+    }
+    return true;
+  };
   // Collect tensors from graph.
   for (auto* node : graph_->Nodes()) {
     if (node->IsVar() &&
         node->Var()->GetType() ==
-            framework::proto::VarType::Type::VarType_Type_LOD_TENSOR) {
+            framework::proto::VarType::Type::VarType_Type_LOD_TENSOR &&
+        valid_var(node)) {
       // Parameters will not be reused.
       if (node->Var()->Persistable()) continue;
       auto shape = node->Var()->GetShape();

@@ -135,12 +159,9 @@ void MakeSimpleReusePlan(
     std::unordered_map<std::string, int>* cluster_size) {
   std::vector<MemNode> mem_nodes;
   for (auto& data : lifecycles) {
+    if (!space_table.count(data.first)) continue;
     MemNode temp_node;
     temp_node.name = data.first;
-    PADDLE_ENFORCE(
-        space_table.count(data.first),
-        "%s variable should be in the spacetable during memory optimize",
-        data.first);
     temp_node.size = space_table.at(data.first);
     temp_node.cluster = -1;
     temp_node.lifetime = data.second;
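
The rationale for valid_var: ops that own sub-blocks (while, conditional_block, tensorrt_engine) run an inner program whose reads and writes the outer-graph lifetime analysis cannot see, so reusing the buffers of their inputs and outputs is unsafe. Excluding those vars from the space table, and letting MakeSimpleReusePlan silently skip variables missing from it, is what plugs the bad reuse. A behavior-equivalent sketch of the same test using std::set::count, which reads more directly than std::find over a set (the names are mine, not the patch's):

// Hypothetical rewrite of the valid_var lambda above (inverted sense).
static const std::set<std::string> kSubBlockOps = {
    "while", "conditional_block", "tensorrt_engine",
    "conditional_block_infer"};

static bool TouchesSubBlockOp(framework::ir::Node* var) {
  auto touches = [](const std::vector<framework::ir::Node*>& ops) {
    for (auto* op : ops) {
      if (op->IsOp() && kSubBlockOps.count(op->Op()->Type())) return true;
    }
    return false;
  };
  // A var is skipped for reuse if any adjacent op owns a sub-block.
  return touches(var->inputs) || touches(var->outputs);
}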

paddle/fluid/inference/analysis/passes/passes.cc

Lines changed: 3 additions & 0 deletions
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/inference/analysis/passes/passes.h"
 #include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h"
+#include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"

@@ -38,6 +39,8 @@ PassRegistry::PassRegistry() {
       std::unique_ptr<AnalysisPass>(new IrParamsSyncAmongDevicesPass));
   passes_.emplace("adjust_cudnn_workspace_size_pass",
                   std::unique_ptr<AnalysisPass>(new AdjustCudnnWorkSpacePass));
+  passes_.emplace("inference_op_replace_pass",
+                  std::unique_ptr<AnalysisPass>(new InferenceOpReplacePass));
   passes_.emplace(
       "ir_graph_to_program_pass",
       std::unique_ptr<IrGraphToProgramPass>(new IrGraphToProgramPass));
