Commit 6fbd224

CHERRY PICK FROM 18941, 18860, 19213: Fix Mask RCNN bug AND Paddle-TRT fp16 support (#19378)
* CHERRY_PICK 18941, 18860: TRT fp16 support. test=release/1.5
* CHERRY_PICK 19213: Fix bug: Mask RCNN inference diff when using AnalysisPredictor.
  1. Fix affine channel fuse pass.
  2. Fix conditional block op.
  3. Fix merge lod tensor op bug.
  4. Fix memory optim bug caused by reset lod op.
  test=release/1.5
1 parent 2656e90 commit 6fbd224

31 files changed (+268, −142 lines)
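
For orientation before the per-file diffs: the fp16 support added here is driven by the user-facing AnalysisConfig, whose precision setting flows through ir_pass_manager.cc into tensorrt_subgraph_pass below. A minimal caller-side sketch, assuming the release/1.5 C++ API (the model path and the EnableTensorRtEngine argument values are placeholders, not taken from this commit):

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("model_dir");  // placeholder model directory
  config.EnableUseGpu(100 /* initial GPU memory in MB */, 0 /* device id */);
  // Precision::kHalf is the mode the new pass code maps to enable_fp16.
  config.EnableTensorRtEngine(1 << 20 /* workspace size */,
                              1 /* max batch size */,
                              3 /* min subgraph size */,
                              paddle::AnalysisConfig::Precision::kHalf);
  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}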

paddle/fluid/framework/ir/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
@@ -52,7 +52,6 @@ pass_library(graph_viz_pass base)
 pass_library(lock_free_optimize_pass base)
 pass_library(fc_fuse_pass inference)
 pass_library(attention_lstm_fuse_pass inference)
-pass_library(infer_clean_graph_pass inference)
 pass_library(fc_lstm_fuse_pass inference)
 pass_library(embedding_fc_lstm_fuse_pass inference)
 pass_library(fc_gru_fuse_pass inference)

paddle/fluid/framework/ir/infer_clean_graph_pass.cc

Lines changed: 0 additions & 67 deletions
This file was deleted.
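
With infer_clean_graph_pass deleted here and dropped from the pass library above, its graph-cleaning role appears to move into the analysis pipeline as the new ir_graph_clean_pass registered in paddle/fluid/inference/analysis/passes/CMakeLists.txt below.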

paddle/fluid/inference/analysis/analyzer.cc

Lines changed: 3 additions & 0 deletions
@@ -31,6 +31,9 @@ void Analyzer::RunAnalysis(Argument *argument) {
                  "analsis_passes is not valid in the argument.");
   for (auto &pass : argument->analysis_passes()) {
     string::PrettyLogH1("--- Running analysis [%s]", pass);
+    if (!argument->enable_analysis_optim() && pass == "ir_analysis_pass")
+      continue;
+
     auto *ptr = PassRegistry::Global().Retreive(pass);
     PADDLE_ENFORCE_NOT_NULL(ptr, "no analysis pass called %s", pass);
     ptr->Run(argument);

paddle/fluid/inference/analysis/analyzer_tester.cc

Lines changed: 2 additions & 2 deletions
@@ -30,7 +30,7 @@ using namespace framework; // NOLINT
 TEST(Analyzer, analysis_without_tensorrt) {
   Argument argument;
   argument.SetModelDir(FLAGS_inference_model_dir);
-  argument.SetIrAnalysisPasses({"infer_clean_graph_pass"});
+  argument.SetEnableAnalysisOptim(false);
   argument.SetUseGPU(false);
   argument.SetAnalysisPasses({"ir_graph_build_pass", "ir_analysis_pass",
                               "ir_params_sync_among_devices_pass"});
@@ -41,10 +41,10 @@ TEST(Analyzer, analysis_without_tensorrt) {

 TEST(Analyzer, analysis_with_tensorrt) {
   Argument argument;
+  argument.SetEnableAnalysisOptim(false);
   argument.SetTensorRtMaxBatchSize(3);
   argument.SetTensorRtWorkspaceSize(1 << 20);
   argument.SetModelDir(FLAGS_inference_model_dir);
-  argument.SetIrAnalysisPasses({"infer_clean_graph_pass"});
   argument.SetUseGPU(false);
   argument.SetAnalysisPasses({"ir_graph_build_pass", "ir_analysis_pass",
                               "ir_params_sync_among_devices_pass"});

paddle/fluid/inference/analysis/argument.h

Lines changed: 4 additions & 0 deletions
@@ -62,6 +62,9 @@ struct Argument {
   using anakin_max_shape_t = std::map<std::string, std::vector<int>>;

   bool Has(const std::string& key) const { return valid_fields_.count(key); }
+  // If we set the model using config.SetModelBuffer,
+  // the model and parameter will occupy additional CPU resources.
+  // Use this interface to release these resources.
   void PartiallyRelease() {
     if (Has("model_program_path")) {
       if (Has("model_from_memory") && model_from_memory()) {
@@ -130,6 +133,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
   DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
   DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
+  DECL_ARGUMENT_FIELD(enable_analysis_optim, EnableAnalysisOptim, bool);

   // The overall graph to work on.
   DECL_ARGUMENT_UNIQUE_FIELD(main_graph, MainGraph, framework::ir::Graph);
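
For readers unfamiliar with argument.h: DECL_ARGUMENT_FIELD generates the accessor pair behind calls such as argument.SetEnableAnalysisOptim(false) in the tester above. A simplified illustration of the idea (not the exact macro, which also enforces validity checks on access):

// Simplified illustration only; see argument.h for the real definition.
#define DECL_ARGUMENT_FIELD(field__, Field, type__)  \
 public:                                             \
  type__& field__() { return field__##_; }           \
  void Set##Field(const type__& x) {                 \
    field__##_ = x;                                  \
    valid_fields_.insert(#field__);                  \
  }                                                  \
  bool field__##_valid() { return Has(#field__); }   \
                                                     \
 private:                                            \
  type__ field__##_;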

paddle/fluid/inference/analysis/ir_pass_manager.cc

Lines changed: 4 additions & 2 deletions
@@ -84,13 +84,15 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("program",
                 new framework::ProgramDesc *(&argument->main_program()));

-      bool enable_int8 = argument->tensorrt_precision_mode() ==
-                         AnalysisConfig::Precision::kInt8;
+      auto precision_mode = argument->tensorrt_precision_mode();
+      bool enable_int8 = precision_mode == AnalysisConfig::Precision::kInt8;

       pass->Set("predictor_id", new int(argument->predictor_id()));
       bool use_calib_mode = argument->tensorrt_use_calib_mode();
       pass->Set("enable_int8", new bool(enable_int8));
       pass->Set("use_calib_mode", new bool(use_calib_mode));
+      pass->Set("precision_mode",
+                new AnalysisConfig::Precision(precision_mode));

       bool use_static_engine = argument->tensorrt_use_static_engine();
       bool model_from_memory = argument->model_from_memory();

paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

Lines changed: 5 additions & 1 deletion
@@ -149,6 +149,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
       graph_var_map[node->Name()] = node;
     }
   }
+  auto precision_mode = Get<AnalysisConfig::Precision>("precision_mode");
+  bool enable_fp16 = false;
+  if (precision_mode == AnalysisConfig::Precision::kHalf) enable_fp16 = true;
   auto enable_int8 = Get<bool>("enable_int8");
   auto use_calib_mode = Get<bool>("use_calib_mode");
   auto &subgraph_nodes = *Agent(node).subgraph();
@@ -216,6 +219,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "calibration_data", calibration_data);

   SetAttr(op_desc->Proto(), "enable_int8", enable_int8);
+  SetAttr(op_desc->Proto(), "enable_fp16", enable_fp16);
   SetAttr(op_desc->Proto(), "use_calib_mode", use_calib_mode);
   SetAttr(op_desc->Proto(), "engine_key", engine_key);
   SetAttr(op_desc->Proto(), "predictor_id", predictor_id);
@@ -244,7 +248,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
       inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
          .Create(engine_key + std::to_string(predictor_id),
                  Get<int>("max_batch_size"), Get<int>("workspace_size"),
-                 enable_int8, calibrator.get(), Get<int>("gpu_device_id"));
+                 precision_mode, calibrator.get(), Get<int>("gpu_device_id"));

   bool need_serialize = (use_static_engine && !load_from_memory);
   if (need_serialize) {
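
Downstream of this pass, passing the full precision_mode (rather than only enable_int8) to TRTEngineManager::Create lets the engine select the matching TensorRT builder mode when the network is frozen. A hedged sketch of that mapping against the TensorRT 5 C++ API (illustrative only, not the literal Paddle engine code):

#include <NvInfer.h>
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

// Sketch: translate the pass's precision mode into TensorRT builder flags.
void ConfigurePrecision(nvinfer1::IBuilder* builder,
                        paddle::AnalysisConfig::Precision mode) {
  if (mode == paddle::AnalysisConfig::Precision::kHalf) {
    builder->setFp16Mode(true);  // what enable_fp16 ultimately requests
  } else if (mode == paddle::AnalysisConfig::Precision::kInt8) {
    builder->setInt8Mode(true);  // requires a calibrator or explicit ranges
  }
}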

paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/pass.h"
 #include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h"
+#include "paddle/fluid/inference/api/paddle_analysis_config.h"

 namespace paddle {
 namespace inference {

paddle/fluid/inference/analysis/passes/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -5,6 +5,7 @@ cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_p
 cc_library(ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass)
 cc_library(adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass)
 cc_library(inference_op_replace_pass SRCS inference_op_replace_pass.cc DEPS analysis_pass graph_to_program_pass)
+cc_library(ir_graph_clean_pass SRCS ir_graph_clean_pass.cc DEPS analysis_pass)

 cc_library(analysis_passes SRCS passes.cc DEPS
            ir_graph_build_pass
@@ -14,6 +15,7 @@ cc_library(analysis_passes SRCS passes.cc DEPS
            memory_optim_pass
            inference_op_replace_pass
            ir_graph_to_program_pass
+           ir_graph_clean_pass
 )

 set(analysis_deps ${analysis_deps}

paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc

Lines changed: 1 addition & 1 deletion
@@ -20,9 +20,9 @@ namespace inference {
 namespace analysis {

 void InferenceOpReplacePass::RunImpl(Argument* argument) {
-  if (!argument->use_gpu()) return;
   std::unordered_map<std::string, std::string> replaced_map{
       {"conditional_block", "conditional_block_infer"},
+      {"merge_lod_tensor", "merge_lod_tensor_infer"},
   };

   auto& graph = argument->main_graph();
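
For context, the remainder of RunImpl (unchanged by this hunk) walks the main graph and rewrites matching op types in place; roughly (a simplified sketch, not the verbatim file):

for (auto* node : graph.Nodes()) {
  if (!node->IsOp()) continue;
  auto it = replaced_map.find(node->Op()->Type());
  if (it != replaced_map.end()) {
    // e.g. merge_lod_tensor becomes merge_lod_tensor_infer at inference time
    node->Op()->SetType(it->second);
  }
}

Note the deleted early-return: the replacements now apply on CPU as well as GPU, presumably part of the Mask RCNN fix, since conditional_block and merge_lod_tensor can sit on CPU execution paths.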
