Commit bc9fd1f

Cherry-pick: Inference: fix Mask R-CNN model diff, optimize memory usage, fix memory leak. #18532 (#18547)
Fix the Mask R-CNN model diff; add an interface for setting optim_cache_dir (e.g., when in TRT INT8 mode and loading the model from memory, there must be a way to set the directory for the TRT calibration table data). test=release/1.5
1 parent 7c73a68 commit bc9fd1f

19 files changed: +354 -119 lines
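
To make the new optim_cache_dir interface concrete, here is a minimal usage sketch against the C++ inference API. It is a sketch, not the commit's own example: the buffer arguments and cache path are invented, and the exact EnableTensorRtEngine argument list is an assumption for this release; only SetOptimCacheDir and the model-from-memory constraints come from the diffs below.

#include <memory>
#include <string>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Sketch: TRT INT8 with a model loaded from memory. Without
// SetOptimCacheDir() the new check in ir_pass_manager.cc fails, because the
// calibration table has no model directory to fall back on.
std::unique_ptr<paddle::PaddlePredictor> MakeInt8Predictor(
    const std::string& prog_buf, const std::string& params_buf) {
  paddle::AnalysisConfig config;
  config.SetModelBuffer(prog_buf.data(), prog_buf.size(),
                        params_buf.data(), params_buf.size());
  config.EnableUseGpu(100 /* initial GPU pool, MB */, 0 /* device id */);
  config.EnableTensorRtEngine(1 << 20 /* workspace */, 1 /* max batch */,
                              3 /* min subgraph size */,
                              paddle::AnalysisConfig::Precision::kInt8,
                              false /* use_static: must stay false when the
                                       model comes from memory */);
  config.SetOptimCacheDir("./trt_calib_cache");  // the interface added here
  return paddle::CreatePaddlePredictor(config);
}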

paddle/fluid/framework/ir/graph_pattern_detector.cc

Lines changed: 12 additions & 0 deletions
@@ -504,6 +504,16 @@ PDNode *PDNode::assert_op_has_n_outputs(const std::string &op_type, size_t n) {
   return this;
 }
 
+PDNode *PDNode::assert_has_n_inputs(size_t n) {
+  asserts_.emplace_back([=](Node *x) { return x->inputs.size() == n; });
+  return this;
+}
+
+PDNode *PDNode::assert_has_n_outputs(size_t n) {
+  asserts_.emplace_back([=](Node *x) { return x->outputs.size() == n; });
+  return this;
+}
+
 PDNode *PDNode::assert_more(PDNode::teller_t &&teller) {
   asserts_.emplace_back(std::move(teller));
   return this;

@@ -1444,11 +1454,13 @@ PDNode *patterns::ConvAffineChannel::operator()(
   auto *ac_scale_var = pattern->NewNode(ac_scale_repr())
                            ->AsInput()
                            ->assert_is_persistable_var()
+                           ->assert_has_n_outputs(1)
                            ->assert_is_op_input("affine_channel", "Scale");
   // AC Bias
   auto *ac_bias_var = pattern->NewNode(ac_bias_repr())
                           ->AsInput()
                           ->assert_is_persistable_var()
+                          ->assert_has_n_outputs(1)
                           ->assert_is_op_input("affine_channel", "Bias");
 
   // AC output
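
The two new asserts let a pattern insist on an exact fan-in or fan-out. In the ConvAffineChannel pattern above, assert_has_n_outputs(1) is the likely Mask R-CNN fix: the fuse folds the affine_channel Scale/Bias into the conv weights in place, which is only safe when the persistable var has no other consumer. A hedged sketch of the same guard in a custom pattern (the repr string is illustrative):

// Illustrative: match a persistable var consumed by exactly one op, so an
// in-place rewrite of it cannot corrupt another consumer elsewhere.
auto* scale_var = pattern->NewNode("my_scale_repr")
                      ->AsInput()
                      ->assert_is_persistable_var()
                      ->assert_has_n_outputs(1)  // require a sole consumer
                      ->assert_is_op_input("affine_channel", "Scale");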

paddle/fluid/framework/ir/graph_pattern_detector.h

Lines changed: 3 additions & 0 deletions
@@ -131,6 +131,9 @@ struct PDNode {
       const std::unordered_set<std::string>& op_types,
       const std::string& argument, int nth);
 
+  PDNode* assert_has_n_inputs(size_t n);
+  PDNode* assert_has_n_outputs(size_t n);
+
   template <typename T>
   PDNode* assert_op_attr(const std::string& attr_name, const T& attr) {
     asserts_.emplace_back([=](Node* x) {

paddle/fluid/inference/analysis/argument.h

Lines changed: 1 addition & 2 deletions
@@ -59,7 +59,6 @@ struct Argument {
 
   using unique_ptr_t = std::unique_ptr<void, std::function<void(void*)>>;
   using fusion_statis_t = std::unordered_map<std::string, int>;
-  using engine_opt_info_t = std::map<std::string, std::string>;
   using anakin_max_shape_t = std::map<std::string, std::vector<int>>;
 
   bool Has(const std::string& key) const { return valid_fields_.count(key); }

@@ -130,7 +129,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
   DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
   DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
-  DECL_ARGUMENT_FIELD(engine_opt_info, EngineOptInfo, engine_opt_info_t);
+  DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
 
   // The overall graph to work on.
   DECL_ARGUMENT_UNIQUE_FIELD(main_graph, MainGraph, framework::ir::Graph);
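
For readers unfamiliar with the macro: DECL_ARGUMENT_FIELD(field, Field, type) generates the accessors the rest of this commit relies on (argument->optim_cache_dir(), argument->SetOptimCacheDir(...)). Roughly, as a sketch rather than the exact macro body:

// Approximate expansion of
// DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string):
std::string& optim_cache_dir() { return optim_cache_dir_; }  // getter
void SetOptimCacheDir(const std::string& x) {                // setter
  optim_cache_dir_ = x;
  valid_fields_.insert("optim_cache_dir");  // makes Has("optim_cache_dir") true
}
std::string optim_cache_dir_;  // backing member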

paddle/fluid/inference/analysis/ir_pass_manager.cc

Lines changed: 13 additions & 8 deletions
@@ -94,11 +94,20 @@ void IRPassManager::CreatePasses(Argument *argument,
 
       bool use_static_engine = argument->tensorrt_use_static_engine();
       bool model_from_memory = argument->model_from_memory();
-      bool int8_valid = !(model_from_memory && enable_int8);
+      std::string optim_cache_dir = argument->optim_cache_dir();
+      bool int8_valid =
+          !(model_from_memory && optim_cache_dir.empty() && enable_int8);
       PADDLE_ENFORCE(int8_valid,
-                     "TRT INT8 Now don't support model load from memory.");
-
-      if ((!model_from_memory && use_static_engine) || enable_int8) {
+                     "When you are in TRT INT8 mode, and load model from "
+                     "memory, you should set optim_cache_dir using "
+                     "config.SetOptimCacheDir()");
+      PADDLE_ENFORCE(!(model_from_memory && use_static_engine),
+                     "When you are using Paddle-TRT, and also using load model "
+                     "from memory, you should set the use_static to false.");
+
+      if (!optim_cache_dir.empty()) {
+        pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
+      } else if (use_static_engine || enable_int8) {
         std::string model_opt_cache_dir =
             argument->Has("model_dir")
                 ? argument->model_dir()

@@ -110,8 +119,6 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
       pass->Set("use_static_engine", new bool(use_static_engine));
       pass->Set("model_from_memory", new bool(argument->model_from_memory()));
-      pass->Set("engine_opt_info", new std::map<std::string, std::string>(
-                                       argument->engine_opt_info()));
     }
     if (pass_name == "ngraph_subgraph_pass") {
       pass->Set("program",

@@ -123,8 +130,6 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("use_gpu", new bool(argument->use_gpu()));
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
       pass->Set("model_from_memory", new bool(argument->model_from_memory()));
-      pass->Set("engine_opt_info", new std::map<std::string, std::string>(
-                                       argument->engine_opt_info()));
       pass->Set("predictor_id", new int(argument->predictor_id()));
       pass->Set("max_input_shape", new std::map<std::string, std::vector<int>>(
                                        argument->anakin_max_input_shape()));
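
The net effect in CreatePasses is a precedence rule for where engine artifacts (serialized engines, calibration tables) live. A condensed sketch of that order; DeriveFromModelPath is a hypothetical stand-in for the fallback the real code computes:

// Hypothetical condensation of the cache-dir logic above.
std::string ResolveOptCacheDir(Argument* argument, bool use_static_engine,
                               bool enable_int8) {
  const std::string dir = argument->optim_cache_dir();
  if (!dir.empty()) return dir;  // 1) explicit config.SetOptimCacheDir() wins
  if (use_static_engine || enable_int8) {
    // 2) fall back to a directory derived from the model on disk; impossible
    //    for a model loaded from memory, hence the new PADDLE_ENFORCEs.
    return argument->Has("model_dir")
               ? argument->model_dir()
               : DeriveFromModelPath(argument);  // hypothetical helper
  }
  return "";  // 3) nothing needs caching
}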

paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

Lines changed: 1 addition & 0 deletions
@@ -226,6 +226,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   std::unique_ptr<tensorrt::TRTInt8Calibrator> calibrator;
   if (enable_int8 && calibration_data.size() != 0) {
     calibrator.reset(new tensorrt::TRTInt8Calibrator(calibration_data));
+    LOG(INFO) << "RUN Paddle TRT int8 calibration mode...";
   }
   // When in int8 mode and calibration_mode, the program just produce the
   // calibration table data.

paddle/fluid/inference/analysis/passes/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -4,13 +4,15 @@ cc_library(memory_optim_pass SRCS memory_optimize_pass.cc DEPS analysis_pass zer
 cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_pass.cc DEPS analysis_pass argument ir_pass_manager)
 cc_library(ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass)
 cc_library(adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass)
+cc_library(inference_op_replace_pass SRCS inference_op_replace_pass.cc DEPS analysis_pass graph_to_program_pass)
 
 cc_library(analysis_passes SRCS passes.cc DEPS
   ir_graph_build_pass
   ir_analysis_pass
   ir_params_sync_among_devices_pass
   adjust_cudnn_workspace_size_pass
   memory_optim_pass
+  inference_op_replace_pass
   ir_graph_to_program_pass
 )

paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc

Lines changed: 47 additions & 0 deletions

@@ -0,0 +1,47 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
+#include <unordered_map>
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+void InferenceOpReplacePass::RunImpl(Argument* argument) {
+  if (!argument->use_gpu()) return;
+  std::unordered_map<std::string, std::string> replaced_map{
+      {"conditional_block", "conditional_block_infer"},
+  };
+
+  auto& graph = argument->main_graph();
+  auto nodes = graph.Nodes();
+
+  for (auto& node : nodes) {
+    if (!node->IsOp()) continue;
+    auto* op_desc = node->Op();
+    std::string op_type = op_desc->Type();
+    if (!replaced_map.count(op_type)) continue;
+    op_desc->SetType(replaced_map[op_type]);
+    op_desc->Flush();
+  }
+}
+
+std::string InferenceOpReplacePass::repr() const {
+  return "inference-op-replace-pass";
+}
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
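
The pass is table-driven, so supporting another op that grows a dedicated inference kernel is one map entry. For illustration only (while_infer is hypothetical; this commit maps only conditional_block):

std::unordered_map<std::string, std::string> replaced_map{
    {"conditional_block", "conditional_block_infer"},
    // {"while", "while_infer"},  // hypothetical future entry
};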
paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h

Lines changed: 43 additions & 0 deletions

@@ -0,0 +1,43 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
+#include "paddle/fluid/platform/place.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+/*
+ * Some ops (while, conditional_block, etc.) have different optimization
+ * points under prediction and training conditions, so the corresponding
+ * inference implementations were added for them separately. This pass
+ * replaces such ops with their inference counterparts.
+ */
+class InferenceOpReplacePass : public AnalysisPass {
+ public:
+  void RunImpl(Argument *argument) override;
+  std::string repr() const override;
+};
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle

paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc

Lines changed: 26 additions & 5 deletions
@@ -18,6 +18,7 @@
 #include <functional>
 #include <limits>
 #include <map>
+#include <set>
 #include <string>
 #include <type_traits>
 #include <utility>

@@ -108,11 +109,34 @@ int DataTypeToSpace(framework::proto::VarType_Type type) {
 void MemoryOptimizePass::CollectVarMemorySize(
     space_table_t* space_table) const {
   const int fake_batch_size = 1;
+  auto valid_var = [&](framework::ir::Node* node) -> bool {
+    std::set<std::string> invalid_op = {"while", "conditional_block",
+                                        "tensorrt_engine",
+                                        "conditional_block_infer"};
+    for (auto* tmp : node->inputs) {
+      CHECK(tmp->IsOp());
+      std::string op_type = tmp->Op()->Type();
+      if (std::find(invalid_op.begin(), invalid_op.end(), op_type) !=
+          invalid_op.end()) {
+        return false;
+      }
+    }
+    for (auto* tmp : node->outputs) {
+      CHECK(tmp->IsOp());
+      std::string op_type = tmp->Op()->Type();
+      if (std::find(invalid_op.begin(), invalid_op.end(), op_type) !=
+          invalid_op.end()) {
+        return false;
+      }
+    }
+    return true;
+  };
   // Collect tensors from graph.
   for (auto* node : graph_->Nodes()) {
     if (node->IsVar() &&
         node->Var()->GetType() ==
-            framework::proto::VarType::Type::VarType_Type_LOD_TENSOR) {
+            framework::proto::VarType::Type::VarType_Type_LOD_TENSOR &&
+        valid_var(node)) {
       // Parameters will not be reused.
       if (node->Var()->Persistable()) continue;
       auto shape = node->Var()->GetShape();

@@ -135,12 +159,9 @@ void MakeSimpleReusePlan(
     std::unordered_map<std::string, int>* cluster_size) {
   std::vector<MemNode> mem_nodes;
   for (auto& data : lifecycles) {
+    if (!space_table.count(data.first)) continue;
     MemNode temp_node;
     temp_node.name = data.first;
-    PADDLE_ENFORCE(
-        space_table.count(data.first),
-        "%s variable should be in the spacetable during memory optimize",
-        data.first);
     temp_node.size = space_table.at(data.first);
     temp_node.cluster = -1;
     temp_node.lifetime = data.second;
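
The rationale for valid_var: ops that own sub-blocks (while, conditional_block, tensorrt_engine) run an inner program whose reads and writes the outer-graph lifetime analysis cannot see, so reusing the buffers of their inputs and outputs is unsafe. Excluding those vars from the space table, and letting MakeSimpleReusePlan silently skip variables missing from it, is what plugs the bad reuse. A behavior-equivalent sketch of the same test using std::set::count, which reads more directly than std::find over a set (the names are mine, not the patch's):

// Hypothetical rewrite of the valid_var lambda above (inverted sense).
static const std::set<std::string> kSubBlockOps = {
    "while", "conditional_block", "tensorrt_engine",
    "conditional_block_infer"};

static bool TouchesSubBlockOp(framework::ir::Node* var) {
  auto touches = [](const std::vector<framework::ir::Node*>& ops) {
    for (auto* op : ops) {
      if (op->IsOp() && kSubBlockOps.count(op->Op()->Type())) return true;
    }
    return false;
  };
  // A var is skipped for reuse if any adjacent op owns a sub-block.
  return touches(var->inputs) || touches(var->outputs);
}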

paddle/fluid/inference/analysis/passes/passes.cc

Lines changed: 3 additions & 0 deletions
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/inference/analysis/passes/passes.h"
 #include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h"
+#include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"

@@ -38,6 +39,8 @@ PassRegistry::PassRegistry() {
       std::unique_ptr<AnalysisPass>(new IrParamsSyncAmongDevicesPass));
   passes_.emplace("adjust_cudnn_workspace_size_pass",
                   std::unique_ptr<AnalysisPass>(new AdjustCudnnWorkSpacePass));
+  passes_.emplace("inference_op_replace_pass",
+                  std::unique_ptr<AnalysisPass>(new InferenceOpReplacePass));
   passes_.emplace(
       "ir_graph_to_program_pass",
       std::unique_ptr<IrGraphToProgramPass>(new IrGraphToProgramPass));
