Skip to content

Commit 612e1a3

Browse files
committed
modification
1 parent d0b2453 commit 612e1a3

File tree

11 files changed

+24
-35
lines changed

11 files changed

+24
-35
lines changed

paddle/fluid/framework/details/computation_op_handle.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@
2323
#include "paddle/fluid/framework/scope.h"
2424
#include "paddle/fluid/platform/device_context.h"
2525

26-
#include "paddle/fluid/framework/details/reference_count_op_handle.h"
27-
2826
namespace paddle {
2927
namespace framework {
3028
namespace details {

paddle/fluid/framework/details/op_handle_base.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,6 @@ class OpHandleBase {
8989

9090
ir::Node *Node() { return node_; }
9191

92-
const std::map<platform::Place, platform::DeviceContext *>
93-
&GetDeviceContexts() const {
94-
return dev_ctxes_;
95-
}
96-
9792
protected:
9893
void RunAndRecordEvent(const std::function<void()> &callback);
9994

paddle/fluid/framework/details/reference_count_op_handle.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,15 @@ class ReferenceCountOpHandle : public OpHandleBase {
6969

7070
std::string Name() const override { return "reference_count"; }
7171

72-
// protected:
72+
protected:
7373
void RunImpl() override {
74-
auto *exec_scope_ = scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
74+
auto *exec_scope = scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
7575
std::vector<LoDTensor *> tensors;
7676
for (auto &name : var_names_) {
7777
auto it = ref_cnts_->find(name);
7878
if (it == ref_cnts_->end()) continue;
7979

80-
auto *var = exec_scope_->FindVar(name);
80+
auto *var = exec_scope->FindVar(name);
8181
if (var == nullptr || !var->IsType<LoDTensor>()) continue;
8282

8383
if (it->second.fetch_sub(1) <= 1) {
@@ -91,8 +91,8 @@ class ReferenceCountOpHandle : public OpHandleBase {
9191
}
9292

9393
private:
94-
void ClearTensors(const std::vector<LoDTensor *> &tensors) const {
95-
auto *gc = dynamic_cast<const StreamGarbageCollector<Tensor> *>(gc_);
94+
void ClearTensors(const std::vector<LoDTensor *> &tensors) {
95+
auto *gc = dynamic_cast<StreamGarbageCollector<Tensor> *>(gc_);
9696
if (gc != nullptr) {
9797
auto compute_stream = dev_ctx_->stream();
9898
auto callback_stream = gc->stream();

paddle/fluid/framework/details/reference_count_pass.cc

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -128,12 +128,10 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
128128
std::vector<std::unique_ptr<OpHandleBase>> new_all_ops;
129129
new_all_ops.reserve(compute_ref_cnt_map.size() + all_ops.size());
130130
for (auto &op : all_ops) {
131-
auto it = compute_ref_cnt_map.find(op.get());
131+
new_all_ops.emplace_back(std::move(op));
132+
auto it = compute_ref_cnt_map.find(new_all_ops.back().get());
132133
if (it != compute_ref_cnt_map.end()) {
133-
new_all_ops.emplace_back(std::move(op));
134-
new_all_ops.emplace_back(std::unique_ptr<OpHandleBase>(it->second));
135-
} else {
136-
new_all_ops.emplace_back(std::move(op));
134+
new_all_ops.emplace_back(it->second);
137135
}
138136
}
139137

paddle/fluid/framework/executor.cc

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,11 @@ int kProgramId = -1;
3737

3838
ExecutorPrepareContext::ExecutorPrepareContext(
3939
const framework::ProgramDesc& prog, size_t block_id)
40-
: prog_(prog),
41-
block_id_(block_id),
42-
ref_cnts_(GetNonPersistableReferenceCount<int>(prog, block_id)) {}
40+
: prog_(prog), block_id_(block_id) {
41+
if (GetEagerDeletionThreshold() >= 0) {
42+
ref_cnts_ = GetNonPersistableReferenceCount<int>(prog_, block_id_);
43+
}
44+
}
4345

4446
ExecutorPrepareContext::~ExecutorPrepareContext() {
4547
VLOG(5) << "destroy ExecutorPrepareContext";
@@ -331,8 +333,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
331333
CreateVariables(ctx->prog_, local_scope, ctx->block_id_);
332334
}
333335

334-
std::shared_ptr<std::vector<framework::LoDTensor*>> erase_tensors(
335-
new std::vector<framework::LoDTensor*>());
336336
int64_t max_memory_size = GetEagerDeletionThreshold();
337337

338338
std::unique_ptr<GarbageCollector<Tensor>> gc;
@@ -353,7 +353,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
353353
for (auto& op : ctx->ops_) {
354354
op->Run(*local_scope, place_);
355355

356-
#ifdef PADDLE_WITH_CUDA
357356
if (gc != nullptr) {
358357
std::vector<std::string> erase_vars;
359358
for (auto& input : op->Inputs()) {
@@ -395,18 +394,18 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
395394
if (!erase_tensors.empty()) gc->Add(erase_tensors);
396395
}
397396
}
398-
#endif
399397

400398
if (FLAGS_benchmark) {
401399
VLOG(2) << "Memory used after operator " + op->Type() + " running: "
402400
<< memory::memory_usage(place_);
403401
}
404402
}
405403

406-
if (gc != nullptr)
404+
if (gc != nullptr) {
407405
gc->Wait();
408-
else
406+
} else {
409407
platform::DeviceContextPool::Instance().Get(place_)->Wait();
408+
}
410409

411410
if (local_scope != scope) {
412411
scope->DeleteScope(local_scope);

paddle/fluid/framework/executor.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@ namespace paddle {
2828
namespace framework {
2929
extern void InitializeVariable(Variable* var, proto::VarType::Type var_type);
3030

31-
int64_t GetEagerDeletionThreshold();
32-
3331
template <typename T>
3432
std::unordered_map<std::string, T> GetNonPersistableReferenceCount(
3533
const ProgramDesc& prog, size_t block_id) {

paddle/fluid/framework/parallel_executor.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ limitations under the License. */
2222
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
2323

2424
#ifdef PADDLE_WITH_CUDA
25-
#include "paddle/fluid/framework/details/reference_count_pass.h"
2625
#include "paddle/fluid/platform/nccl_helper.h"
2726
#endif
2827

paddle/fluid/framework/parallel_executor.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ limitations under the License. */
2929
#include "paddle/fluid/framework/tensor.h"
3030
#include "paddle/fluid/platform/device_context.h"
3131

32+
#ifdef PADDLE_WITH_CUDA
33+
#include "paddle/fluid/framework/details/reference_count_pass.h"
34+
#endif
35+
3236
namespace paddle {
3337
namespace framework {
3438

paddle/fluid/framework/scope.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,17 @@ DEFINE_bool(
3232
"slow down the destruction of variables.(around 1% performance harm)");
3333

3434
DEFINE_double(
35-
eager_delete_tensor_GB, -1.0,
35+
eager_delete_tensor_gb, -1.0,
3636
"Memory size threshold (GB) when the garbage collector clear tensors."
3737
"Disabled when this value is less than 0");
3838

3939
namespace paddle {
4040
namespace framework {
4141

4242
int64_t GetEagerDeletionThreshold() {
43-
return FLAGS_eager_delete_tensor_GB < 0
43+
return FLAGS_eager_delete_tensor_gb < 0
4444
? -1
45-
: static_cast<int64_t>(FLAGS_eager_delete_tensor_GB *
45+
: static_cast<int64_t>(FLAGS_eager_delete_tensor_gb *
4646
(static_cast<int64_t>(1) << 30));
4747
}
4848

paddle/fluid/platform/device_context.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,6 @@ limitations under the License. */
3636
#endif
3737
#include "unsupported/Eigen/CXX11/Tensor"
3838

39-
DECLARE_bool(clear_gpu_memory_when_unused);
40-
4139
namespace paddle {
4240
namespace platform {
4341

0 commit comments

Comments (0)