Commit 15913d9

Merge branch 'develop' of github.com:PaddlePaddle/Paddle into overlap_memcpy_with_dist

2 parents: e533a4b + ddd9502

40 files changed: +669 −207 lines

benchmark/fluid/README.md
Lines changed: 2 additions & 0 deletions

@@ -29,9 +29,11 @@ Currently supported `--model` argument include:
 You can choose to use GPU/CPU training. With GPU training, you can specify
 `--gpus <gpu_num>` to run multi GPU training.
 * Run distributed training with parameter servers:
+  * see [run_fluid_benchmark.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/fluid/run_fluid_benchmark.sh) as an example.
   * start parameter servers:
     ```bash
     PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver
+    sleep 15
     ```
   * start trainers:
     ```bash
benchmark/fluid/run_fluid_benchmark.sh
Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+#!/bin/bash
+
+PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model resnet --device CPU --update_method pserver --iterations=10000 &
+
+sleep 15
+
+CUDA_VISIBLE_DEVICES=0,1 PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model resnet --device GPU --update_method pserver --iterations=10000 --gpus 2 &
+
+CUDA_VISIBLE_DEVICES=2,3 PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=1 python fluid_benchmark.py --model resnet --device GPU --update_method pserver --iterations=10000 --gpus 2 &

paddle/fluid/framework/scope.cc
Lines changed: 17 additions & 13 deletions

@@ -34,13 +34,7 @@ DEFINE_bool(
 namespace paddle {
 namespace framework {
 
-Scope::~Scope() {
-  DropKids();
-  for (auto& kv : vars_) {
-    VLOG(3) << "Destroy variable " << kv.first;
-    delete kv.second;
-  }
-}
+Scope::~Scope() { DropKids(); }
 
 Scope& Scope::NewScope() const {
   std::unique_lock<std::mutex> lock(mutex_);
@@ -49,10 +43,13 @@ Scope& Scope::NewScope() const {
 }
 
 Variable* Scope::Var(const std::string& name) {
+  // Acquire the lock when creating a new variable under this scope.
+  std::unique_lock<std::mutex> lock(mutex_);
   auto* v = FindVarLocally(name);
   if (v != nullptr) return v;
+
   v = new Variable();
-  vars_[name] = v;
+  vars_[name].reset(v);
   VLOG(3) << "Create variable " << name;
   v->name_ = &(vars_.find(name)->first);
   return v;
@@ -67,22 +64,29 @@ Variable* Scope::Var(std::string* name) {
 }
 
 Variable* Scope::FindVar(const std::string& name) const {
+  // Acquire the lock when looking up a variable.
+  std::unique_lock<std::mutex> lock(mutex_);
+  return FindVarInternal(name);
+}
+
+Variable* Scope::FindVarInternal(const std::string& name) const {
   auto var = FindVarLocally(name);
   if (var != nullptr) {
     return var;
   }
-  return (parent_ == nullptr) ? nullptr : parent_->FindVar(name);
+  return (parent_ == nullptr) ? nullptr : parent_->FindVarInternal(name);
 }
 
 const Scope* Scope::FindScope(const Variable* var) const {
   for (auto& kv : vars_) {
-    if (kv.second == var) {
+    if (kv.second.get() == var) {
       return this;
     }
   }
   return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
 }
 void Scope::DropKids() {
+  std::unique_lock<std::mutex> lock(mutex_);
   for (Scope* s : kids_) delete s;
   kids_.clear();
 }
@@ -110,10 +114,10 @@ void Scope::DeleteScope(Scope* scope) const {
 }
 
 void Scope::EraseVars(const std::vector<std::string>& var_names) {
+  std::unique_lock<std::mutex> lock(mutex_);
   std::set<std::string> var_set(var_names.begin(), var_names.end());
   for (auto it = vars_.begin(); it != vars_.end();) {
     if (var_set.find(it->first) != var_set.end()) {
-      delete it->second;
       it = vars_.erase(it);
     } else {
       ++it;
@@ -129,7 +133,7 @@ void Scope::Rename(const std::string& origin_name,
   auto new_it = vars_.find(new_name);
   PADDLE_ENFORCE(new_it == vars_.end(),
                  "The variable with name %s is already in the scope", new_name);
-  vars_[new_name] = origin_it->second;
+  vars_[new_name].reset(origin_it->second.release());
   vars_.erase(origin_it);
 }
@@ -141,7 +145,7 @@ std::string Scope::Rename(const std::string& origin_name) const {
 
 Variable* Scope::FindVarLocally(const std::string& name) const {
   auto it = vars_.find(name);
-  if (it != vars_.end()) return it->second;
+  if (it != vars_.end()) return it->second.get();
   return nullptr;
 }
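Note: this diff does two things at once — it locks `mutex_` in the public entry points (`Var`, `FindVar`, `DropKids`, `EraseVars`) and moves variable ownership into `std::unique_ptr`, so the destructor and `EraseVars` no longer `delete` by hand. Below is a minimal, standalone sketch of the same pattern: a locked public wrapper delegating to an unlocked internal helper that can recurse through parents. The names (`Registry`, `Item`) are illustrative, not Paddle's.

```cpp
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

class Registry {
 public:
  struct Item {};

  Item* Find(const std::string& name) const {
    std::unique_lock<std::mutex> lock(mutex_);  // lock once at the boundary
    return FindInternal(name);                  // then recurse unlocked
  }

  Item* Create(const std::string& name) {
    std::unique_lock<std::mutex> lock(mutex_);
    if (Item* it = FindLocally(name)) return it;
    auto owned = std::make_unique<Item>();
    Item* raw = owned.get();
    items_[name] = std::move(owned);  // the map owns the Item; callers borrow
    return raw;
  }

 private:
  // Unlocked helper: each node has its own mutex, and the recursion
  // deliberately skips the parent's lock, mirroring FindVarInternal above.
  Item* FindInternal(const std::string& name) const {
    if (Item* it = FindLocally(name)) return it;
    return (parent_ == nullptr) ? nullptr : parent_->FindInternal(name);
  }

  Item* FindLocally(const std::string& name) const {
    auto it = items_.find(name);
    return it == items_.end() ? nullptr : it->second.get();
  }

  mutable std::mutex mutex_;
  mutable std::unordered_map<std::string, std::unique_ptr<Item>> items_;
  const Registry* parent_{nullptr};
};
```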

paddle/fluid/framework/scope.h
Lines changed: 14 additions & 3 deletions

@@ -47,15 +47,18 @@ class Scope {
   Scope& NewScope() const;
 
   /// Create a variable with given name if it doesn't exist.
+  /// Caller doesn't own the returned Variable.
   Variable* Var(const std::string& name);
 
   /// Create a variable with a scope-unique name.
+  /// Caller doesn't own the returned Variable.
   Variable* Var(std::string* name = nullptr);
 
   void EraseVars(const std::vector<std::string>& var_names);
 
   /// Find a variable in the scope or any of its ancestors. Returns
   /// nullptr if cannot find.
+  /// Caller doesn't own the returned Variable.
   Variable* FindVar(const std::string& name) const;
 
   const Scope* parent() const { return parent_; }
@@ -78,13 +81,21 @@ class Scope {
   // Rename variable to a new name and return the new name
   std::string Rename(const std::string& origin_name) const;
 
-  Variable* FindVarLocally(const std::string& name) const;
-
  private:
   // Call Scope::NewScope for a sub-scope.
   explicit Scope(Scope const* parent) : parent_(parent) {}
 
-  mutable std::unordered_map<std::string, Variable*> vars_;
+  // Called by FindVar recursively.
+  // Caller doesn't own the returned Variable.
+  Variable* FindVarInternal(const std::string& name) const;
+
+  // Called by FindVarInternal and Var.
+  // Caller doesn't own the returned Variable.
+  Variable* FindVarLocally(const std::string& name) const;
+
+  mutable std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
+
+  // Scopes in `kids_` are owned by this class.
   mutable std::list<Scope*> kids_;
   Scope const* parent_{nullptr};
paddle/fluid/inference/analysis/helper.h
Lines changed: 9 additions & 0 deletions

@@ -18,6 +18,8 @@ limitations under the License. */
 #include <unordered_map>
 #include <vector>
 
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
@@ -107,6 +109,13 @@ class OrderedRegistry {
  std::vector<std::unique_ptr<T>> data_;
 };
 
+template <typename T>
+T &GetFromScope(const framework::Scope &scope, const std::string &name) {
+  framework::Variable *var = scope.FindVar(name);
+  PADDLE_ENFORCE(var != nullptr);
+  return *var->GetMutable<T>();
+}
+
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
Lines changed: 7 additions & 1 deletion

@@ -1,10 +1,16 @@
 # Add TRT tests
-nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine)
 # This test is not stable
 # See https://paddleci.ngrok.io/viewLog.html?tab=buildLog&buildTypeId=Paddle_PrCi2&buildId=36834&_focus=8828
 #nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
 #        DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine
 #        SERIAL)
+nv_library(tensorrt_converter
+  SRCS mul_op.cc conv2d_op.cc fc_op.cc
+  DEPS tensorrt_engine mul_op)
+
+nv_test(test_op_converter SRCS test_op_converter.cc DEPS
+  ${FLUID_CORE_MODULES} tensorrt_engine tensorrt_converter)
+
 nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
 nv_test(test_trt_mul_op SRCS test_mul_op.cc mul_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)

paddle/fluid/inference/tensorrt/convert/activation_op.cc
Lines changed: 3 additions & 2 deletions

@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 
 namespace paddle {
@@ -36,8 +37,8 @@ class ReluOpConverter : public OpConverter {
   }
 };
 
-REGISTER_TRT_OP_CONVERTER(relu, ReluOpConverter);
-
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(relu, ReluOpConverter);
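Note: this file and the converters below all move `REGISTER_TRT_OP_CONVERTER` from inside the `paddle::inference::tensorrt` namespaces to global scope. The real macro lives in op_converter.h, which this commit view does not show; the sketch below is NOT its actual expansion, only a model of the usual self-registering-static pattern, which suggests why the move matters: a registrar object expanded inside a namespace gets a namespace-qualified name that companion `USE_*`-style macros in other translation units cannot reference consistently.

```cpp
#include <functional>
#include <map>
#include <string>

// Global registry keyed by op type; a Meyers singleton avoids static-init
// ordering problems.
std::map<std::string, std::function<void()>>& ConverterMap() {
  static std::map<std::string, std::function<void()>> m;
  return m;
}

// Hypothetical registration macro: defines a struct whose constructor
// inserts the factory, plus one static instance that runs at load time.
#define REGISTER_CONVERTER_SKETCH(op_type, factory)                \
  struct Registrar_##op_type {                                     \
    Registrar_##op_type() { ConverterMap()[#op_type] = factory; }  \
  };                                                               \
  static Registrar_##op_type registrar_##op_type##_instance;

// At global scope, the registrar's symbol is predictable. Expanded inside
// namespace paddle::inference::tensorrt, it would be buried there instead.
REGISTER_CONVERTER_SKETCH(relu, [] { /* build a Relu converter */ })
```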

paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
Lines changed: 3 additions & 3 deletions

@@ -22,14 +22,14 @@ class Conv2dOpConverter : public OpConverter {
  public:
   Conv2dOpConverter() {}
   void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope) override {
+                  const framework::Scope& scope, bool test_mode) override {
     LOG(INFO)
         << "convert a fluid conv2d op to tensorrt conv layer without bias";
   }
 };
 
-REGISTER_TRT_OP_CONVERTER(conv2d, Conv2dOpConverter);
-
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(conv2d, Conv2dOpConverter);

paddle/fluid/inference/tensorrt/convert/fc_op.cc
Lines changed: 6 additions & 4 deletions

@@ -56,7 +56,7 @@ void ReorderCKtoKC(TensorRTEngine::Weight& iweights,
 class FcOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope) override {
+                  const framework::Scope& scope, bool test_mode) override {
     VLOG(4) << "convert a fluid fc op to tensorrt fc layer without bias";
 
     framework::OpDesc op_desc(op, nullptr);
@@ -106,14 +106,16 @@ class FcOpConverter : public OpConverter {
                                    n_output, weight.get(), bias.get());
 
     auto output_name = op_desc.Output("Out").front();
-    engine_->DeclareOutput(layer, 0, output_name);
+    engine_->SetITensor(output_name, layer->getOutput(0));
+    if (test_mode) {
+      engine_->DeclareOutput(output_name);
+    }
   }
 };
 
-REGISTER_TRT_OP_CONVERTER(fc, FcOpConverter);
-
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
 
+REGISTER_TRT_OP_CONVERTER(fc, FcOpConverter);
 USE_OP(mul);

paddle/fluid/inference/tensorrt/convert/mul_op.cc
Lines changed: 10 additions & 5 deletions

@@ -23,9 +23,8 @@ namespace tensorrt {
  */
 class MulOpConverter : public OpConverter {
  public:
-  MulOpConverter() {}
   void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope) override {
+                  const framework::Scope& scope, bool test_mode) override {
     VLOG(4) << "convert a fluid mul op to tensorrt mul layer without bias";
 
     framework::OpDesc op_desc(op, nullptr);
@@ -37,12 +36,18 @@ class MulOpConverter : public OpConverter {
         engine_, MatrixMultiply, *const_cast<nvinfer1::ITensor*>(input1), false,
         *const_cast<nvinfer1::ITensor*>(input2), false);
 
-    engine_->DeclareOutput(layer, 0, op_desc.Output("Out")[0]);
+    auto output_name = op_desc.Output("Out")[0];
+    engine_->SetITensor(output_name, layer->getOutput(0));
+    if (test_mode) {  // The test framework cannot determine which tensor is
+                      // the output, so place the declaration inside.
+      engine_->DeclareOutput(output_name);
+    }
   }
 };
 
-REGISTER_TRT_OP_CONVERTER(mul, MulOpConverter);
-
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
+
+USE_OP(mul);
+REGISTER_TRT_OP_CONVERTER(mul, MulOpConverter);
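Note: the fc and mul converters now follow one convention — always record the produced tensor via `SetITensor`, and only promote it to a network output via `DeclareOutput` when `test_mode` is set, since the unit-test harness cannot tell which tensor is the output while production code declares outputs itself. A toy, runnable model of that convention; `Engine` and `ConvertOp` here are illustrative stand-ins, not Paddle classes:

```cpp
#include <iostream>
#include <string>

// Toy engine modeling TensorRTEngine's two calls used above.
struct Engine {
  void SetITensor(const std::string& name) {
    std::cout << "record tensor " << name << "\n";
  }
  void DeclareOutput(const std::string& name) {
    std::cout << "declare network output " << name << "\n";
  }
};

void ConvertOp(Engine* engine, const std::string& output_name, bool test_mode) {
  // Always make the produced tensor visible to later converters.
  engine->SetITensor(output_name);
  // Only the test harness needs the converter to pick the network output;
  // in production the surrounding code decides what the outputs are.
  if (test_mode) engine->DeclareOutput(output_name);
}

int main() {
  Engine engine;
  ConvertOp(&engine, "mul_out", /*test_mode=*/true);
}
```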
