Commit aaf818f

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/add_fwd_block_id

2 parents: 65058cf + bf9ed4a

20 files changed, +759 -308 lines

doc/design/parallel_do.md

Lines changed: 2 additions & 2 deletions
````diff
@@ -24,7 +24,7 @@ A vanilla implementation of parallel_do can be shown as the following (`|` means
 ```
 In the forward pass
 | Split input onto different devices
-| Copy parameter to onto different devices
+| Copy parameter onto different devices
 |||| Compute forward pass in parallel
 | Merge output from different devices
@@ -87,7 +87,7 @@ block2 {
 }
 ```

-## Proformance Imporvement
+## Performance Imporvement

 There are serial places we can make this parallel_do faster.
````

paddle/fluid/framework/block_desc.cc

Lines changed: 19 additions & 0 deletions
```diff
@@ -44,6 +44,25 @@ bool BlockDesc::HasVar(const std::string &name) const {
   return vars_.find(name) != vars_.end();
 }

+VarDesc *BlockDesc::RenameVar(const std::string &old_name,
+                              const std::string &new_name) {
+  if (!this->HasVar(old_name)) {
+    return nullptr;
+  }
+  need_update_ = true;
+  auto *var = this->Var(old_name);
+  VarDesc *new_var = new VarDesc(*(var->Proto()));
+  new_var->SetName(new_name);
+  vars_[new_name].reset(new_var);
+  // rename inputs and outputs
+  for (const auto &op : ops_) {
+    auto *it = op.get();
+    it->Rename(old_name, new_name);
+  }
+  vars_.erase(old_name);
+  return new_var;
+}
+
 VarDesc *BlockDesc::FindVarRecursive(const std::string &name) const {
   if (name == kEmptyVarName) return nullptr;
```
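A minimal usage sketch for the new `BlockDesc::RenameVar` (illustrative only; the variable names and the calling context are hypothetical, not part of this commit):

```cpp
#include <cassert>
#include "paddle/fluid/framework/block_desc.h"

// Hypothetical helper: rename a variable inside a block and check the effects.
void RenameVarSketch(paddle::framework::BlockDesc *block) {
  paddle::framework::VarDesc *renamed =
      block->RenameVar("fc_0.w", "fc_0.w_new");  // hypothetical names
  if (renamed == nullptr) {
    return;  // RenameVar returns nullptr when old_name is absent.
  }
  // The old entry was erased, and every op in the block had its inputs
  // and outputs rewritten through OpDesc::Rename.
  assert(renamed->Name() == "fc_0.w_new");
  assert(!block->HasVar("fc_0.w"));
}
```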

paddle/fluid/framework/block_desc.h

Lines changed: 2 additions & 0 deletions
```diff
@@ -57,6 +57,8 @@ class BlockDesc {

   bool HasVar(const std::string &var_name) const;

+  VarDesc *RenameVar(const std::string &old_name, const std::string &new_name);
+
   VarDesc *FindVarRecursive(const std::string &name_bytes) const;

   VarDesc &FindRecursiveOrCreateVar(const std::string &name_bytes);
```

paddle/fluid/framework/channel.h

Lines changed: 73 additions & 0 deletions
```diff
@@ -15,6 +15,8 @@ limitations under the License. */
 #pragma once

 #include <stddef.h>  // for size_t
+#include <typeindex>
+#include "paddle/fluid/platform/enforce.h"

 namespace paddle {
 namespace framework {
@@ -51,6 +53,77 @@ void CloseChannel(Channel<T>* ch) {
   ch->Close();
 }

+/*
+ * The ChannelHolder class serves two main purposes:
+ * 1. It acts as a unified wrapper for the different kinds of
+ *    channels, i.e. Buffered and Unbuffered channels. This is
+ *    similar to the ReaderHolder class.
+ * 2. It also helps us in TypeHiding. This is similar to the
+ *    PlaceHolder implementations in variable.h and tensor.h.
+ */
+class ChannelHolder {
+ public:
+  template <typename T>
+  void Reset(size_t buffer_size) {
+    holder_.reset(new PlaceholderImpl<T>(buffer_size));
+  }
+
+  template <typename T>
+  bool Send(T* data) {
+    if (!IsInitialized()) return false;
+    PADDLE_ENFORCE_EQ(holder_->Type(), std::type_index(typeid(T)));
+    // Static cast should be safe because we have ensured that types are same
+    Channel<T>* channel = static_cast<Channel<T>*>(holder_->Ptr());
+    return channel != nullptr ? channel->Send(data) : false;
+  }
+
+  template <typename T>
+  bool Receive(T* data) {
+    if (!IsInitialized()) return false;
+    PADDLE_ENFORCE_EQ(holder_->Type(), std::type_index(typeid(T)));
+    Channel<T>* channel = static_cast<Channel<T>*>(holder_->Ptr());
+    return channel != nullptr ? channel->Receive(data) : false;
+  }
+
+  void close() {
+    if (IsInitialized()) holder_->Close();
+  }
+
+  inline bool IsInitialized() const { return holder_ != nullptr; }
+
+ private:
+  /**
+   * @note Placeholder hides type T, so it doesn't appear as a template
+   *       parameter of ChannelHolder.
+   */
+  struct Placeholder {
+    virtual ~Placeholder() {}
+    virtual const std::type_index Type() const = 0;
+    virtual void* Ptr() const = 0;
+    virtual void Close() const = 0;
+    std::type_info type_;
+  };
+
+  template <typename T>
+  struct PlaceholderImpl : public Placeholder {
+    PlaceholderImpl(size_t buffer_size) : type_(std::type_index(typeid(T))) {
+      channel_.reset(MakeChannel<T>(buffer_size));
+    }
+
+    virtual const std::type_index Type() const { return type_; }
+    virtual void* Ptr() const { return static_cast<void*>(channel_.get()); }
+    virtual void Close() {
+      if (channel_) channel_->Close();
+    }
+
+    std::unique_ptr<Channel<T>*> channel_;
+    const std::type_index type_;
+  };
+
+  // Pointer to a PlaceholderImpl object
+  std::unique_ptr<Placeholder> holder_;
+};
+
 }  // namespace framework
 }  // namespace paddle
```
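A short sketch of how the new `ChannelHolder` is meant to be used, relying only on the API added above; the payload type and buffer size are illustrative:

```cpp
#include "paddle/fluid/framework/channel.h"

// Illustrative: type-erased send/receive through a ChannelHolder.
void ChannelHolderSketch() {
  paddle::framework::ChannelHolder holder;
  holder.Reset<int>(4);  // wraps a buffered Channel<int> with capacity 4

  int value = 42;
  holder.Send(&value);  // PADDLE_ENFORCE_EQ fires if T mismatches the holder

  int received = 0;
  if (holder.Receive(&received)) {
    // received now holds 42.
  }
  holder.close();  // forwards to the wrapped channel's Close()
}
```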

paddle/fluid/framework/executor.cc

Lines changed: 4 additions & 1 deletion
```diff
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <set>

 #include "gflags/gflags.h"
+#include "paddle/fluid/framework/channel.h"
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/framework/feed_fetch_type.h"
 #include "paddle/fluid/framework/lod_rank_table.h"
@@ -55,13 +56,15 @@ static void CreateTensor(Variable* var, proto::VarType::Type var_type) {
     var->GetMutable<platform::PlaceList>();
   } else if (var_type == proto::VarType::READER) {
     var->GetMutable<ReaderHolder>();
+  } else if (var_type == proto::VarType::CHANNEL) {
+    var->GetMutable<ChannelHolder>();
   } else if (var_type == proto::VarType::NCCL_COM) {
     // GetMutable will be called in ncclInit
   } else {
     PADDLE_THROW(
         "Variable type %d is not in "
         "[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
-        "LOD_RANK_TABLE, PLACE_LIST, READER, NCCL_COM]",
+        "LOD_RANK_TABLE, PLACE_LIST, READER, CHANNEL, NCCL_COM]",
         var_type);
   }
 }
```
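With this branch, a variable of type `CHANNEL` gets a `ChannelHolder` payload when the executor materializes it; a hedged sketch of what that amounts to (the scope setup and names are illustrative, not from this commit):

```cpp
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/scope.h"

// Illustrative: what CreateTensor's new CHANNEL branch establishes.
void ChannelVariableSketch(paddle::framework::Scope *scope) {
  paddle::framework::Variable *var = scope->Var("ch");  // hypothetical name
  // The new branch calls GetMutable<ChannelHolder>() for CHANNEL vars:
  auto *holder = var->GetMutable<paddle::framework::ChannelHolder>();
  holder->Reset<float>(8);  // an op would choose the payload type and capacity
}
```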

paddle/fluid/framework/var_desc.cc

Lines changed: 52 additions & 2 deletions
```diff
@@ -88,7 +88,13 @@ std::vector<std::vector<int64_t>> VarDesc::GetShapes() const {
 }

 void VarDesc::SetDataType(proto::VarType::Type data_type) {
-  mutable_tensor_desc()->set_data_type(data_type);
+  switch (desc_.type().type()) {
+    case proto::VarType::CHANNEL:
+      mutable_channel_desc()->set_data_type(data_type);
+      break;
+    default:
+      mutable_tensor_desc()->set_data_type(data_type);
+  }
 }

 void VarDesc::SetDataTypes(
@@ -109,7 +115,13 @@ void VarDesc::SetDataTypes(
 }

 proto::VarType::Type VarDesc::GetDataType() const {
-  return tensor_desc().data_type();
+  switch (desc_.type().type()) {
+    case proto::VarType::CHANNEL:
+      return channel_desc().data_type();
+      break;
+    default:
+      return tensor_desc().data_type();
+  }
 }

 std::vector<proto::VarType::Type> VarDesc::GetDataTypes() const {
@@ -122,6 +134,17 @@ std::vector<proto::VarType::Type> VarDesc::GetDataTypes() const {
   return res;
 }

+void VarDesc::SetCapacity(int64_t capacity) {
+  switch (desc_.type().type()) {
+    case proto::VarType::CHANNEL:
+      desc_.mutable_type()->mutable_channel()->set_capacity(capacity);
+      break;
+    default:
+      PADDLE_THROW("Setting 'capacity' is not supported by the type of var %s.",
+                   this->Name());
+  }
+}
+
 void VarDesc::SetLoDLevel(int32_t lod_level) {
   switch (desc_.type().type()) {
     case proto::VarType::LOD_TENSOR:
@@ -191,6 +214,19 @@ std::vector<int32_t> VarDesc::GetLoDLevels() const {
   }
 }

+const proto::VarType::ChannelDesc &VarDesc::channel_desc() const {
+  PADDLE_ENFORCE(desc_.has_type(), "The var's type hasn't been set.");
+  PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set.");
+  switch (desc_.type().type()) {
+    case proto::VarType::CHANNEL:
+      return desc_.type().channel();
+    default:
+      PADDLE_THROW(
+          "Getting 'channel_desc' is not supported by the type of var %s.",
+          this->Name());
+  }
+}
+
 const proto::VarType::TensorDesc &VarDesc::tensor_desc() const {
   PADDLE_ENFORCE(desc_.has_type(), "The var's type hasn't been set.");
   PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set.");
@@ -226,6 +262,20 @@ std::vector<proto::VarType::TensorDesc> VarDesc::tensor_descs() const {
   }
 }

+proto::VarType::ChannelDesc *VarDesc::mutable_channel_desc() {
+  PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set.");
+  PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set.");
+  switch (desc_.type().type()) {
+    case proto::VarType::CHANNEL:
+      return desc_.mutable_type()->mutable_channel();
+    default:
+      PADDLE_THROW(
+          "Getting 'mutable_channel_desc' is not supported by the type of var "
+          "%s.",
+          this->Name());
+  }
+}
+
 proto::VarType::TensorDesc *VarDesc::mutable_tensor_desc() {
   PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set.");
   PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set.");
```
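Together with `SetCapacity`, these switches let a channel variable be described like any other; a minimal sketch, assuming the existing `VarDesc(name)` constructor and `SetType` (the name and parameters are illustrative):

```cpp
#include "paddle/fluid/framework/var_desc.h"

// Illustrative: describing a channel variable with the new setters.
void ChannelVarDescSketch() {
  paddle::framework::VarDesc var("pipe");  // hypothetical variable name
  var.SetType(paddle::framework::proto::VarType::CHANNEL);   // set type first
  var.SetDataType(paddle::framework::proto::VarType::FP32);  // channel branch
  var.SetCapacity(64);  // CHANNEL only; other types hit the PADDLE_THROW
  // GetDataType() now reads channel_desc().data_type(), i.e. FP32.
}
```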

paddle/fluid/framework/var_desc.h

Lines changed: 4 additions & 0 deletions
```diff
@@ -85,6 +85,8 @@ class VarDesc {
   void SetDataTypes(
       const std::vector<proto::VarType::Type> &multiple_data_type);

+  void SetCapacity(int64_t capacity);
+
   proto::VarType::Type GetDataType() const;

   std::vector<proto::VarType::Type> GetDataTypes() const;
@@ -106,8 +108,10 @@ class VarDesc {
   void SetPersistable(bool persistable) { desc_.set_persistable(persistable); }

  private:
+  const proto::VarType::ChannelDesc &channel_desc() const;
   const proto::VarType::TensorDesc &tensor_desc() const;
   std::vector<proto::VarType::TensorDesc> tensor_descs() const;
+  proto::VarType::ChannelDesc *mutable_channel_desc();
   proto::VarType::TensorDesc *mutable_tensor_desc();
   std::vector<proto::VarType::TensorDesc *> mutable_tensor_descs();
```

paddle/fluid/framework/var_type.h

Lines changed: 6 additions & 0 deletions
```diff
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
+#include "paddle/fluid/framework/channel.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/lod_rank_table.h"
 #include "paddle/fluid/framework/lod_tensor.h"
@@ -34,6 +35,8 @@ inline proto::VarType::Type ToVarType(std::type_index type) {
     return proto::VarType_Type_SELECTED_ROWS;
   } else if (type.hash_code() == typeid(ReaderHolder).hash_code()) {
     return proto::VarType_Type_READER;
+  } else if (type.hash_code() == typeid(ChannelHolder).hash_code()) {
+    return proto::VarType_Type_CHANNEL;
   } else {
     PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
   }
@@ -57,6 +60,9 @@ inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
     case proto::VarType_Type_READER:
       visitor(var.Get<ReaderHolder>());
       return;
+    case proto::VarType_Type_CHANNEL:
+      visitor(var.Get<ChannelHolder>());
+      return;
     default:
       PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type()));
   }
```

paddle/fluid/operators/elementwise_add_op.h

Lines changed: 5 additions & 57 deletions
```diff
@@ -41,59 +41,8 @@ class ElementwiseAddKernel : public framework::OpKernel<T> {
 };

 template <typename T>
-struct ElementwiseAddGradFunctor {
-  template <typename Device, typename X, typename Y, typename Z, typename dX,
-            typename dY, typename dZ>
-  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
-    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
-    if (dx) {
-      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
-      dx_e.device(d) = dz_e;
-    }
-    if (dy) {
-      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
-      dy_e.device(d) = dz_e;
-    }
-  }
-};
-
-template <typename T>
-struct ElementwiseAddBroadCastGradFunctor {
-  template <typename Device, typename X, typename Y, typename Z, typename dX,
-            typename dY, typename dZ, typename Pre, typename N>
-  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n) {
-    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
-    if (dx) {
-      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
-      dx_e.device(d) = dz_e;
-    }
-
-    if (dy) {
-      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
-      dy_e.device(d) = dz_e.reshape(Eigen::DSizes<int, 2>(pre, n))
-                           .sum(Eigen::array<int, 1>{{0}});
-    }
-  }
-};
-
-template <typename T>
-struct ElementwiseAddBroadCast2GradFunctor {
-  template <typename Device, typename X, typename Y, typename Z, typename dX,
-            typename dY, typename dZ, typename Pre, typename N, typename Post>
-  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n,
-                  Post post) {
-    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
-    if (dx) {
-      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
-      dx_e.device(d) = dz_e;
-    }
-
-    if (dy) {
-      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
-      dy_e.device(d) = dz_e.reshape(Eigen::DSizes<int, 3>(pre, n, post))
-                           .sum(Eigen::array<int, 2>{{0, 2}});
-    }
-  }
+struct IdentityGrad {
+  HOSTDEVICE T operator()(T x, T y, T out, T dout) const { return dout; }
 };

 template <typename DeviceContext, typename T>
@@ -109,10 +58,9 @@ class ElementwiseAddGradKernel : public framework::OpKernel<T> {
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
    int axis = ctx.Attr<int>("axis");
-    ElementwiseGradCompute<DeviceContext, T, ElementwiseAddGradFunctor<T>,
-                           ElementwiseAddBroadCastGradFunctor<T>,
-                           ElementwiseAddBroadCast2GradFunctor<T>>(
-        ctx, x, y, out, dout, axis, dx, dy);
+    ElemwiseGradCompute<DeviceContext, T, IdentityGrad<T>, IdentityGrad<T>>(
+        ctx, *x, *y, *out, *dout, axis, dx, dy, IdentityGrad<T>(),
+        IdentityGrad<T>());
   }
 };
```
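The collapse of three hand-written functors into one `IdentityGrad` follows from the calculus of addition: for `out = x + y` both local derivatives are 1, so each input gradient simply forwards `dout`, and the broadcast-axis reduction that the two deleted functors performed by hand is now the job of the shared `ElemwiseGradCompute` path. In symbols:

```latex
out = x + y \;\Rightarrow\;
\frac{\partial L}{\partial x} = \frac{\partial L}{\partial out}, \qquad
\frac{\partial L}{\partial y} = \sum_{\text{broadcast axes}} \frac{\partial L}{\partial out}
```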
