Commit c2feab7

Merge branch 'develop' into sparse_vector
2 parents: 36ebf00 + abce9eb


61 files changed: +1846 / -1079 lines

Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ COPY ./paddle/scripts/docker/root/ /root/
 
 RUN apt-get update && \
     apt-get install -y \
-    git python-pip python-dev openssh-server bison \
+    git python-pip python-dev openssh-server bison libnccl-dev \
     wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
    curl sed grep graphviz libjpeg-dev zlib1g-dev \
    python-matplotlib gcc-4.8 g++-4.8 \

doc/design/block.md

Lines changed: 1 addition & 1 deletion
@@ -189,7 +189,7 @@ OpDesc {
   inputs = {0} // the index of x in vars of BlockDesc above
   outputs = {5, 3} // indices of act and hidden_out in vars of BlockDesc above
   attrs {
-    "memories" : {1} // the index of h
+    "states" : {1} // the index of h
     "step_net" : <above step net>
   }
 };

paddle/framework/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
@@ -19,15 +19,15 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope)
 proto_library(framework_proto SRCS framework.proto)
 
 cc_library(attribute SRCS attribute.cc DEPS framework_proto)
-cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info)
 cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc)
 cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
 cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
 cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
-cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc glog)
+cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog)
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
+cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info operator)
 
-cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog)
+cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
 cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
 
 py_proto_compile(framework_py_proto SRCS framework.proto)

paddle/framework/backward.cc

Lines changed: 14 additions & 2 deletions
@@ -21,6 +21,7 @@
 
 #include "paddle/framework/block_desc.h"
 #include "paddle/framework/op_registry.h"
+#include "paddle/operators/dynamic_recurrent_op.h"
 #include "paddle/operators/net_op.h"
 #include "paddle/operators/recurrent_op.h"
 
@@ -220,8 +221,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
   // process recurrent gradient op as a special operator.
   if (forwardOp.Type() == "recurrent") {
     // NOTE clean up cycle call somewhere (RNN's stepnet constains itself),
-    // or
-    // this will result in infinite loop.
+    // or this will result in infinite loop.
     const auto& rnnop =
         *static_cast<const operators::RecurrentOp*>(&forwardOp);
     auto rnn_grad_op =
@@ -231,6 +231,18 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
     // create stepnet's gradient op
     rnn_grad_op->set_stepnet(
         BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id));
+  } else if (forwardOp.Type() == "dynamic_recurrent") {
+    // NOTE clean up cycle call somewhere (RNN's stepnet constains itself),
+    // or this will result in infinite loop.
+    const auto& rnnop =
+        *static_cast<const operators::DynamicRecurrentOp*>(&forwardOp);
+    auto rnn_grad_op =
+        static_cast<operators::DynamicRecurrentGradientOp*>(grad_op.get());
+    const auto& stepnet_op =
+        *static_cast<const OperatorBase*>(&rnnop.rnn.GetStepUnit());
+    // create stepnet's gradient op
+    rnn_grad_op->rnn.SetStepUnit(
+        BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id));
   }
 
   if (net->ops_.empty()) {  // Current no aux op is added to network

paddle/framework/block_desc.cc

Lines changed: 14 additions & 1 deletion
@@ -41,6 +41,19 @@ bool BlockDescBind::HasVar(const std::string &name) const {
   return vars_.find(name) != vars_.end();
 }
 
+VarDescBind *BlockDescBind::FindVarRecursive(const std::string &name) const {
+  auto it = vars_.find(name);
+  if (it == vars_.end()) {
+    return Parent() == kNoneBlockIndex ? nullptr
+                                       : ParentBlock()->FindVarRecursive(name);
+  }
+  return it->second.get();
+}
+
+bool BlockDescBind::HasVarRecursive(const std::string &name) const {
+  return FindVarRecursive(name) != nullptr;
+}
+
 std::vector<VarDescBind *> BlockDescBind::AllVars() const {
   std::vector<VarDescBind *> res;
   for (const auto &p : vars_) {
@@ -97,7 +110,7 @@ void BlockDescBind::Flush() {
 }
 
 BlockDescBind *BlockDescBind::ParentBlock() const {
-  if (this->desc_->parent_idx() == -1) {
+  if (this->desc_->parent_idx() == kNoneBlockIndex) {
     return nullptr;
   }
   return prog_->Block(static_cast<size_t>(this->desc_->parent_idx()));

paddle/framework/block_desc.h

Lines changed: 5 additions & 0 deletions
@@ -21,6 +21,7 @@ limitations under the License. */
 #include <vector>
 
 #include "paddle/framework/op_desc.h"
+#include "paddle/framework/proto_desc.h"
 #include "paddle/framework/var_desc.h"
 #include "paddle/platform/macros.h"
 
@@ -56,6 +57,10 @@ class BlockDescBind {
 
   bool HasVar(const std::string &var_name) const;
 
+  VarDescBind *FindVarRecursive(const std::string &name_bytes) const;
+
+  bool HasVarRecursive(const std::string &var_name) const;
+
   std::set<std::string> LocalVarNames() const {
     std::set<std::string> var_names;
     for (auto &var : vars_) {

paddle/framework/framework.proto

Lines changed: 1 addition & 0 deletions
@@ -68,6 +68,7 @@ message OpProto {
 
     optional bool duplicable = 3 [ default = false ];
     optional bool intermediate = 4 [ default = false ];
+    optional bool dispensable = 5 [ default = false ];
   }
 
   // AttrProto describes the C++ type Attribute.

paddle/framework/lod_tensor.cc

Lines changed: 39 additions & 30 deletions
@@ -25,31 +25,50 @@ LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) {
   for (size_t i = level_begin; i < level_end; i++) {
     new_lod.emplace_back(in.at(i));
   }
+  // transform the lowest level to absolute offset.
+  LoD abs_offset_lod = ToAbsOffset(in);
+  new_lod.back() = abs_offset_lod[level_end - 1];
   return new_lod;
 }
 
 LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
                  size_t elem_end) {
-  // slice the lod.
-  LoD new_lod;
-  new_lod.reserve(in.size() - level);
-  auto start = in.at(level)[elem_begin];
-  auto end = in.at(level)[elem_end];
-
-  for (auto it = in.begin() + level; it != in.end(); it++) {
-    auto it_begin = std::find(it->begin(), it->end(), start);
-    auto it_end = std::find(it_begin, it->end(), end);
-    PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info");
-    PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info");
-    new_lod.emplace_back(it_begin, it_end + 1);
-    // reset offset if tensor is copyed and sliced.
-    std::transform(new_lod.back().begin(), new_lod.back().end(),
-                   new_lod.back().begin(),
-                   [start](int v) { return v - start; });
-    PADDLE_ENFORCE_EQ(new_lod.back().front(), 0, "error in slice LoD");
+  PADDLE_ENFORCE_LT(level, in.size());
+  PADDLE_ENFORCE_LT(elem_end, in[level].size());
+
+  LoD res;
+  res.resize(in.size() - level);
+  // copy the first level
+  res[0].assign(in[level].begin() + elem_begin,
+                in[level].begin() + elem_end + 1);
+  for (size_t lvl = 1; lvl < res.size(); lvl++) {
+    const auto& in_level = in[level + lvl];
+    const auto& above_level = res[lvl - 1];
+    auto& out_level = res[lvl];
+    out_level.assign(in_level.begin() + above_level.front(),
+                     in_level.begin() + above_level.back() + 1);
   }
-  PADDLE_ENFORCE_LE(new_lod.size(), in.size());
-  return new_lod;
+  for (size_t lvl = 0; lvl < res.size(); lvl++) {
+    // to make the first offset equals 0, all the elements minus the first
+    // element
+    size_t front = res[lvl].front();
+    for (auto& ele : res[lvl]) {
+      ele -= front;
+    }
+  }
+  return res;
+}
+
+LoD ToAbsOffset(const LoD& in) {
+  // the lowest level stores relative offsets
+  if (in.empty() || in.size() == 1) return in;
+  LoD result = in;
+  for (int level = result.size() - 2; level >= 0; level--) {
+    for (auto& ele : result[level]) {
+      ele = result[level + 1][ele];
+    }
+  }
+  return result;
 }
 
 bool operator==(const LoD& a, const LoD& b) {
@@ -75,17 +94,7 @@ bool operator==(const LoD& a, const LoD& b) {
 size_t LoDTensor::NumElements(size_t level, size_t idx) const {
   PADDLE_ENFORCE_LT(level, NumLevels());
   PADDLE_ENFORCE_LT(idx, NumElements(level));
-  // the last level of LoD, just return number of records in Tensor
-  if (level == NumLevels() - 1) {
-    return lod_[level][idx + 1] - lod_[level][idx];
-  }
-  // high level of LoD, and there is another lower level, return number of
-  // lower-level elements
-  auto tmp = SliceInLevel(lod_, level, idx, idx + 1);
-  PADDLE_ENFORCE_GE(tmp.size(), 2);
-  // there is a 0 as a placeholder stored in LoD, so the number of elements
-  // equals lod.size() - 1
-  return tmp[1].size() - 1;
+  return lod_[level][idx + 1] - lod_[level][idx];
 }
 
void LoDTensor::ShrinkLevels(size_t level_begin, size_t level_end) {
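
To make the relative-versus-absolute offset change above concrete, here is a small standalone sketch (not part of this commit) that applies the same bottom-up propagation as the new ToAbsOffset, using plain std::vector in place of the framework's LoD type. The names SimpleLoD and ToAbsOffsetSketch are illustrative only.

#include <cstddef>
#include <iostream>
#include <vector>

// Stand-in for paddle::framework::LoD: only the lowest level holds absolute
// offsets into the tensor; each higher level indexes into the level below.
using SimpleLoD = std::vector<std::vector<size_t>>;

// Walk from the second-lowest level upwards, replacing every relative index
// with the offset it points at in the level below.
SimpleLoD ToAbsOffsetSketch(SimpleLoD in) {
  if (in.size() <= 1) return in;
  for (int level = static_cast<int>(in.size()) - 2; level >= 0; --level) {
    for (auto& ele : in[level]) {
      ele = in[level + 1][ele];
    }
  }
  return in;
}

int main() {
  // The example LoD from the new lod_tensor.h comment.
  SimpleLoD lod = {{0, 2, 3}, {0, 2, 4, 7}, {0, 2, 5, 7, 10, 12, 15, 20}};
  for (const auto& level : ToAbsOffsetSketch(lod)) {
    for (size_t v : level) std::cout << v << ' ';
    std::cout << '\n';
  }
  // Prints "0 10 20" and "0 5 10 20" for the upper levels; the lowest level is
  // already absolute and stays unchanged.
  return 0;
}

Under this convention, dropping upper levels still leaves enough information to locate rows in the underlying tensor, which is what the revised SliceLevels relies on when it rewrites the lowest kept level via ToAbsOffset.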

paddle/framework/lod_tensor.h

Lines changed: 19 additions & 6 deletions
@@ -39,23 +39,36 @@ using Vector = thrust::host_vector<
 #endif
 
 /*
- * 3-level LoD stores
+ * LoD is short for Level of Details.
  *
- * 0 10 20
- * 0 5 10 15 20
- * 0 2 5 7 10 12 15 20
- *
- * - in a level, each element indicates offset in the underlying Tensor
+ * - in a level, each element indicates relative offset of the lower level
  * - the first element should be 0 and that indicates that this sequence start
  *   from 0
  * - each sequence's begin and end(no-inclusive) is level[id, id+1]
+ *
+ * For example:
+ *    3-level LoD stores
+ *
+ *    0 2 3
+ *    0 2 4 7
+ *    0 2 5 7 10 12 15 20
 */
 using LoD = std::vector<Vector<size_t>>;
 
+/*
+ * Slice levels from a LoD.
+ * NOTE the lowest level should always be the absolute offsets of the underlying
+ * tensor instances. So if higher layers are sliced without the lowest level,
+ * the lower level of the sliced LoD will be transformed to the absolute offset.
+ */
 LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end);
 
 LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
                  size_t elem_end);
+/*
+ * Transform an LoD from relative offsets to absolute offsets.
+ */
+LoD ToAbsOffset(const LoD& in);
 
 bool operator==(const LoD& a, const LoD& b);

paddle/framework/lod_tensor_test.cc

Lines changed: 15 additions & 16 deletions
@@ -30,8 +30,8 @@ class LoDTensorTester : public ::testing::Test {
     // 0 5 10 15 20
     // 0 2 5 7 10 12 15 20
     LoD lod;
-    lod.push_back(std::vector<size_t>{0, 10, 20});
-    lod.push_back(std::vector<size_t>{0, 5, 10, 15, 20});
+    lod.push_back(std::vector<size_t>{0, 2, 3});
+    lod.push_back(std::vector<size_t>{0, 2, 5, 8});
     lod.push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20});
 
     ASSERT_EQ(lod.size(), 3UL);
@@ -52,14 +52,14 @@ TEST_F(LoDTensorTester, NumLevels) { ASSERT_EQ(lod_tensor_.NumLevels(), 3UL); }
 
 TEST_F(LoDTensorTester, NumElements) {
   ASSERT_EQ(lod_tensor_.NumElements(0), 2UL);
-  ASSERT_EQ(lod_tensor_.NumElements(1), 4UL);
+  ASSERT_EQ(lod_tensor_.NumElements(1), 3UL);
   ASSERT_EQ(lod_tensor_.NumElements(2), 8UL);
 }
 
 TEST_F(LoDTensorTester, NumElements2) {
   ASSERT_EQ(lod_tensor_.NumElements(0, 0), 2UL);
-  ASSERT_EQ(lod_tensor_.NumElements(0, 1), 2UL);
-  ASSERT_EQ(lod_tensor_.NumElements(1, 1), 2UL);
+  ASSERT_EQ(lod_tensor_.NumElements(0, 1), 1UL);
+  ASSERT_EQ(lod_tensor_.NumElements(1, 1), 3UL);
 }
 
 TEST_F(LoDTensorTester, ShrinkLevels) {
@@ -68,37 +68,36 @@ TEST_F(LoDTensorTester, ShrinkLevels) {
     LoDTensor new_lod_tensor = lod_tensor_;
     new_lod_tensor.ShrinkLevels(level, level + 1);
     ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL);
-    ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level));
     ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
   }
   // shrink 2 level
   for (size_t level = 0; level < 2UL; ++level) {
     LoDTensor new_lod_tensor = lod_tensor_;
     new_lod_tensor.ShrinkLevels(level, level + 2);
+    // the lowest level's last element should be the tensor's batch_size.
+    ASSERT_EQ(new_lod_tensor.lod().back().back(),
+              lod_tensor_.lod().back().back());
     ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
-    ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level));
-    ASSERT_EQ(new_lod_tensor.NumElements(1),
-              lod_tensor_.NumElements(level + 1));
    ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
   }
 }
 
 TEST_F(LoDTensorTester, ShrinkInLevel) {
   size_t level = 0;
   LoDTensor new_lod_tensor = lod_tensor_;
-  new_lod_tensor.ShrinkInLevel(level, 0, 2);
+  new_lod_tensor.ShrinkInLevel(level, 0, 1);
   EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL);
-  EXPECT_EQ(new_lod_tensor.NumElements(0), 2UL);
-  EXPECT_EQ(new_lod_tensor.NumElements(1), 4UL);
-  EXPECT_EQ(new_lod_tensor.NumElements(2), 8UL);
+  EXPECT_EQ(new_lod_tensor.NumElements(0), 1UL);
+  EXPECT_EQ(new_lod_tensor.NumElements(1), 2UL);
+  EXPECT_EQ(new_lod_tensor.NumElements(2), 5UL);
   ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
 
   level = 1;
   new_lod_tensor = lod_tensor_;
-  new_lod_tensor.ShrinkInLevel(level, 0, 2);
+  new_lod_tensor.ShrinkInLevel(level, 1, 2);
   ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
-  ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL);
-  ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL);
+  ASSERT_EQ(new_lod_tensor.NumElements(0), 1UL);
+  ASSERT_EQ(new_lod_tensor.NumElements(1), 3UL);
   ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
}
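
As a cross-check of the updated expectations above, the following standalone sketch (not part of this commit) mirrors the new SliceInLevel logic on the test fixture's LoD. SimpleLoD and SliceInLevelSketch are illustrative names, not framework API.

#include <cstddef>
#include <iostream>
#include <vector>

using SimpleLoD = std::vector<std::vector<size_t>>;

// Mirror of the new SliceInLevel: keep offsets [elem_begin, elem_end] of one
// level (the end sentinel is included), carry the sub-range of every lower
// level along, then re-base each level so its first offset is 0.
SimpleLoD SliceInLevelSketch(const SimpleLoD& in, size_t level,
                             size_t elem_begin, size_t elem_end) {
  SimpleLoD res(in.size() - level);
  res[0].assign(in[level].begin() + elem_begin,
                in[level].begin() + elem_end + 1);
  for (size_t lvl = 1; lvl < res.size(); ++lvl) {
    const auto& in_level = in[level + lvl];
    const auto& above = res[lvl - 1];
    res[lvl].assign(in_level.begin() + above.front(),
                    in_level.begin() + above.back() + 1);
  }
  for (auto& lvl : res) {
    size_t front = lvl.front();
    for (auto& ele : lvl) ele -= front;
  }
  return res;
}

int main() {
  // The fixture's LoD from the updated test above.
  SimpleLoD lod = {{0, 2, 3}, {0, 2, 5, 8}, {0, 2, 5, 7, 10, 12, 15, 17, 20}};
  // Keep only the first top-level sequence, the case the updated
  // ShrinkInLevel(level = 0, 0, 1) test exercises.
  for (const auto& level : SliceInLevelSketch(lod, 0, 0, 1)) {
    for (size_t v : level) std::cout << v << ' ';
    std::cout << '\n';
  }
  // Prints {0 2}, {0 2 5}, {0 2 5 7 10 12}: 1, 2, and 5 elements per level,
  // matching the EXPECT_EQ values in the updated ShrinkInLevel test.
  return 0;
}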
