Skip to content

Commit 7cd2417

Browse files
committed
Merge branch 'develop' into cpu-for-1.1-merge-with-shape
test=develop
2 parents 06ffbc4 + 0a80f06 commit 7cd2417

32 files changed

+578
-147
lines changed

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ option(WITH_ANAKIN "Compile with Anakin library" OFF)
6969
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
7070
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocol" OFF)
7171
option(WITH_INFERENCE "Compile fluid inference library" ON)
72+
option(ON_INFER "Turn on inference optimization." OFF)
7273
option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
7374
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
7475
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
@@ -179,6 +180,7 @@ include(external/eigen) # download eigen3
179180
include(external/pybind11) # download pybind11
180181
include(external/cares)
181182
include(external/cub)
183+
include(external/xxhash) # download xxhash
182184

183185
if (NOT WIN32)
184186
# there is no official support of snappystream, warpctc, nccl, cupti in windows
@@ -301,3 +303,8 @@ if(WITH_DOC)
301303
find_python_module(recommonmark REQUIRED)
302304
add_subdirectory(doc)
303305
endif()
306+
307+
if (ON_INFER)
308+
message(WARNING "In inference mode, some specific optimizations will take place.")
309+
add_definitions(-DPADDLE_ON_INFERENCE)
310+
endif()

cmake/external/xxhash.cmake

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
INCLUDE(ExternalProject)
2+
3+
set(XXHASH_SOURCE_DIR ${THIRD_PARTY_PATH}/xxhash)
4+
set(XXHASH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/xxhash)
5+
set(XXHASH_INCLUDE_DIR "${XXHASH_INSTALL_DIR}/include")
6+
7+
IF(WITH_STATIC_LIB)
8+
SET(BUILD_CMD make lib)
9+
ELSE()
10+
SET(BUILD_CMD sed -i "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/src/extern_xxhash/Makefile && make lib)
11+
ENDIF()
12+
13+
ExternalProject_Add(
14+
extern_xxhash
15+
${EXTERNAL_PROJECT_LOG_ARGS}
16+
GIT_REPOSITORY "https://github.com/Cyan4973/xxHash"
17+
GIT_TAG "v0.6.5"
18+
PREFIX ${XXHASH_SOURCE_DIR}
19+
DOWNLOAD_NAME "xxhash"
20+
UPDATE_COMMAND ""
21+
CONFIGURE_COMMAND ""
22+
BUILD_IN_SOURCE 1
23+
PATCH_COMMAND
24+
BUILD_COMMAND ${BUILD_CMD}
25+
INSTALL_COMMAND export PREFIX=${XXHASH_INSTALL_DIR}/ && make install
26+
TEST_COMMAND ""
27+
)
28+
29+
set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")
30+
INCLUDE_DIRECTORIES(${XXHASH_INCLUDE_DIR})
31+
32+
add_library(xxhash STATIC IMPORTED GLOBAL)
33+
set_property(TARGET xxhash PROPERTY IMPORTED_LOCATION ${XXHASH_LIBRARIES})
34+
include_directories(${XXHASH_INCLUDE_DIR})
35+
add_dependencies(xxhash extern_xxhash)
36+
37+
LIST(APPEND external_project_dependencies xxhash)
38+
39+
IF(WITH_C_API)
40+
INSTALL(DIRECTORY ${XXHASH_INCLUDE_DIR} DESTINATION third_party/xxhash)
41+
IF(ANDROID)
42+
INSTALL(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib/${ANDROID_ABI})
43+
ELSE()
44+
INSTALL(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib)
45+
ENDIF()
46+
ENDIF()

cmake/inference_lib.cmake

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414

1515
# make package for paddle fluid shared and static library
1616
function(copy TARGET)
17+
if (NOT ON_INFER)
18+
message(WARNING "Turn on the ON_INFER flag when building inference_lib only.")
19+
endif()
1720
set(options "")
1821
set(oneValueArgs "")
1922
set(multiValueArgs SRCS DSTS DEPS)
@@ -31,7 +34,7 @@ function(copy TARGET)
3134
foreach(index RANGE ${len})
3235
list(GET copy_lib_SRCS ${index} src)
3336
list(GET copy_lib_DSTS ${index} dst)
34-
add_custom_command(TARGET ${TARGET} PRE_BUILD
37+
add_custom_command(TARGET ${TARGET} PRE_BUILD
3538
COMMAND mkdir -p "${dst}"
3639
COMMAND cp -r "${src}" "${dst}"
3740
COMMENT "copying ${src} -> ${dst}")
@@ -67,6 +70,13 @@ copy(boost_lib
6770
DEPS boost
6871
)
6972

73+
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/xxhash")
74+
copy(xxhash_lib
75+
SRCS ${XXHASH_INCLUDE_DIR} ${XXHASH_LIBRARIES}
76+
DSTS ${dst_dir} ${dst_dir}/lib
77+
DEPS xxhash
78+
)
79+
7080
if(NOT PROTOBUF_FOUND)
7181
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/protobuf")
7282
copy(protobuf_lib
@@ -186,7 +196,7 @@ copy(cmake_cache
186196
DSTS ${FLUID_INSTALL_DIR})
187197

188198
# This command generates a complete fluid library for both train and inference
189-
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
199+
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
190200

191201
# Following commands generate an inference-only fluid library
192202
# third_party, version.txt and CMakeCache.txt are the same position with ${FLUID_INSTALL_DIR}

paddle/fluid/API.spec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label'
176176
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
177177
paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
178178
paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
179+
paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None))
179180
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
180181
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
181182
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)

paddle/fluid/framework/ir/graph.cc

Lines changed: 0 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -24,74 +24,6 @@ namespace paddle {
2424
namespace framework {
2525
namespace ir {
2626

27-
std::vector<std::string> FindDistTrainSendVars(
28-
const std::vector<ir::Node *> &nodes) {
29-
std::vector<std::string> send_vars;
30-
// since parameters are all in block 0,
31-
// it's enough to only scan send ops in block 0
32-
for (auto &node : nodes) {
33-
auto op_vars = node->Op()->InputArgumentNames();
34-
send_vars.reserve(send_vars.size() +
35-
std::distance(op_vars.begin(), op_vars.end()));
36-
send_vars.insert(send_vars.end(), op_vars.begin(), op_vars.end());
37-
}
38-
return send_vars;
39-
}
40-
41-
std::vector<std::string> FindDistTrainRecvVars(
42-
const std::vector<ir::Node *> &nodes) {
43-
std::vector<std::string> recv_vars;
44-
for (auto &node : nodes) {
45-
auto op_vars = node->Op()->OutputArgumentNames();
46-
recv_vars.reserve(recv_vars.size() +
47-
std::distance(op_vars.begin(), op_vars.end()));
48-
recv_vars.insert(recv_vars.end(), op_vars.begin(), op_vars.end());
49-
}
50-
return recv_vars;
51-
}
52-
53-
bool IsDistTrainOp(ir::Node *node, const std::vector<std::string> &send_vars,
54-
const std::vector<std::string> &recv_vars) {
55-
if (send_vars.size() == 0 || recv_vars.size() == 0) {
56-
return false;
57-
}
58-
59-
/**
60-
* Check any of opvars contains `.block` and in sendvars
61-
*/
62-
auto checker = [](const std::vector<std::string> &opvars,
63-
const std::vector<std::string> &rpc_vars) -> bool {
64-
for (auto &var : opvars) {
65-
// a variable name with the suffix `.block` means it's a splited
66-
// variable by (DistributeTranspiler)
67-
// [python/paddle/fluid/transpiler/distribute_transpiler.py]
68-
if (var.find(".block") != std::string::npos &&
69-
std::find(rpc_vars.begin(), rpc_vars.end(), var) != rpc_vars.end()) {
70-
return true;
71-
}
72-
73-
if (!(var.find(".block") == std::string::npos &&
74-
var.find(".pserver") == std::string::npos) &&
75-
std::find(rpc_vars.begin(), rpc_vars.end(), var) != rpc_vars.end()) {
76-
return true;
77-
}
78-
}
79-
return false;
80-
};
81-
82-
std::vector<std::string> input_var_names;
83-
std::vector<std::string> output_var_names;
84-
for (ir::Node *input : node->inputs) {
85-
input_var_names.push_back(input->Name());
86-
}
87-
for (ir::Node *output : node->outputs) {
88-
output_var_names.push_back(output->Name());
89-
}
90-
91-
return checker(output_var_names, send_vars) ||
92-
checker(input_var_names, recv_vars);
93-
}
94-
9527
Graph::Graph(const ProgramDesc &program) : program_(program) {
9628
// Make the nodes id start from 0.
9729
Node::ResetId();

paddle/fluid/framework/ir/node.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class Node {
4444
return op_desc_.get();
4545
}
4646

47+
// Please don't use this API!
4748
int id() const { return id_; }
4849

4950
bool IsOp() const { return type_ == Type::kOperation; }
@@ -92,6 +93,7 @@ class Node {
9293
Node() = delete;
9394

9495
static int count_;
96+
// Please don't use this API or make this public.
9597
static void ResetId() { count_ = 0; }
9698
DISABLE_COPY_AND_ASSIGN(Node);
9799
};

paddle/fluid/framework/lod_tensor_array.h

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,82 @@ limitations under the License. */
1818

1919
namespace paddle {
2020
namespace framework {
21+
22+
// NOTE The vector<LoDTensor> can't be replaced with the class LoDTensorArray
23+
// directly, because there are many vector<LoDTensor> used across the project,
24+
// and some of them are treated as LoDTensorArray.
25+
#if !defined(PADDLE_ON_INFERENCE)
26+
2127
using LoDTensorArray = std::vector<LoDTensor>;
22-
}
28+
29+
#else // !PADDLE_ON_INFERENCE
30+
31+
#pragma message "LoDTensorArray is replaced with the inference one."
32+
/*
33+
* A LoDTensorArray which will not deallocate buffer when resized, fix the data
34+
* diff in inference, and more performance friendly in the concurrency
35+
* scenarios.
36+
*/
37+
class LoDTensorArray {
38+
public:
39+
LoDTensorArray() = default;
40+
41+
using iterator = std::vector<LoDTensor>::iterator;
42+
using const_iterator = std::vector<LoDTensor>::const_iterator;
43+
44+
const_iterator begin() const { return array_.begin(); }
45+
const_iterator end() const { return array_.begin() + size_; }
46+
iterator begin() { return array_.begin(); }
47+
iterator end() { return array_.begin() + size_; }
48+
49+
void push_back(const LoDTensor& x) {
50+
if (size_ < array_.size()) {
51+
array_[size_++] = x;
52+
} else {
53+
array_.push_back(x);
54+
++size_;
55+
}
56+
}
57+
void resize(size_t size) {
58+
if (array_.size() < size) {
59+
array_.resize(size);
60+
}
61+
size_ = size;
62+
}
63+
64+
void emplace_back() { array_.emplace_back(); }
65+
66+
void emplace_back(LoDTensor&& x) { array_.emplace_back(std::move(x)); }
67+
68+
LoDTensor& back() { return array_.back(); }
69+
70+
size_t space() const { return array_.size(); }
71+
72+
void reserve(size_t size) {
73+
// Naive warning to tell user this array might be to large. The memory and
74+
// buffer used by this TensorArray will not be deleted during the training
75+
// and inference phase, so attention not to make it expand too long.
76+
if (size > 800UL) {
77+
LOG(WARNING) << "TensorArray has more than 800 items";
78+
}
79+
array_.reserve(size);
80+
}
81+
82+
bool empty() const { return size_ == 0UL; }
83+
void clear() { size_ = 0UL; }
84+
85+
LoDTensor& operator[](size_t id) { return array_[id]; }
86+
const LoDTensor& operator[](size_t id) const { return array_[id]; }
87+
LoDTensor& at(size_t id) { return array_.at(id); }
88+
const LoDTensor& at(size_t id) const { return array_.at(id); }
89+
90+
size_t size() const { return size_; }
91+
92+
private:
93+
size_t size_{0};
94+
std::vector<LoDTensor> array_;
95+
};
96+
#endif // !PADDLE_ON_INFERENCE
97+
98+
} // namespace framework
2399
} // namespace paddle

paddle/fluid/framework/op_desc.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,10 +121,6 @@ class OpDesc {
121121

122122
BlockDesc *Block() { return this->block_; }
123123

124-
const BlockDesc &BlockRef() const { return *this->block_; }
125-
126-
void SetBlock(BlockDesc *block) { this->block_ = block; }
127-
128124
private:
129125
template <typename MapType>
130126
static std::vector<typename MapType::key_type> MapKeys(const MapType &map) {

paddle/fluid/framework/scope.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ class Scope {
7878
/// Drop all kids scopes belonged to this scope.
7979
void DropKids();
8080

81+
std::list<Scope*>& kids() const { return kids_; }
82+
8183
/// Find if a scope exists in the kid scopes
8284
bool HasKid(const Scope* scope) const;
8385

paddle/fluid/inference/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ if (WITH_GPU AND TENSORRT_FOUND)
3030
endif()
3131

3232
# Create static library
33-
cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor)
33+
cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array)
3434

3535
if(NOT APPLE)
3636
# TODO(liuyiqu): Temporarily disable the link flag because it is not supported on Mac.
@@ -40,7 +40,7 @@ endif()
4040

4141
# Create shared library
4242
cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
43-
DEPS ${fluid_modules} paddle_fluid_api)
43+
DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array)
4444

4545
set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
4646
if(NOT APPLE)

0 commit comments

Comments
 (0)