Commit dd8dc0e

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/Enhance_regularizer_py
2 parents 74523c4 + 788c600

File tree: 143 files changed, +3728 −1009 lines


.travis.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -56,7 +56,7 @@ script:
       export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh
       export DOCS_DIR=`pwd`
       cd ..
-      curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $DOCS_DIR $DOCS_DIR/build/doc/v2
+      curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $DOCS_DIR $DOCS_DIR/build/doc/
 notifications:
   email:
     on_success: change
```

cmake/generic.cmake

Lines changed: 4 additions & 4 deletions
```diff
@@ -244,11 +244,11 @@ function(cc_test TARGET_NAME)
   cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
   add_executable(${TARGET_NAME} ${cc_test_SRCS})
   # Support linking flags: --whole-archive (Linux) / -force_load (MacOS)
-  target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
+  target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog)
   if("${cc_test_DEPS}" MATCHES "ARCHIVE_START")
     list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END)
   endif()
-  add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
+  add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog)
   add_test(NAME ${TARGET_NAME}
            COMMAND ${TARGET_NAME} ${cc_test_ARGS}
            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
@@ -311,8 +311,8 @@ function(nv_test TARGET_NAME)
     set(multiValueArgs SRCS DEPS)
     cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
-    target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
-    add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
+    target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog)
+    add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog)
     add_test(${TARGET_NAME} ${TARGET_NAME})
   endif()
endfunction(nv_test)
```

doc/design/cpp_data_feeding.md

Lines changed: 14 additions & 15 deletions
````diff
@@ -1,17 +1,17 @@
 # C++ Data Feeding
 
-In training with Paddle V2 API, data feeding wholly dependents on Python code. To get rid of the Python environment and achieve the goal of "wrapping the whole training by a while loop op" in Paddle Fluid, a C++ data feeding mechanism is required.
+While using Paddle V2 API for training, data feeding completely depends on the Python code. To get rid of the Python environment and achieve the goal of "wrapping the whole training by a while loop op" in Paddle Fluid, a C++ data feeding mechanism is required.
 
-In this document we show the fundamental design of C++ data feeding process, which includes the data reading, shuffling and batching.
+In this document, we show the fundamental design of a C++ data feeding process, which includes data reading, shuffling and batching.
 
 ## Reader
 
-A new concept named 'Reader' is introduced. `Reader` is a series of inherited classes which can be hold by our `Variable` and they are used to read or process file data.
+In order to handle the above mentioned problem, a new concept called 'Reader' is introduced. `Reader` is a series of inherited classes which can be held by our `Variable` and they are used to read or process file data.
 
 
 ### `ReaderBase`
 
-`ReaderBase` is the abstract base class of all readers. It defines the all readers' interfaces.
+`ReaderBase` is the abstract base class for all readers. It defines the interface for all readers.
 
 ```cpp
 class ReaderBase {
@@ -20,11 +20,10 @@ class ReaderBase {
     PADDLE_ENFORCE(!shapes_.empty());
   }
   // Read the next batch of data. (A 'batch' can be only one instance)
+  // If the next batch doesn't exist, '*out' will be an empty std::vector.
   virtual void ReadNext(std::vector<LoDTensor>* out) = 0;
-  // Show whether the next bacth exists.
-  virtual bool HasNext() const = 0;
 
-  // Reinitialize the reader and read the file from the begin.
+  // Reinitialize the reader and read the file from the beginning.
   virtual void ReInit() = 0;
 
   // Get a certain read in data's shape.
````
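The removed `HasNext()` is replaced above by a convention: `ReadNext()` leaves `*out` empty once the data is exhausted. A minimal, self-contained sketch of that convention follows — `Tensor` is a plain `std::vector<float>` standing in for `LoDTensor`, and `VectorReader` is a hypothetical in-memory reader invented purely for illustration, not Paddle's real code:

```cpp
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

// Stand-in for paddle's LoDTensor, for illustration only.
using Tensor = std::vector<float>;

// Sketch of the interface after this change: there is no HasNext();
// exhaustion is signaled by ReadNext() leaving '*out' empty.
class ReaderBase {
 public:
  virtual void ReadNext(std::vector<Tensor>* out) = 0;
  virtual void ReInit() = 0;
  virtual ~ReaderBase() = default;
};

// Hypothetical in-memory "file" reader yielding one instance per call.
class VectorReader : public ReaderBase {
 public:
  explicit VectorReader(std::vector<Tensor> data) : data_(std::move(data)) {}
  void ReadNext(std::vector<Tensor>* out) override {
    out->clear();
    if (pos_ < data_.size()) out->push_back(data_[pos_++]);
  }
  void ReInit() override { pos_ = 0; }

 private:
  std::vector<Tensor> data_;
  std::size_t pos_ = 0;
};
```

A consumer then simply loops `reader.ReadNext(&out)` until `out.empty()` instead of polling a separate `HasNext()`.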
````diff
@@ -43,36 +42,36 @@
 
 ### `FileReader` and `DecoratedReader`
 
-These two classes are derived from the `ReaderBase` and will further be derived by respective specific readers. That is to say, in our design, there are two kinds of readers: file readers and decorated readers. A file reader reads from a file of some specific format, and yield only one instance of data at a time. e.g. RecordIO reader, jpg reader, .... A decorated reader takes another reader(both file reader and decorated reader are OK) as its 'underlying reader'. It gets data from its underlying reader, does some process on them(shuffling, or batching), then yields processed data. The output data of a decorated reader can be a single instance or a batch. `ShuffleReader` and `BatchReader` are both decorated readers.
+These two classes are derived from `ReaderBase` and will further be derived by more specific readers. Thus, in our design, there are two kinds of readers: file readers and decorated readers. A file reader reads from a file of some specific format and yields only one instance of data at a time, e.g. a RecordIO reader or a jpg reader. A decorated reader takes another reader (either a file reader or a decorated reader) as its 'underlying reader'. It gets data from its underlying reader, does some processing on it (shuffling or batching), then yields the processed data. The output data of a decorated reader can be a single instance or a batch. `ShuffleReader` and `BatchReader` are both decorated readers.
 
-All the readers share exactly the same interfaces defined in `ReaderBase`. So they can be decorated for more than one time: We can **shuffle** a reader's outputs and then **batch** the shuffle outputs. The interface consistency also allows related ops use readers without knowing what they are exactly.
+All the readers share exactly the same interface as defined in `ReaderBase`, so they can be decorated more than once: we can **shuffle** a reader's outputs and then **batch** the shuffled outputs. The interface consistency also allows related ops to use readers without knowing exactly what they are.
 
 
 ### `ReaderHolder`
 
-Different readers belong to different class types. It leads to a problem: How can we drop them into `Variable`s and fetch them out by a unified method? For example, if a Variable holds a `BatchReader`, we can not get it by the following code:
+Different readers belong to different class types. This leads to a problem: How can we drop them into `Variable`s and fetch them out by a unified method? For example, if a Variable holds a `BatchReader`, we cannot get it by the following code:
 
 ```cpp
 var->Get<ReaderBase>("batch_reader");
 ```
 
-we have to write:
+We would have to write:
 
 ```cpp
 var->Get<BatchReader>("batch_reader");
 ```
 
-This requires each time getting a reader from a variable we must know the reader's type exactly. It is nearly impossible.
+This requires that, every time we get a reader from a variable, we must know the reader's type exactly. This is nearly impossible.
 
-To solve this problem, we introduce `ReaderHolder` as a wrapper. It acts as an empty decorator of `ReaderBase`, which erases reader's type. With `ReaderHolder` we are able to fetch all types of readers by `var->Get<ReaderHolder>("...")` and regard the obtained object as a reader.
+To solve this problem, we introduce `ReaderHolder` as a wrapper. It acts as an empty decorator of `ReaderBase`, which hides the reader's type. With `ReaderHolder` we are able to fetch all types of readers by `var->Get<ReaderHolder>("...")` and regard the obtained object as a reader.
 
 ## Related Operators
 
-To create and invoke readers, some now ops are introduced:
+To create and invoke readers, some new ops are introduced:
 
 ### `CreateReaderOp`
 
-Each reader has its creating op. File readers' creating ops have no input and yield the created file reader as its output. Decorated readers' creating ops take the underlying readers as inputs and then yield new decorated readers.
+Each reader has its creation op. File readers' creation ops have no input and yield the created file reader as their output. Decorated readers' creation ops take the underlying readers as inputs and then yield new decorated readers.
 
 ### `ReadOp`
 
````
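To make the decorator chain and the type-erasing holder concrete, here is a hedged, self-contained sketch. The toy types (`VectorReader`, the holder's `Reset` method, the batch logic) are invented for illustration under the design described above, not Paddle's actual implementation:

```cpp
#include <cassert>
#include <cstddef>
#include <memory>
#include <utility>
#include <vector>

using Tensor = std::vector<float>;  // stand-in for LoDTensor

class ReaderBase {
 public:
  virtual void ReadNext(std::vector<Tensor>* out) = 0;
  virtual void ReInit() = 0;
  virtual ~ReaderBase() = default;
};

// Toy "file reader": yields one instance at a time from an in-memory list.
class VectorReader : public ReaderBase {
 public:
  explicit VectorReader(std::vector<Tensor> data) : data_(std::move(data)) {}
  void ReadNext(std::vector<Tensor>* out) override {
    out->clear();
    if (pos_ < data_.size()) out->push_back(data_[pos_++]);
  }
  void ReInit() override { pos_ = 0; }

 private:
  std::vector<Tensor> data_;
  std::size_t pos_ = 0;
};

// Decorated reader: groups its underlying reader's instances into batches.
class BatchReader : public ReaderBase {
 public:
  BatchReader(std::unique_ptr<ReaderBase> underlying, std::size_t batch_size)
      : underlying_(std::move(underlying)), batch_size_(batch_size) {}
  void ReadNext(std::vector<Tensor>* out) override {
    out->clear();
    std::vector<Tensor> one;
    for (std::size_t i = 0; i < batch_size_; ++i) {
      underlying_->ReadNext(&one);
      if (one.empty()) break;  // underlying reader is exhausted
      out->push_back(one.front());
    }
  }
  void ReInit() override { underlying_->ReInit(); }

 private:
  std::unique_ptr<ReaderBase> underlying_;
  std::size_t batch_size_;
};

// "Empty decorator" that erases the concrete reader type, mirroring the
// ReaderHolder idea: callers only ever see the ReaderBase interface.
class ReaderHolder {
 public:
  void Reset(std::unique_ptr<ReaderBase> reader) { reader_ = std::move(reader); }
  void ReadNext(std::vector<Tensor>* out) { reader_->ReadNext(out); }
  void ReInit() { reader_->ReInit(); }

 private:
  std::unique_ptr<ReaderBase> reader_;
};
```

Because `BatchReader` itself implements `ReaderBase`, it could just as well wrap a shuffle decorator, and the holder's users never need to know which concrete reader sits inside.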

doc/design/dist_refactor/distributed_architecture.md renamed to doc/fluid/design/dist_train/distributed_architecture.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,4 +1,4 @@
-# Design Doc: Distributed Training Architecture
+# Design Doc: Fluid Distributed Training Architecture
 
 ## Abstract
 
```
File renamed without changes.

doc/design/dist_refactor/parameter_server.md renamed to doc/fluid/design/dist_train/parameter_server.md

Lines changed: 12 additions & 1 deletion
```diff
@@ -59,6 +59,17 @@ After converting:
   queue. It will block until the queue has the required number of
   tensors.
 
+### Sparse Update
+
+For embedding layers, the gradient may have many rows containing only 0 when training.
+If the gradient uses a dense tensor for parameter optimization,
+it can waste memory, slow down the calculations and waste
+bandwidth during distributed training.
+In Fluid, we introduce [SelectedRows](../selected_rows.md) to represent a list of rows containing
+non-zero gradient data. So when we do parameter optimization both locally and remotely,
+we only need to send those non-zero rows to the optimizer operators:
+
+<img src="src/sparse_update.png" width="700" />
 
 ### Benefits
@@ -91,6 +102,6 @@ After converting:
   `min_count` attribute), does our current design support it? (similar
   question for the *Add* OP)
 
+### References
 
-### References:
 [1] [TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf)
```
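The `SelectedRows` idea added above can be sketched with a toy structure. The field names and the `ToDense` helper below are illustrative assumptions, not Fluid's actual API: only the non-zero rows and their indices are stored and transmitted, yet a receiver can still reconstruct the dense gradient on demand:

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Toy sketch of the SelectedRows idea: store only the non-zero rows of a
// sparse gradient together with their row indices, instead of a full
// height x width dense tensor.
struct SelectedRows {
  std::vector<std::int64_t> rows;          // indices of non-zero rows
  std::vector<std::vector<float>> values;  // one dense row per stored index
  std::int64_t height;                     // logical dense row count
};

// Scatter the stored rows back into a zero-filled dense matrix, as a
// receiving optimizer operator might if it needed the dense form.
std::vector<std::vector<float>> ToDense(const SelectedRows& s,
                                        std::size_t width) {
  std::vector<std::vector<float>> dense(
      static_cast<std::size_t>(s.height), std::vector<float>(width, 0.0f));
  for (std::size_t i = 0; i < s.rows.size(); ++i) {
    dense[static_cast<std::size_t>(s.rows[i])] = s.values[i];
  }
  return dense;
}
```

For an embedding with millions of rows where a minibatch touches only a handful, sending `rows` plus `values` instead of the whole dense matrix is the bandwidth and memory saving the section describes.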
