Commit 35e7944

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into seq_expand_op

2 parents: 97f1b98 + b84e822

265 files changed: +9980 -1710 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
```diff
@@ -28,3 +28,4 @@ cmake_install.cmake
 paddle/.timestamp
 python/paddlepaddle.egg-info/
 paddle/pybind/pybind.h
+python/paddle/v2/framework/tests/tmp/*
```

cmake/external/eigen.cmake

Lines changed: 1 addition & 1 deletion
```diff
@@ -8,7 +8,7 @@ ExternalProject_Add(
     extern_eigen3
     ${EXTERNAL_PROJECT_LOG_ARGS}
     GIT_REPOSITORY  "https://github.com/RLovelett/eigen.git"
-    GIT_TAG         4e79cb69b9425f5f8c3a84be4350d4ab75b5fd9d
+    GIT_TAG         70661066beef694cadf6c304d0d07e0758825c10
     PREFIX          ${EIGEN_SOURCE_DIR}
     UPDATE_COMMAND  ""
     CONFIGURE_COMMAND ""
```

cmake/external/nccl.cmake

Lines changed: 25 additions & 26 deletions
```diff
@@ -1,9 +1,8 @@
-INCLUDE(ExternalProject)
+include(ExternalProject)
 
-SET(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)
-
-INCLUDE_DIRECTORIES(${NCCL_SOURCE_DIR}/src/extern_nccl/src)
+set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)
 
+include_directories(${NCCL_SOURCE_DIR}/src/extern_nccl/src)
 
 if(WITH_DSO)
   # If we use DSO, we do not build nccl, just download the dependencies
@@ -12,39 +11,39 @@ if(WITH_DSO)
   set(NCCL_INSTALL_DIR "")
 else()
   # otherwise, we build nccl and link it.
+  set(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
+  # Note: cuda 8.0 is needed to make nccl
+  # When cuda is not installed on the system directory, need to set CUDA_HOME to your cuda root
   set(NCCL_BUILD_COMMAND "make -j 8")
-  set(NCCL_INSTALL_COMMAND "make install")
-  SET(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
+  set(NCCL_INSTALL_COMMAND "make install PREFIX=${NCCL_INSTALL_DIR}")
 endif()
 
 ExternalProject_Add(
-    extern_nccl
-    ${EXTERNAL_PROJECT_LOG_ARGS}
-    GIT_REPOSITORY "https://github.com/NVIDIA/nccl.git"
-    GIT_TAG "v1.3.4-1"
-    PREFIX "${NCCL_SOURCE_DIR}"
-    UPDATE_COMMAND ""
-    CONFIGURE_COMMAND ""
-    BUILD_COMMAND "${NCCL_BUILD_COMMAND}"
-    INSTALL_COMMAND "${NCCL_INSTALL_COMMAND}"
-    INSTALL_DIR "${NCCL_INSTALL_DIR}"
-    TEST_COMMAND ""
+  extern_nccl
+  ${EXTERNAL_PROJECT_LOG_ARGS}
+  GIT_REPOSITORY "https://github.com/NVIDIA/nccl.git"
+  GIT_TAG "v1.3.4-1"
+  PREFIX "${NCCL_SOURCE_DIR}"
+  UPDATE_COMMAND ""
+  CONFIGURE_COMMAND ""
+  BUILD_COMMAND "${NCCL_BUILD_COMMAND}"
+  INSTALL_COMMAND "${NCCL_INSTALL_COMMAND}"
+  INSTALL_DIR "${NCCL_INSTALL_DIR}"
+  TEST_COMMAND ""
 )
 
-if (WITH_DSO)
-  if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
-    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_any_dummy.c)
-    file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";")
+if(WITH_DSO)
+  if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
+    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_nccl_dummy.c)
+    file(WRITE ${dummyfile} "const char * dummy_nccl = \"${dummyfile}\";")
     add_library(nccl STATIC ${dummyfile})
   else()
     add_library(nccl INTERFACE)
   endif()
 else()
-  ADD_LIBRARY(nccl STATIC IMPORTED GLOBAL)
-  SET_PROPERTY(TARGET nccl PROPERTY IMPORTED_LOCATION
-      ${NCCL_INSTALL_DIR}/lib/libnccl.a)
+  add_library(nccl STATIC IMPORTED GLOBAL)
+  set_property(TARGET nccl PROPERTY IMPORTED_LOCATION
+      ${NCCL_INSTALL_DIR}/lib/libnccl_static.a)
 endif()
 
 add_dependencies(nccl extern_nccl)
-
-LIST(APPEND external_project_dependencies nccl)
```

doc/design/graph_survey.md

Lines changed: 232 additions & 0 deletions
## Survey on Graph

Neural network frameworks often provide a symbolic API for users to describe a network topology conveniently. This doc mainly focuses on the symbolic APIs of the most popular neural network frameworks, and tries to find out how to parse a symbolic configuration into a portable file, such as protobuf or json.

### Mxnet

The core concept of Mxnet's symbolic API is `Symbol`. Mxnet implements the `Symbol` class in C++ and exports it to Python through its C API. The comments in Mxnet describe it as follows:

`Symbol` is a help class used to represent the operator node in Graph.
`Symbol` acts as an interface for building graphs from different components like Variable, Functor and Group. `Symbol` is also exported to the python front-end (while Graph is not) to enable quick tests and deployment. Conceptually, a symbol is the final operation of a graph and thus includes all the information required (the graph) to evaluate its output value.

A simple network topology written with Symbol is as follows:

```python
import mxnet as mx

def get_symbol(num_classes=10, **kwargs):
    data = mx.symbol.Variable('data')
    data = mx.symbol.Flatten(data=data)
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
    act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
    fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
    act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
    fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=num_classes)
    mlp = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')
    return mlp
```

A Variable here is actually a Symbol. Every basic Symbol corresponds to one Node, and every Node has its own NodeAttr. There is an `op` field in the NodeAttr class; when a Symbol represents a Variable (often input data), the `op` field is null.

Symbol contains a data member, `std::vector<NodeEntry> outputs`, and a NodeEntry contains a pointer to a Node. We can follow the Node pointers to recover the whole Graph, as sketched below.
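
To make that traversal concrete, here is a minimal Python sketch. The attribute names (`outputs`, `node`, `inputs`) mirror the C++ members described above and are assumptions for illustration, not Mxnet's real Python API.

```python
def collect_graph_nodes(symbol):
    """Gather every Node reachable from a symbol's outputs (illustrative)."""
    seen, stack = set(), [entry.node for entry in symbol.outputs]
    while stack:
        node = stack.pop()
        if id(node) in seen:
            continue
        seen.add(id(node))
        # Each input NodeEntry points at the Node that produced it.
        stack.extend(entry.node for entry in node.inputs)
    return seen
```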

A Symbol can also be saved to a JSON file.

Here is a detailed example:

```
>>> import mxnet as mx
>>> data = mx.symbol.Variable('data')
>>> print data.debug_str()
Variable:data

>>> data = mx.symbol.Flatten(data=data)
>>> print data.debug_str()
Symbol Outputs:
  output[0]=flatten0(0)
Variable:data
--------------------
Op:Flatten, Name=flatten0
Inputs:
  arg[0]=data(0) version=0

>>> fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
>>> print fc1.debug_str()
Symbol Outputs:
  output[0]=fc1(0)
Variable:data
--------------------
Op:Flatten, Name=flatten0
Inputs:
  arg[0]=data(0) version=0
Variable:fc1_weight
Variable:fc1_bias
--------------------
Op:FullyConnected, Name=fc1
Inputs:
  arg[0]=flatten0(0)
  arg[1]=fc1_weight(0) version=0
  arg[2]=fc1_bias(0) version=0
Attrs:
  num_hidden=128
```

### TensorFlow

The core concept of TensorFlow's symbolic API is `Tensor`. TensorFlow defines `Tensor` in Python. The comments in TensorFlow describe it as follows:

A `Tensor` is a symbolic handle to one of the outputs of an `Operation`. It does not hold the values of that operation's output, but instead provides a means of computing those values in a TensorFlow [Session](https://www.tensorflow.org/api_docs/python/tf/Session).

A simple example is as follows:

```python
import tensorflow as tf

# Build a dataflow graph.
c = tf.constant([[1.0, 2.0], [3.0, 4.0]])
d = tf.constant([[1.0, 1.0], [0.0, 1.0]])
e = tf.matmul(c, d)

# Construct a `Session` to execute the graph.
sess = tf.Session()

# Execute the graph and store the value that `e` represents in `result`.
result = sess.run(e)
```

The main methods of `Tensor` are as follows:

```python
@property
def op(self):
  """The `Operation` that produces this tensor as an output."""
  return self._op

@property
def dtype(self):
  """The `DType` of elements in this tensor."""
  return self._dtype

@property
def graph(self):
  """The `Graph` that contains this tensor."""
  return self._op.graph

@property
def name(self):
  """The string name of this tensor."""
  if not self._op.name:
    raise ValueError("Operation was not named: %s" % self._op)
  return "%s:%d" % (self._op.name, self._value_index)

@property
def device(self):
  """The name of the device on which this tensor will be produced, or None."""
  return self._op.device
```

A Tensor can be taken as a target to run by a Session. A Tensor carries all the information of its Graph and tracks its data dependencies.

Here is a detailed example:

```
>>> import tensorflow as tf
>>> c = tf.constant([[1.0, 2.0], [3.0, 4.0]])
>>> print c.graph
<tensorflow.python.framework.ops.Graph object at 0x10f256d50>
>>> d = tf.constant([[1.0, 1.0], [0.0, 1.0]])
>>> print d.graph
<tensorflow.python.framework.ops.Graph object at 0x10f256d50>
>>> e = tf.matmul(c, d)
>>> print e.graph
<tensorflow.python.framework.ops.Graph object at 0x10f256d50>
```

### Dynet

The core concept of Dynet's symbolic API is `Expression`; Dynet defines the `Expression` class in C++.

A simple example is as follows:

```cpp
ComputationGraph cg;
Expression W = parameter(cg, pW);

Expression in = input(cg, xs[i]);
Expression label = input(cg, ys[i]);
Expression pred = W * in;
Expression loss = square(pred - label);
```

The input data and parameters are also represented by Expressions. Every basic Expression corresponds to a Node, and input data is also a Node.

Expression has a data member, ComputationGraph, which is modified as the user configures the network. An Expression can be a running target, because an Expression tracks all of its dependencies.

Here is a detailed example:

Write the topology in C++:

```cpp
ComputationGraph cg;
Expression W = parameter(cg, pW);
cg.print_graphviz();

Expression pred = W * xs[i];
cg.print_graphviz();

Expression loss = square(pred - ys[i]);
cg.print_graphviz();
```

Compile and print:

```
# first print
digraph G {
  rankdir=LR;
  nodesep=.05;
  N0 [label="v0 = parameters({1}) @ 0x7ffe4de00110"];
}
# second print
digraph G {
  rankdir=LR;
  nodesep=.05;
  N0 [label="v0 = parameters({1}) @ 0x7ffe4de00110"];
  N1 [label="v1 = v0 * -0.98"];
  N0 -> N1;
}
# third print
digraph G {
  rankdir=LR;
  nodesep=.05;
  N0 [label="v0 = parameters({1}) @ 0x7ffe4de00110"];
  N1 [label="v1 = v0 * -0.98"];
  N0 -> N1;
  N2 [label="v2 = -1.88387 - v1"];
  N1 -> N2;
  N3 [label="v3 = -v2"];
  N2 -> N3;
  N4 [label="v4 = square(v3)"];
  N3 -> N4;
}
```

### Conclusion

Symbol in Mxnet, Tensor in TensorFlow, and Expression in Dynet are concepts at the same level. We use the unified name Expression here; this concept has the following features (see the sketch after this list):

- Users write the topology with a symbolic API, and every return value is an Expression, including input data and parameters.
- Every Expression corresponds to a global Graph, and Expressions can also be composed.
- An Expression tracks all of its dependencies and can be taken as a run target.
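
To illustrate the three features together, here is a minimal toy Expression in Python. It is a hypothetical sketch, not the API of Mxnet, TensorFlow, Dynet, or Paddle.

```python
class Expression(object):
    """A toy symbolic node: records an op and its input Expressions."""

    def __init__(self, op, inputs, value=None):
        self.op = op          # None for inputs/parameters (compare Mxnet's null op)
        self.inputs = inputs  # dependency edges; together they form the graph
        self.value = value

    def __add__(self, other):
        return Expression('add', [self, other])

    def __mul__(self, other):
        return Expression('mul', [self, other])

    def run(self):
        """Evaluate this Expression; it is a valid run target because it
        reaches every dependency through `inputs`."""
        if self.op is None:
            return self.value
        args = [e.run() for e in self.inputs]
        return {'add': args[0] + args[1], 'mul': args[0] * args[1]}[self.op]

# Usage: every return value is an Expression, and the final one can be run.
x = Expression(None, [], value=3.0)   # input data
w = Expression(None, [], value=0.5)   # parameter
y = w * x + x                         # composed Expressions
print(y.run())                        # 4.5
```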

doc/design/model_format.md

Lines changed: 36 additions & 0 deletions
# Design Doc: Model Format

## Motivation

A model is an output of the training process. One complete model consists of two parts, the **topology** and the **parameters**. In order to support industrial deployment, the model format must be self-complete and must not expose any training source code.

As a result, in PaddlePaddle, the **topology** is represented as a [ProgramDesc](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/doc/design/program.md), which describes the model structure. The **parameters** contain all the trainable weights in the model. We must support large parameters and efficient serialization/deserialization of them.

## Implementation

The topology is saved as plain text in a detailed, self-contained protobuf file.

The parameters are saved as a binary file. A protobuf message has a size limit of [64M](https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.io.coded_stream#CodedInputStream.SetTotalBytesLimit.details). We have done a [benchmark experiment](https://github.com/PaddlePaddle/Paddle/pull/4610), which shows that protobuf is not a fit for this task.

As a result, we designed a particular format for tensor serialization. By default, an arbitrary tensor in Paddle is a [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md), and has a description proto, [LoDTensorDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L99). We save the desc proto as the byte-string header. It contains all the necessary information, such as the `dims` and the `LoD` information of the [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/paddle/framework/lod_tensor.md). A tensor stores its values in a contiguous memory buffer; for speed, we dump the raw memory to disk and save it as the byte-string content. So the binary format of one tensor is as follows.

The table below shows a tensor's byte view in detail. Note that all the signed values are written in the little-endian format.

| field name | type | description |
| --- | --- | --- |
| version | uint32_t | Version of saved file. Always 0 now. |
| tensor desc length | uint32_t | TensorDesc (protobuf message) length in bytes. |
| tensor desc | void* | TensorDesc protobuf binary message |
| tensor data | void* | Tensor's data in binary format. The length of `tensor_data` is decided by `TensorDesc.dims()` and `TensorDesc.data_type()` |
| lod_level | uint64_t | Level of LoD |
| length of lod[0] | uint64_t | [Optional] length of lod[0] in bytes. |
| data of lod[0] | uint64_t* | [Optional] lod[0].data() |
| ... | ... | ... |
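
To make the layout concrete, here is a minimal Python sketch of a writer for this format. The function name `write_tensor` and its arguments are illustrative assumptions, not Paddle's actual serialization API.

```python
import struct

def write_tensor(f, desc_bytes, data, lods=()):
    """Write one tensor in the byte layout above (illustrative sketch).

    desc_bytes: serialized LoDTensorDesc protobuf message.
    data: raw little-endian tensor memory, e.g. numpy_array.tobytes().
    lods: a sequence of LoD levels, each a list of uint64 offsets.
    """
    f.write(struct.pack('<I', 0))                # version, always 0 now
    f.write(struct.pack('<I', len(desc_bytes)))  # tensor desc length
    f.write(desc_bytes)                          # tensor desc proto
    f.write(data)                                # raw tensor data
    f.write(struct.pack('<Q', len(lods)))        # lod_level
    for level in lods:                           # optional LoD payload
        f.write(struct.pack('<Q', 8 * len(level)))         # length in bytes
        f.write(struct.pack('<%dQ' % len(level), *level))  # lod data
```
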
## Summary

- We introduce a model format.
- The model, represented by its forward-pass computation procedure, is saved in a **ProgramDesc** protobuf message.
- The **parameters** are saved as a set of binary tensors in the format specified above.

doc/design/optimizer.md

Lines changed: 1 addition & 15 deletions
```diff
@@ -65,20 +65,6 @@ class Optimizer(object):
     def __init__(self):
         pass
 
-    def create_backward_pass(self, loss, parameter_list=None):
-        """
-        create and add gradient Operators in BlockDesc to Compute gradients of `loss`
-        for parameters in parameter_list
-
-        Args:
-          loss: an variable generated by cost function.
-          parameter_list: parameters that need to compute gradient and update to optimize the lost.
-
-        Returns:
-          list of (parameters, gradients) pair.
-        """
-        return None
-
     def create_optimization_pass(self, parameters_and_grads):
         """Add optimization operators to update gradients to variables.
 
@@ -93,7 +79,7 @@ class Optimizer(object):
     def minimize(self, loss, parameter_list):
         """Add operations to minimize `loss` by updating `parameter_list`.
 
-        This method combines interface `create_backward_pass()` and
+        This method combines interface `append_backward_ops()` and
         `create_optimization_pass()` into one.
         """
         params_grads = self.create_backward_pass(loss, parameter_list)
```
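
The docstring edit above points at a rename of the backward-pass helper to `append_backward_ops()`. As a rough sketch only (the hunk's last context line still calls the old name, and the surrounding class is assumed), `minimize` would chain the two passes like this:

```python
def minimize(self, loss, parameter_list):
    """Add operations to minimize `loss` by updating `parameter_list`."""
    # Assumption: append_backward_ops keeps create_backward_pass's contract
    # and returns a list of (parameter, gradient) pairs.
    params_grads = self.append_backward_ops(loss, parameter_list)
    return self.create_optimization_pass(params_grads)
```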
