Skip to content

Commit a3f7ebd

Browse files
authored
Merge pull request #10 from PaddlePaddle/develop
merge to local
2 parents 98069d9 + c6bd434 commit a3f7ebd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+1843
-436
lines changed

README.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
English | [简体中文](./README_cn.md)
44

55
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
6-
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html)
7-
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html)
6+
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)
7+
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html)
88
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
99
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
1010

@@ -18,17 +18,17 @@ learning to many products at Baidu.
1818
Our vision is to enable deep learning for everyone via PaddlePaddle.
1919
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
2020

21-
### Latest PaddlePaddle Release: [Fluid 1.2.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.2)
21+
### Latest PaddlePaddle Release: [Fluid 1.3.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.3)
2222
### Install Latest Stable Release:
2323
```
2424
# Linux CPU
2525
pip install paddlepaddle
2626
# Linux GPU cuda9cudnn7
2727
pip install paddlepaddle-gpu
2828
# Linux GPU cuda8cudnn7
29-
pip install paddlepaddle-gpu==1.2.0.post87
29+
pip install paddlepaddle-gpu==1.3.0.post87
3030
# Linux GPU cuda8cudnn5
31-
pip install paddlepaddle-gpu==1.2.0.post85
31+
pip install paddlepaddle-gpu==1.3.0.post85
3232
3333
# For installation on other platform, refer to http://paddlepaddle.org/
3434
```
@@ -75,26 +75,26 @@ pip install paddlepaddle-gpu==1.2.0.post85
7575

7676
## Installation
7777

78-
It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html) on our website.
78+
It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html) on our website.
7979

8080
## Documentation
8181

82-
We provide [English](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html) and
83-
[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html) documentation.
82+
We provide [English](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html) and
83+
[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html) documentation.
8484

8585
- [Deep Learning 101](https://github.com/PaddlePaddle/book)
8686

8787
You might want to start from this online interactive book that can run in a Jupyter Notebook.
8888

89-
- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.2/user_guides/howto/training/cluster_howto.html)
89+
- [Distributed Training](http://paddlepaddle.org/documentation/docs/en/1.3/user_guides/howto/training/multi_node_en.html)
9090

9191
You can run distributed training jobs on MPI clusters.
9292

93-
- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.2/api_cn/index_cn.html)
93+
- [Python API](http://paddlepaddle.org/documentation/docs/en/1.3/api/index_en.html)
9494

9595
Our new API enables much shorter programs.
9696

97-
- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.2/advanced_usage/development/contribute_to_paddle/index_cn.html)
97+
- [How to Contribute](http://paddlepaddle.org/documentation/docs/en/1.3/advanced_usage/development/contribute_to_paddle/index_en.html)
9898

9999
We appreciate your contributions!
100100

README_cn.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
[English](./README.md) | 简体中文
44

55
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
6-
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html)
7-
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html)
6+
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)
7+
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html)
88
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
99
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
1010

@@ -16,17 +16,17 @@ PaddlePaddle (PArallel Distributed Deep LEarning) 是一个简单易用、高效
1616

1717
跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
1818

19-
### PaddlePaddle最新版本: [Fluid 1.2.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.2)
19+
### PaddlePaddle最新版本: [Fluid 1.3.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.3)
2020
### 安装最新稳定版本:
2121
```
2222
# Linux CPU
2323
pip install paddlepaddle
2424
# Linux GPU cuda9cudnn7
2525
pip install paddlepaddle-gpu
2626
# Linux GPU cuda8cudnn7
27-
pip install paddlepaddle-gpu==1.2.0.post87
27+
pip install paddlepaddle-gpu==1.3.0.post87
2828
# Linux GPU cuda8cudnn5
29-
pip install paddlepaddle-gpu==1.2.0.post85
29+
pip install paddlepaddle-gpu==1.3.0.post85
3030
3131
# 其他平台上的安装指引请参考 http://paddlepaddle.org/
3232
```
@@ -57,26 +57,26 @@ pip install paddlepaddle-gpu==1.2.0.post85
5757

5858
## 安装
5959

60-
推荐阅读官网上的[安装说明](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html)
60+
推荐阅读官网上的[安装说明](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/install/index_cn.html)
6161

6262
## 文档
6363

64-
我们提供[英文](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html)
65-
[中文](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html) 文档
64+
我们提供[英文](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)
65+
[中文](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html) 文档
6666

6767
- [深度学习101](https://github.com/PaddlePaddle/book)
6868

6969
或许您想从这个在线交互式书籍开始,可以在Jupyter Notebook中运行
7070

71-
- [分布式训练](http://paddlepaddle.org/documentation/docs/zh/1.2/user_guides/howto/training/cluster_howto.html)
71+
- [分布式训练](http://paddlepaddle.org/documentation/docs/zh/1.3/user_guides/howto/training/multi_node.html)
7272

7373
可以在MPI集群上运行分布式训练任务
7474

75-
- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.2/api_cn/index_cn.html)
75+
- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.3/api_cn/index_cn.html)
7676

7777
新的API支持代码更少更简洁的程序
7878

79-
- [贡献方式](http://paddlepaddle.org/documentation/docs/zh/1.2/advanced_usage/development/contribute_to_paddle/index_cn.html)
79+
- [贡献方式](http://paddlepaddle.org/documentation/docs/zh/1.3/advanced_usage/development/contribute_to_paddle/index_cn.html)
8080

8181
欢迎您的贡献!
8282

cmake/external/mklml.cmake

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,8 @@ IF(WIN32)
3939
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib)
4040
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll)
4141
SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.dll)
42-
ELSE()
43-
#TODO(intel-huying):
44-
# Now enable Erf function in mklml library temporarily, it will be updated as offical version later.
45-
SET(MKLML_VER "VsErf_mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
42+
ELSE()
43+
SET(MKLML_VER "mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
4644
SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
4745
SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
4846
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)

paddle/fluid/API.spec

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ paddle.fluid.initializer.NumpyArrayInitializer.__init__ ArgSpec(args=['self', 'v
7171
paddle.fluid.layers.fc ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None))
7272
paddle.fluid.layers.embedding ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32'))
7373
paddle.fluid.layers.dynamic_lstm ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None))
74-
paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None))
74+
paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None))
7575
paddle.fluid.layers.dynamic_gru ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False))
7676
paddle.fluid.layers.gru_unit ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False))
7777
paddle.fluid.layers.linear_chain_crf ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,))
@@ -121,6 +121,7 @@ paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=
121121
paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
122122
paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
123123
paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False))
124+
paddle.fluid.layers.sampled_softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0))
124125
paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False))
125126
paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False))
126127
paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))

paddle/fluid/framework/details/build_strategy.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,12 +135,15 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
135135
void AppendMultiDevPass(const BuildStrategy &strategy) {
136136
ir::Pass *multi_devices_pass;
137137
if (strategy_.is_distribution_) {
138+
VLOG(3) << "multi device parameter server mode";
138139
multi_devices_pass = AppendPass("dist_multi_devices_pass").get();
139140
} else {
140141
if (strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) {
142+
VLOG(3) << "multi devices collective mode with allreduce";
141143
multi_devices_pass =
142144
AppendPass("allreduce_mode_multi_devices_pass").get();
143145
} else if (strategy.reduce_ == BuildStrategy::ReduceStrategy::kReduce) {
146+
VLOG(3) << "multi deivces collective mode with reduce";
144147
multi_devices_pass = AppendPass("reduce_mode_multi_devices_pass").get();
145148
} else {
146149
PADDLE_THROW("Unknown reduce strategy.");

paddle/fluid/framework/details/multi_devices_graph_pass.cc

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -937,9 +937,21 @@ void DistSSAGraphBuilder::InsertCollectiveOp(ir::Graph *result,
937937
}
938938

939939
void DistSSAGraphBuilder::InsertPostprocessOps(ir::Graph *result) const {
940-
if (need_broadcast_var_ ||
941-
(UseGPU() &&
942-
strategy_.reduce_ == BuildStrategy::ReduceStrategy::kReduce)) {
940+
// Broadcast received parameters when training in parameter server mode.
941+
if (need_broadcast_var_) {
942+
// There are 4 conditions:
943+
// 1. GPU && Reduce: Reduce gradient then broadcast gradient to other GPUs.
944+
// Need to broadcast received parameters to other GPU.
945+
// 2. GPU && AllReduce: AllReduce all gradient to each GPU. Need to
946+
// broadcast received parameters to other GPU.
947+
// 3. CPU && AllReduce: AllReduce all gradient to each thread. Need to
948+
// broadcast received parameters to other scope.
949+
// 4. CPU && Reduce: because all parameters share the same memory, do not
950+
// broadcast received parameters.
951+
if (!UseGPU() &&
952+
strategy_.reduce_ == BuildStrategy::ReduceStrategy::kReduce) {
953+
return;
954+
}
943955
if (strategy_.fuse_broadcast_op_) {
944956
CreateFusedBroadcastOp(result, bcast_var_name_set_);
945957
} else {

paddle/fluid/operators/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ set(COMMON_OP_DEPS ${OP_HEADER_DEPS})
6666

6767
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor)
6868
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc)
69-
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler tree2col)
69+
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col)
7070
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search)
7171
if (WITH_GPU)
7272
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv prelu)

paddle/fluid/operators/activation_op.h

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ limitations under the License. */
1111

1212
#pragma once
1313
#include <glog/logging.h>
14-
#include <algorithm>
1514
#include <string>
1615
#include <unordered_set>
1716
#include <utility>
@@ -25,7 +24,6 @@ limitations under the License. */
2524
#include "paddle/fluid/framework/eigen.h"
2625
#include "paddle/fluid/framework/op_registry.h"
2726
#include "paddle/fluid/operators/detail/safe_ref.h"
28-
#include "paddle/fluid/operators/math/blas.h"
2927
#include "paddle/fluid/platform/float16.h"
3028

3129
#ifdef PADDLE_WITH_MKLDNN
@@ -303,28 +301,8 @@ template <typename T>
303301
struct GeluFunctor : public BaseActivationFunctor<T> {
304302
template <typename Device, typename X, typename Out>
305303
void operator()(Device d, X x, Out out) const {
306-
// Because the execute or device context can not be deliver here, it keep the
307-
// marco for NVCC.
308-
#if defined(PADDLE_WITH_MKLML) && !defined(_WIN32) && !defined(__APPLE__) && \
309-
!defined(__OSX__) && !defined(PADDLE_WITH_CUDA)
310-
auto x_data = x.data();
311-
auto out_data = out.data();
312-
int n = std::min(x.size(), out.size());
313-
314-
std::memset(out_data, 0, n * sizeof(T));
315-
math::CBlas<T>::AXPY(n, static_cast<T>(M_SQRT1_2), x_data, 1, out_data, 1);
316-
math::CBlas<T>::VMERF(n, out_data, out_data, VML_LA);
317-
for (int i = 0; i < n; i++) {
318-
out_data[i] += static_cast<T>(1);
319-
}
320-
math::CBlas<T>::VMUL(n, x_data, out_data, out_data);
321-
for (int i = 0; i < n; i++) {
322-
out_data[i] *= static_cast<T>(0.5);
323-
}
324-
#else
325304
auto temp = (x * static_cast<T>(M_SQRT1_2)).erf();
326305
out.device(d) = x * static_cast<T>(0.5) * (static_cast<T>(1) + temp);
327-
#endif
328306
}
329307
};
330308

paddle/fluid/operators/beam_search_decode_op.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ void BeamSearchDecoder<T>::ConvertSentenceVectorToLodTensor(
122122

123123
auto cpu_place = std::unique_ptr<paddle::platform::CPUPlace>(
124124
new paddle::platform::CPUPlace());
125-
paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place.get());
125+
paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place);
126126

127127
framework::LoD lod;
128128
lod.push_back(source_level_lod);

0 commit comments

Comments
 (0)