
Commit 86af6bd

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/clean_blas
2 parents 49dedfa + 46c90ea commit 86af6bd


59 files changed (+917, −435 lines)

.travis.yml

Lines changed: 6 additions & 9 deletions
@@ -12,7 +12,7 @@ services:
 os:
   - linux
 env:
-  - JOB=build_doc
+  - JOB=doc
   - JOB=check_style
   - JOB=build_android
 addons:

@@ -36,21 +36,18 @@ addons:
       - ccache
   ssh_known_hosts: 13.229.163.131
 before_install:
-  - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
-  # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
-  # protobuf version.
   - sudo pip install -r $TRAVIS_BUILD_DIR/python/requirements.txt
-  - sudo pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit LinkChecker
+  - sudo pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit
   - |
     function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
 script:
   - |
     # 43min timeout
-    if [[ "$JOB" == "build_android" ]]; then timeout 2580 docker run -it --rm -v "$TRAVIS_BUILD_DIR:/paddle" paddlepaddle/paddle:latest-dev-android;
-    else timeout 2580 paddle/scripts/travis/${JOB}.sh; fi;
-    RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else exit 1; fi;
+    if [[ "$JOB" != "doc" ]]; then timeout 2580 paddle/scripts/paddle_docker_build.sh ${JOB}; else paddle/scripts/paddle_build.sh ${JOB}; fi;
+    if [ $? -eq 0 ] || [ $? -eq 142 ]; then true; else exit 1; fi;
   - |
-    if [[ "$JOB" != "build_doc" ]]; then exit 0; fi;
+    if [[ "$JOB" != "doc" ]]; then exit 0; fi;
+    # For document only
     if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
     if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then exit 0; fi;
     export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh

AUTHORS.md

Lines changed: 5 additions & 0 deletions
@@ -18,7 +18,9 @@
 | hedaoyuan | Dao-Yuan He |
 | helinwang | He-Lin Wang |
 | jacquesqiao | Long-Fei Qiao |
+| jczaja | Jacek Czaja |
 | JiayiFeng | Jia-Yi Feng |
+| kbinias | Krzysztof Binias |
 | kuke | Yi-Bing Liu |
 | lcy-seso | Ying Cao |
 | lipeng-unisound | Peng Li |

@@ -27,17 +29,20 @@
 | llxxxll | Yong-Feng Liu |
 | luotao01 | Tao Luo |
 | lzhao4ever | Liang Zhao |
+| mozga-intel | Mateusz Ozga |
 | NHZlX | Zhao-Long Xing |
 | Noplz | Yuan Gao |
 | pakchoi | Chuan-Jiang Song |
 | panyx0718 | Xin Pan |
 | pengli09 | Peng Li |
 | pkuyym | Ya-Ming Yang |
+| pzelazko-intel | Pawel Zelazko |
 | QiJune | Jun Qi |
 | qingqing01 | Qing-Qing Dang |
 | reyoung | Yang Yu |
 | Superjom | Chun-Wei Yan |
 | tianbingsz | Tian-Bing Xu |
+| tpatejko | Tomasz Patejko |
 | typhoonzero | Yi Wu |
 | wanghaoshuang | Hao-Shuang Wang |
 | wangyang59 | Yang Wang |

Dockerfile

Lines changed: 2 additions & 3 deletions
@@ -1,7 +1,6 @@
 # A image for building paddle binaries
 # Use cuda devel base image for both cpu and gpu environment
-
-# When you modify it, please be aware of cudnn-runtime version
+# When you modify it, please be aware of cudnn-runtime version
 # and libcudnn.so.x in paddle/scripts/docker/build.sh
 FROM nvidia/cuda:8.0-cudnn7-devel-ubuntu16.04
 MAINTAINER PaddlePaddle Authors <[email protected]>

@@ -24,7 +23,7 @@ ENV HOME /root
 COPY ./paddle/scripts/docker/root/ /root/

 RUN apt-get update && \
-    apt-get install -y \
+    apt-get install -y --allow-downgrades \
     git python-pip python-dev openssh-server bison \
     libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \
     wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \

cmake/cuda.cmake

Lines changed: 2 additions & 0 deletions
@@ -172,6 +172,8 @@ set(CUDA_PROPAGATE_HOST_FLAGS OFF)
 list(APPEND CUDA_NVCC_FLAGS "-std=c++11")
 list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
 list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
+# in cuda9, suppress cuda warning on eigen
+list(APPEND CUDA_NVCC_FLAGS "-w")
 # Set :expt-relaxed-constexpr to suppress Eigen warnings
 list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")
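For context on the `--expt-relaxed-constexpr` flag kept here: it allows device code to call `constexpr` functions that are not annotated `__device__`, which Eigen's headers rely on. Below is a minimal standalone sketch, not taken from the Paddle tree; the names `half_of` and `scale_kernel` are made up for illustration.

// Sketch: compiles with `nvcc --expt-relaxed-constexpr`; without the flag,
// calling a host-only constexpr function from device code is rejected.
#include <cuda_runtime.h>

constexpr float half_of(float x) { return x * 0.5f; }  // no __device__ annotation

__global__ void scale_kernel(float* out) {
  // Allowed only because --expt-relaxed-constexpr treats constexpr functions
  // as callable from device code.
  out[threadIdx.x] = half_of(static_cast<float>(threadIdx.x));
}

int main() {
  float* d = nullptr;
  cudaMalloc(&d, 32 * sizeof(float));
  scale_kernel<<<1, 32>>>(d);
  cudaDeviceSynchronize();
  cudaFree(d);
  return 0;
}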

cmake/external/eigen.cmake

Lines changed: 3 additions & 1 deletion
@@ -22,7 +22,9 @@ else()
     extern_eigen3
     ${EXTERNAL_PROJECT_LOG_ARGS}
     GIT_REPOSITORY "https://github.com/RLovelett/eigen.git"
-    GIT_TAG 70661066beef694cadf6c304d0d07e0758825c10
+    # eigen on cuda9.1 missing header of math_funtions.hpp
+    # https://stackoverflow.com/questions/43113508/math-functions-hpp-not-found-when-using-cuda-with-eigen
+    GIT_TAG 917060c364181f33a735dc023818d5a54f60e54c
     PREFIX ${EIGEN_SOURCE_DIR}
     UPDATE_COMMAND ""
     CONFIGURE_COMMAND ""

cmake/external/warpctc.cmake

Lines changed: 1 addition & 2 deletions
@@ -38,8 +38,7 @@ ENDIF()
 ExternalProject_Add(
     extern_warpctc
     ${EXTERNAL_PROJECT_LOG_ARGS}
-    GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git"
-    GIT_TAG b63a0644654a3e0ed624c85a1767bc8193aead09
+    GIT_REPOSITORY "https://github.com/dzhwinter/warp-ctc.git"
     PREFIX ${WARPCTC_SOURCES_DIR}
     UPDATE_COMMAND ""
     CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}

doc/fluid/design/algorithm/parameter_average.md

Lines changed: 5 additions & 5 deletions
@@ -1,7 +1,7 @@
 # Averaging Parameter in PaddlePaddle

 ## Why Averaging
-In a large scale machine learning setup where the size of the training data is huge, it could take us a large number of iterations over the training data before we can achieve the optimal values of parameters of our model. Looking at the problem setup, it is desirable if we can obtain the optimal values of parameters by going through the data in as few passes as we can.
+In a large scale machine learning setup where the size of the training data is huge, it could take us a large number of iterations over the training data before we can achieve the optimal values of parameters of our model. Looking at the problem setup, it is desirable to obtain the optimal values of parameters by going through the data in as few passes as possible.

 Polyak and Juditsky (1992) showed that the test performance of simple average of parameters obtained by Stochastic Gradient Descent (SGD) is as good as that of parameter values that are obtained by training the model over and over again, over the training dataset.


@@ -16,16 +16,16 @@ We propose averaging for any optimizer similar to how ASGD performs it, as menti
 ### How to perform Parameter Averaging in PaddlePaddle

 Parameter Averaging in PaddlePaddle works in the following way during training :
-1. It will take in an instance of a normal optimizer as an input, e.g. RMSPropOptimizer
+1. It will take in an instance of an optimizer as an input, e.g. RMSPropOptimizer
 2. The optimizer itself is responsible for updating the parameters.
 3. The ParameterAverageOptimizer maintains a separate copy of the parameters for itself:
-    1. In concept, the values of this copy are the average of the values of the parameters in the most recent N batches.
-    2. However, saving all the N instances of the parameters in memory is not feasible.
+    1. In theory, the values of this copy are the average of the values of the parameters in the most recent N batches.
+    2. However, saving all N instances of the parameters in memory is not feasible.
     3. Therefore, an approximation algorithm is used.

 Hence, overall we have have two copies of the parameters: one for the optimizer itself, and one for the ParameterAverageOptimizer. The former should be used in back propagation, while the latter should be used during testing and should be saved.

-During the testing/ saving the model phase, we perform the following steps:
+During the testing/saving the model phase, we perform the following steps:
 1. Perform the delayed operations.
 2. Save current values of the parameters to a temporary variable.
 3. Replace the values of the parameters with the averaged values.
paddle/cuda/src/hl_cuda_lstm.cu

Lines changed: 5 additions & 5 deletions
@@ -344,9 +344,9 @@ __device__ __forceinline__ void transpose_32x32(real a[], const int idx) {
   int addr = idx % 32;
 #pragma unroll
   for (int k = 1; k < 32; k++) {
-    // rSrc[k] = __shfl(rSrc[k], (threadIdx.x + k) % 32, 32);
-    addr = __shfl(addr, (idx + 1) % 32, 32);
-    a[k] = __shfl(a[k], addr, 32);
+    // rSrc[k] = __shfl_sync(rSrc[k], (threadIdx.x + k) % 32, 32);
+    addr = __shfl_sync(addr, (idx + 1) % 32, 32);
+    a[k] = __shfl_sync(a[k], addr, 32);
   }

 #pragma unroll

@@ -362,8 +362,8 @@ __device__ __forceinline__ void transpose_32x32(real a[], const int idx) {
   addr = (32 - idx) % 32;
 #pragma unroll
   for (int k = 0; k < 32; k++) {
-    a[k] = __shfl(a[k], addr, 32);
-    addr = __shfl(addr, (idx + 31) % 32, 32);
+    a[k] = __shfl_sync(a[k], addr, 32);
+    addr = __shfl_sync(addr, (idx + 31) % 32, 32);
   }
 }
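For reference on this rename (here and in hl_top_k.cu below): CUDA 9 deprecated the implicitly synchronizing `__shfl` in favor of `__shfl_sync`, whose documented form takes a mask of participating lanes as its first argument, i.e. `__shfl_sync(mask, var, srcLane, width)`. A minimal standalone sketch of the CUDA 9 API, independent of the Paddle kernels above:

// Sketch: broadcast lane 0's value to the whole warp with the CUDA 9 shuffle API.
#include <cstdio>
#include <cuda_runtime.h>

__global__ void broadcast_lane0() {
  int value = threadIdx.x;
  // Every participating lane passes the same member mask; 0xffffffff means the
  // full warp takes part. Lane 0's value is broadcast to all 32 lanes.
  int from_lane0 = __shfl_sync(0xffffffffu, value, 0, 32);
  if (threadIdx.x == 1) printf("lane 1 received %d\n", from_lane0);
}

int main() {
  broadcast_lane0<<<1, 32>>>();
  cudaDeviceSynchronize();
  return 0;
}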

paddle/cuda/src/hl_top_k.cu

Lines changed: 1 addition & 1 deletion
@@ -250,7 +250,7 @@ __device__ __forceinline__ void blockReduce(Pair* shTopK,
       }
     }
     if (maxId[0] / 32 == warp) {
-      if (__shfl(beam, (maxId[0]) % 32, 32) == maxLength) break;
+      if (__shfl_sync(beam, (maxId[0]) % 32, 32) == maxLength) break;
     }
   }
 }

paddle/fluid/framework/details/scale_loss_grad_op_handle.cc

Lines changed: 1 addition & 0 deletions
@@ -46,6 +46,7 @@ void ScaleLossGradOpHandle::RunImpl() {
                       ->stream();
     memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp,
                  platform::CPUPlace(), &coeff_, sizeof(float), stream);
+    VLOG(1) << place_ << "RUN Scale loss grad op";
   });
 #endif
 }
