
Commit 4901184

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into float16
2 parents: 41bd1f9 + 399d3a2

466 files changed: 15489 additions and 18088 deletions


.clang-format

Lines changed: 0 additions & 1 deletion
@@ -25,4 +25,3 @@ AllowAllParametersOfDeclarationOnNextLine: true
 BinPackParameters: false
 BinPackArguments: false
 ...
-

.travis.yml

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@ before_install:
 script:
   - |
     timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout
-    RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi;
+    RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true ;else exit 1; fi;
   - |
     if [[ "$JOB" != "build_doc" ]]; then exit 0; fi;
     if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;

CMakeLists.txt

Lines changed: 19 additions & 11 deletions
@@ -36,8 +36,7 @@ include(simd)
 ################################ Configurations #######################################
 option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
 option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
-option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND})
-option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND})
+option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND})
 option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
 option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
 option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)

@@ -83,10 +82,8 @@ if(ANDROID OR IOS)
     "Disable PYTHON when cross-compiling for Android and iOS" FORCE)
   set(WITH_RDMA OFF CACHE STRING
     "Disable RDMA when cross-compiling for Android and iOS" FORCE)
-  set(WITH_MKLDNN OFF CACHE STRING
-    "Disable MKLDNN when cross-compiling for Android and iOS" FORCE)
-  set(WITH_MKLML OFF CACHE STRING
-    "Disable MKLML package when cross-compiling for Android and iOS" FORCE)
+  set(WITH_MKL OFF CACHE STRING
+    "Disable MKL when cross-compiling for Android and iOS" FORCE)

 # Compile PaddlePaddle mobile inference library
 if (NOT WITH_C_API)

@@ -112,6 +109,14 @@ else()
   set(THIRD_PARTY_BUILD_TYPE Release)
 endif()

+set(WITH_MKLML ${WITH_MKL})
+if (WITH_MKL AND AVX2_FOUND)
+  set(WITH_MKLDNN ON)
+else()
+  message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN")
+  set(WITH_MKLDNN OFF)
+endif()
+
 ########################################################################################

 include(external/mklml) # download mklml package

@@ -129,6 +134,8 @@ include(external/any) # download libn::any
 include(external/eigen) # download eigen3
 include(external/pybind11) # download pybind11
 include(external/nccl)
+include(external/cares)
+include(external/grpc)

 include(cudnn) # set cudnn libraries, must before configure
 include(configure) # add paddle env configuration

@@ -159,14 +166,15 @@ set(EXTERNAL_LIBS
 )

 if(WITH_GPU)
-  list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
-  if(NOT WITH_DSO)
-    list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
-  endif(NOT WITH_DSO)
+  include(cuda)
 endif(WITH_GPU)

+if(WITH_MKLML)
+  list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
+endif()
+
 if(WITH_MKLDNN)
-  list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB})
+  list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
 endif()

 if(USE_NNPACK)
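Taken together, these hunks make WITH_MKL the single user-facing switch for the Intel math libraries. A minimal configure sketch under that assumption; the build directory and any other flags are placeholders, not part of this commit:

    # Out-of-source build; only -DWITH_MKL comes from this change.
    mkdir -p build && cd build
    cmake .. -DWITH_MKL=ON
    # WITH_MKL=ON sets WITH_MKLML=ON unconditionally; WITH_MKLDNN is enabled
    # only when AVX2 intrinsics are detected, otherwise MKL-DNN stays off.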

Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ RUN apt-get update && \
     automake locales clang-format swig doxygen cmake \
     liblapack-dev liblapacke-dev libboost-dev \
     clang-3.8 llvm-3.8 libclang-3.8-dev \
-    net-tools && \
+    net-tools libtool && \
     apt-get clean -y

 # Install Go and glide

benchmark/IntelOptimizedPaddle.md

Lines changed: 28 additions & 8 deletions
@@ -12,11 +12,11 @@ Machine:

 System: CentOS release 6.3 (Final), Docker 1.12.1.

-PaddlePaddle: paddlepaddle/paddle:latest (TODO: will rerun after 0.11.0)
-
-- MKL-DNN tag v0.10
-- MKLML 2018.0.20170720
+PaddlePaddle: paddlepaddle/paddle:latest (for MKLML and MKL-DNN), paddlepaddle/paddle:latest-openblas (for OpenBLAS)
+- MKL-DNN tag v0.11
+- MKLML 2018.0.1.20171007
 - OpenBLAS v0.2.20
+(TODO: will rerun after 0.11.0)

 On each machine, we will test and compare the performance of training on single node using MKL-DNN / MKLML / OpenBLAS respectively.

@@ -31,17 +31,37 @@ Input image size - 3 * 224 * 224, Time: images/second

 | BatchSize | 64 | 128 | 256 |
 |--------------|-------| -----| --------|
-| OpenBLAS | 7.82 | 8.62 | 10.34 |
-| MKLML | 11.02 | 12.86 | 15.33 |
-| MKL-DNN | 27.69 | 28.8 | 29.27 |
+| OpenBLAS | 7.80 | 9.00 | 10.80 |
+| MKLML | 12.12 | 13.70 | 16.18 |
+| MKL-DNN | 28.46 | 29.83 | 30.44 |
+
+chart on batch size 128
+TBD
+
+- ResNet-50
+
+| BatchSize | 64 | 128 | 256 |
+|--------------|-------| ------| -------|
+| OpenBLAS | 25.22 | 25.68 | 27.12 |
+| MKLML | 32.52 | 31.89 | 33.12 |
+| MKL-DNN | 81.69 | 82.35 | 84.08 |

 chart on batch size 128
 TBD

-- ResNet
 - GoogLeNet

+| BatchSize | 64 | 128 | 256 |
+|--------------|-------| ------| -------|
+| OpenBLAS | 89.52 | 96.97 | 108.25 |
+| MKLML | 128.46| 137.89| 158.63 |
+| MKL-DNN | 250.46| 264.83| 269.50 |
+
+chart on batch size 128
+TBD
+
 ### Laptop
 TBD
 ### Desktop

benchmark/paddle/image/googlenet.py

Lines changed: 4 additions & 1 deletion
@@ -5,6 +5,7 @@
 width = 224
 num_class = 1000
 batch_size = get_config_arg('batch_size', int, 128)
+use_gpu = get_config_arg('use_gpu', bool, True)

 args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
 define_py_data_sources2(

@@ -16,6 +17,8 @@
     learning_method=MomentumOptimizer(0.9),
     regularization=L2Regularization(0.0005 * batch_size))

+conv_projection = conv_projection if use_gpu else img_conv_layer
+
 def inception2(name, input, channels, \
         filter1,
         filter3R, filter3,

@@ -138,7 +141,7 @@ def inception(name, input, channels, \
     cat = concat_layer(
         name=name,
         input=[cov1, cov3, cov5, covprj],
-        bias_attr=True,
+        bias_attr=True if use_gpu else False,
         act=ReluActivation())
     return cat
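With use_gpu read via get_config_arg, the CPU variant of this config swaps conv_projection for img_conv_layer and drops the concat-layer bias. A hypothetical invocation for overriding it, assuming the legacy v1 trainer CLI and its --config_args option as used by the benchmark scripts in this directory; all values here are illustrative placeholders:

    # Run the GoogLeNet config on CPU; use_gpu is consumed by get_config_arg above.
    paddle train \
      --job=time \
      --config=benchmark/paddle/image/googlenet.py \
      --use_gpu=False \
      --trainer_count=1 \
      --config_args=batch_size=128,use_gpu=0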

benchmark/paddle/image/run_mkldnn.sh

Lines changed: 3 additions & 6 deletions
@@ -1,9 +1,7 @@
 set -e

 function train() {
-  unset OMP_NUM_THREADS MKL_NUM_THREADS
-  export OMP_DYNAMIC="FALSE"
-  export KMP_AFFINITY="granularity=fine,compact,0,0"
+  unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
   topology=$1
   layer_num=$2
   bs=$3

@@ -14,8 +12,6 @@ function train() {
   elif [ $4 == "False" ]; then
     thread=`nproc`
     # each trainer_count use only 1 core to avoid conflict
-    export OMP_NUM_THREADS=1
-    export MKL_NUM_THREADS=1
     log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log"
   else
     echo "Wrong input $3, use True or False."

@@ -44,6 +40,7 @@ fi
 for use_mkldnn in True False; do
   for batchsize in 64 128 256; do
     train vgg 19 $batchsize $use_mkldnn
-    train resnet 50 $batchsize $use_mkldnn
+    train resnet 50 $batchsize $use_mkldnn
+    train googlenet v1 $batchsize $use_mkldnn
   done
 done
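The last hunk adds GoogLeNet v1 to the benchmark loop. Assuming the log-naming scheme visible in train() above, the non-MKL-DNN runs for the new model would land in files along these lines; the thread count comes from nproc and is machine-dependent:

    # e.g. on a 32-core machine, batch size 128:
    #   logs/googlenet-v1-32mklml-128.log
    ls logs/googlenet-v1-*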

cmake/configure.cmake

Lines changed: 8 additions & 21 deletions
@@ -81,27 +81,14 @@ else()
   include_directories(${CUDA_TOOLKIT_INCLUDE})
 endif(NOT WITH_GPU)

-if(WITH_MKLDNN)
-  add_definitions(-DPADDLE_USE_MKLDNN)
-  if (WITH_MKLML AND MKLDNN_IOMP_DIR)
-    message(STATUS "Enable Intel OpenMP at ${MKLDNN_IOMP_DIR}")
-    set(OPENMP_FLAGS "-fopenmp")
-    set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
-    set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}")
-  else()
-    find_package(OpenMP)
-    if(OPENMP_FOUND)
-      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-    else()
-      message(WARNING "Can not find OpenMP."
-        "Some performance features in MKLDNN may not be available")
-    endif()
-  endif()
-
-endif(WITH_MKLDNN)
+if (WITH_MKLML AND MKLML_IOMP_LIB)
+  message(STATUS "Enable Intel OpenMP with ${MKLML_IOMP_LIB}")
+  set(OPENMP_FLAGS "-fopenmp")
+  set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
+  set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}")
+endif()

 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}")

cmake/cross_compiling/ios.cmake

Lines changed: 3 additions & 5 deletions
@@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
 # Set the architecture for iOS
 if(NOT DEFINED IOS_ARCH)
   if(IOS_PLATFORM STREQUAL "OS")
-    # FIXME(liuyiqun): support "armv7;armv7s;arm64" future
-    set(IOS_ARCH "arm64")
+    set(IOS_ARCH "armv7;armv7s;arm64")
   elseif(IOS_PLATFORM STREQUAL "SIMULATOR")
-    # FIXME(liuyiqun): support "i386;x86_64" future
-    set(IOS_ARCH "x86_64")
+    set(IOS_ARCH "i386;x86_64")
   endif()
 endif()
 set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS")

@@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_

 # Hidden visibilty is required for cxx on iOS
 set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags")
-set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")
+set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")

 set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first")
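The first hunk changes the default IOS_ARCH so device builds cover armv7, armv7s, and arm64, and simulator builds cover i386 and x86_64. A hypothetical cross-compile configure using this toolchain file; the source and build paths and any other options are placeholders:

    # Configure an iOS device build; IOS_ARCH defaults to "armv7;armv7s;arm64"
    # after this change unless overridden with -DIOS_ARCH=...
    cmake .. \
      -DCMAKE_TOOLCHAIN_FILE=../cmake/cross_compiling/ios.cmake \
      -DIOS_PLATFORM=OS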
