Skip to content

Commit 989ee8b

Browse files
author
Kavya Srinet
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into design_doc_edit
2 parents 9f2dbc4 + 659c937 commit 989ee8b

File tree

301 files changed

+9565
-13279
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

301 files changed

+9565
-13279
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,3 @@ cmake_install.cmake
2828
paddle/.timestamp
2929
python/paddlepaddle.egg-info/
3030
paddle/pybind/pybind.h
31-
python/paddle/v2/framework/tests/tmp/*

CMakeLists.txt

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,7 @@ include(simd)
3636
################################ Configurations #######################################
3737
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
3838
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
39-
option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND})
40-
option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND})
39+
option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND})
4140
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
4241
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
4342
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
@@ -82,10 +81,8 @@ if(ANDROID OR IOS)
8281
"Disable PYTHON when cross-compiling for Android and iOS" FORCE)
8382
set(WITH_RDMA OFF CACHE STRING
8483
"Disable RDMA when cross-compiling for Android and iOS" FORCE)
85-
set(WITH_MKLDNN OFF CACHE STRING
86-
"Disable MKLDNN when cross-compiling for Android and iOS" FORCE)
87-
set(WITH_MKLML OFF CACHE STRING
88-
"Disable MKLML package when cross-compiling for Android and iOS" FORCE)
84+
set(WITH_MKL OFF CACHE STRING
85+
"Disable MKL when cross-compiling for Android and iOS" FORCE)
8986

9087
# Compile PaddlePaddle mobile inference library
9188
if (NOT WITH_C_API)
@@ -111,6 +108,14 @@ else()
111108
set(THIRD_PARTY_BUILD_TYPE Release)
112109
endif()
113110

111+
set(WITH_MKLML ${WITH_MKL})
112+
if (WITH_MKL AND AVX2_FOUND)
113+
set(WITH_MKLDNN ON)
114+
else()
115+
message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN")
116+
set(WITH_MKLDNN OFF)
117+
endif()
118+
114119
########################################################################################
115120

116121
include(external/mklml) # download mklml package
@@ -158,14 +163,15 @@ set(EXTERNAL_LIBS
158163
)
159164

160165
if(WITH_GPU)
161-
list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
162-
if(NOT WITH_DSO)
163-
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
164-
endif(NOT WITH_DSO)
166+
include(cuda)
165167
endif(WITH_GPU)
166168

169+
if(WITH_MKLML)
170+
list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
171+
endif()
172+
167173
if(WITH_MKLDNN)
168-
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB})
174+
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
169175
endif()
170176

171177
if(USE_NNPACK)

benchmark/IntelOptimizedPaddle.md

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ Machine:
1212

1313
System: CentOS release 6.3 (Final), Docker 1.12.1.
1414

15-
PaddlePaddle: paddlepaddle/paddle:latest (TODO: will rerun after 0.11.0)
16-
17-
- MKL-DNN tag v0.10
18-
- MKLML 2018.0.20170720
15+
PaddlePaddle: paddlepaddle/paddle:latest (for MKLML and MKL-DNN), paddlepaddle/paddle:latest-openblas (for OpenBLAS)
16+
- MKL-DNN tag v0.11
17+
- MKLML 2018.0.1.20171007
1918
- OpenBLAS v0.2.20
19+
(TODO: will rerun after 0.11.0)
2020

2121
On each machine, we will test and compare the performance of training on a single node using MKL-DNN / MKLML / OpenBLAS respectively.
2222

@@ -31,17 +31,37 @@ Input image size - 3 * 224 * 224, Time: images/second
3131

3232
| BatchSize | 64 | 128 | 256 |
3333
|--------------|-------| -----| --------|
34-
| OpenBLAS | 7.82 | 8.62 | 10.34 |
35-
| MKLML | 11.02 | 12.86 | 15.33 |
36-
| MKL-DNN | 27.69 | 28.8 | 29.27 |
34+
| OpenBLAS | 7.80 | 9.00 | 10.80 |
35+
| MKLML | 12.12 | 13.70 | 16.18 |
36+
| MKL-DNN | 28.46 | 29.83 | 30.44 |
37+
38+
39+
chart on batch size 128
40+
TBD
41+
42+
- ResNet-50
43+
44+
| BatchSize | 64 | 128 | 256 |
45+
|--------------|-------| ------| -------|
46+
| OpenBLAS | 25.22 | 25.68 | 27.12 |
47+
| MKLML | 32.52 | 31.89 | 33.12 |
48+
| MKL-DNN | 81.69 | 82.35 | 84.08 |
3749

3850

3951
chart on batch size 128
4052
TBD
4153

42-
- ResNet
4354
- GoogLeNet
4455

56+
| BatchSize | 64 | 128 | 256 |
57+
|--------------|-------| ------| -------|
58+
| OpenBLAS | 89.52 | 96.97 | 108.25 |
59+
| MKLML | 128.46| 137.89| 158.63 |
60+
| MKL-DNN     | 250.46| 264.83| 269.50 |
61+
62+
chart on batch size 128
63+
TBD
64+
4565
### Laptop
4666
TBD
4767
### Desktop

benchmark/paddle/image/googlenet.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
width = 224
66
num_class = 1000
77
batch_size = get_config_arg('batch_size', int, 128)
8+
use_gpu = get_config_arg('use_gpu', bool, True)
89

910
args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
1011
define_py_data_sources2(
@@ -16,6 +17,8 @@
1617
learning_method=MomentumOptimizer(0.9),
1718
regularization=L2Regularization(0.0005 * batch_size))
1819

20+
conv_projection = conv_projection if use_gpu else img_conv_layer
21+
1922
def inception2(name, input, channels, \
2023
filter1,
2124
filter3R, filter3,
@@ -138,7 +141,7 @@ def inception(name, input, channels, \
138141
cat = concat_layer(
139142
name=name,
140143
input=[cov1, cov3, cov5, covprj],
141-
bias_attr=True,
144+
bias_attr=True if use_gpu else False,
142145
act=ReluActivation())
143146
return cat
144147

benchmark/paddle/image/run_mkldnn.sh

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
set -e
22

33
function train() {
4-
unset OMP_NUM_THREADS MKL_NUM_THREADS
5-
export OMP_DYNAMIC="FALSE"
6-
export KMP_AFFINITY="granularity=fine,compact,0,0"
4+
unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
75
topology=$1
86
layer_num=$2
97
bs=$3
@@ -14,8 +12,6 @@ function train() {
1412
elif [ $4 == "False" ]; then
1513
thread=`nproc`
1614
# each trainer_count use only 1 core to avoid conflict
17-
export OMP_NUM_THREADS=1
18-
export MKL_NUM_THREADS=1
1915
log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log"
2016
else
2117
echo "Wrong input $3, use True or False."
@@ -44,6 +40,7 @@ fi
4440
for use_mkldnn in True False; do
4541
for batchsize in 64 128 256; do
4642
train vgg 19 $batchsize $use_mkldnn
47-
train resnet 50 $batchsize $use_mkldnn
43+
train resnet 50 $batchsize $use_mkldnn
44+
train googlenet v1 $batchsize $use_mkldnn
4845
done
4946
done

cmake/configure.cmake

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -76,27 +76,14 @@ else()
7676
include_directories(${CUDA_TOOLKIT_INCLUDE})
7777
endif(NOT WITH_GPU)
7878

79-
if(WITH_MKLDNN)
80-
add_definitions(-DPADDLE_USE_MKLDNN)
81-
if (WITH_MKLML AND MKLDNN_IOMP_DIR)
82-
message(STATUS "Enable Intel OpenMP at ${MKLDNN_IOMP_DIR}")
83-
set(OPENMP_FLAGS "-fopenmp")
84-
set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
85-
set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
86-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}")
87-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}")
88-
else()
89-
find_package(OpenMP)
90-
if(OPENMP_FOUND)
91-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
92-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
93-
else()
94-
message(WARNING "Can not find OpenMP."
95-
"Some performance features in MKLDNN may not be available")
96-
endif()
97-
endif()
98-
99-
endif(WITH_MKLDNN)
79+
if (WITH_MKLML AND MKLML_IOMP_LIB)
80+
message(STATUS "Enable Intel OpenMP with ${MKLML_IOMP_LIB}")
81+
set(OPENMP_FLAGS "-fopenmp")
82+
set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
83+
set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
84+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}")
85+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}")
86+
endif()
10087

10188
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}")
10289
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}")

cmake/cross_compiling/ios.cmake

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
7676
# Set the architecture for iOS
7777
if(NOT DEFINED IOS_ARCH)
7878
if(IOS_PLATFORM STREQUAL "OS")
79-
# FIXME(liuyiqun): support "armv7;armv7s;arm64" future
80-
set(IOS_ARCH "arm64")
79+
set(IOS_ARCH "armv7;armv7s;arm64")
8180
elseif(IOS_PLATFORM STREQUAL "SIMULATOR")
82-
# FIXME(liuyiqun): support "i386;x86_64" future
83-
set(IOS_ARCH "x86_64")
81+
set(IOS_ARCH "i386;x86_64")
8482
endif()
8583
endif()
8684
set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS")
@@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_
248246

249247
# Hidden visibility is required for cxx on iOS
250248
set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags")
251-
set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")
249+
set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")
252250

253251
set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first")
254252

0 commit comments

Comments
 (0)