Commit ee3483b

Merge branch 'develop' of github.com:PaddlePaddle/Paddle into fix_404_dist_train

2 parents: 10acacf + 3644446

192 files changed: +6401 / -2408 lines. Only a subset of the changed files is shown below.

CMakeLists.txt

Lines changed: 5 additions & 3 deletions
@@ -16,6 +16,8 @@ cmake_minimum_required(VERSION 3.0)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
+SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
+SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")

 include(system)

@@ -54,6 +56,7 @@ option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
 option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
 option(GLIDE_INSTALL "Download and install go dependencies " ON)
 option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
+option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
 option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)

 # CMAKE_BUILD_TYPE
@@ -67,9 +70,6 @@ if(ANDROID OR IOS)
     if(ANDROID)
         if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "16")
             message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 16")
-        elseif(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21")
-            # TODO: support glog for Android api 16 ~ 19 in the future
-            message(WARNING "Using the unofficial git repository <https://github.com/Xreki/glog.git> instead")
         endif()
     endif()

@@ -83,6 +83,8 @@ if(ANDROID OR IOS)
         "Disable RDMA when cross-compiling for Android and iOS" FORCE)
     set(WITH_MKL OFF CACHE STRING
         "Disable MKL when cross-compiling for Android and iOS" FORCE)
+    set(WITH_GOLANG OFF CACHE STRING
+        "Disable golang when cross-compiling for Android and iOS" FORCE)

     # Compile PaddlePaddle mobile inference library
     if (NOT WITH_C_API)

benchmark/paddle/image/googlenet.py

Lines changed: 20 additions & 6 deletions
@@ -6,10 +6,21 @@
 num_class = 1000
 batch_size = get_config_arg('batch_size', int, 128)
 use_gpu = get_config_arg('use_gpu', bool, True)
-
-args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+is_infer = get_config_arg("is_infer", bool, False)
+
+args = {
+    'height': height,
+    'width': width,
+    'color': True,
+    'num_class': num_class,
+    'is_infer': is_infer
+}
 define_py_data_sources2(
-    "train.list", None, module="provider", obj="process", args=args)
+    "train.list" if not is_infer else None,
+    "test.list" if is_infer else None,
+    module="provider",
+    obj="process",
+    args=args)

 settings(
     batch_size=batch_size,
@@ -146,7 +157,6 @@ def inception(name, input, channels, \
     return cat


-lab = data_layer(name="label", size=1000)
 data = data_layer(name="input", size=3 * height * width)

 # stage 1
@@ -224,6 +234,10 @@ def inception(name, input, channels, \
 dropout = dropout_layer(name="dropout", input=pool5, dropout_rate=0.4)
 out3 = fc_layer(
     name="output3", input=dropout, size=1000, act=SoftmaxActivation())
-loss3 = cross_entropy(name='loss3', input=out3, label=lab)

-outputs(loss3)
+if is_infer:
+    outputs(out3)
+else:
+    lab = data_layer(name="label", size=num_class)
+    loss3 = cross_entropy(name='loss3', input=out3, label=lab)
+    outputs(loss3)
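
The same train-versus-inference switch is applied to resnet.py and vgg.py below: a new is_infer config argument is read with get_config_arg, forwarded to provider.py through args, used to choose between train.list and test.list, and used to decide whether the config ends in a cross_entropy loss or exposes the network output directly. A minimal sketch of that shared pattern follows; it assumes the `from paddle.trainer_config_helpers import *` preamble these legacy configs already use, and build_network() is a hypothetical stand-in for the topology-specific layers.

# Minimal sketch of the shared is_infer pattern (not the literal benchmark config).
# Assumes the legacy PaddlePaddle v1 config API from paddle.trainer_config_helpers.
from paddle.trainer_config_helpers import *

num_class = 1000
is_infer = get_config_arg("is_infer", bool, False)

# Forward is_infer to provider.py so it can drop the label slot (see below).
args = {'height': 224, 'width': 224, 'color': True,
        'num_class': num_class, 'is_infer': is_infer}
define_py_data_sources2(
    "train.list" if not is_infer else None,  # train list only when training
    "test.list" if is_infer else None,       # test list only when inferring
    module="provider", obj="process", args=args)

settings(batch_size=128, learning_rate=0.01)


def build_network():
    # Hypothetical stand-in for the GoogLeNet/ResNet/VGG layer definitions.
    img = data_layer(name="input", size=3 * 224 * 224)
    return fc_layer(name="output", input=img, size=num_class,
                    act=SoftmaxActivation())


out = build_network()
if is_infer:
    outputs(out)  # inference: expose the softmax output, no label layer
else:
    lab = data_layer(name="label", size=num_class)
    outputs(cross_entropy(input=out, label=lab))  # training: attach the loss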

benchmark/paddle/image/provider.py

Lines changed: 11 additions & 5 deletions
@@ -13,14 +13,20 @@ def initHook(settings, height, width, color, num_class, **kwargs):
         settings.data_size = settings.height * settings.width * 3
     else:
         settings.data_size = settings.height * settings.width
-
-    settings.slots = [dense_vector(settings.data_size), integer_value(1)]
+    settings.is_infer = kwargs.get('is_infer', False)
+    if settings.is_infer:
+        settings.slots = [dense_vector(settings.data_size)]
+    else:
+        settings.slots = [dense_vector(settings.data_size), integer_value(1)]


 @provider(
     init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
 def process(settings, file_list):
-    for i in xrange(1024):
+    for i in xrange(2560 if settings.is_infer else 1024):
         img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten()
-        lab = random.randint(0, settings.num_class - 1)
-        yield img.astype('float32'), int(lab)
+        if settings.is_infer:
+            yield img.astype('float32')
+        else:
+            lab = random.randint(0, settings.num_class - 1)
+            yield img.astype('float32'), int(lab)
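
Pieced together, the updated provider looks roughly as follows. This is a readability sketch, assuming the paddle.trainer.PyDataProvider2 helpers and the numpy/random imports the original file already relies on; the assignments at the top of initHook are reconstructed from the hook's arguments rather than copied from the diff.

# Consolidated sketch of the updated provider.py (assumed imports shown explicitly).
import random

import numpy as np
from paddle.trainer.PyDataProvider2 import *


def initHook(settings, height, width, color, num_class, **kwargs):
    # Reconstructed: the hook presumably stores its arguments on `settings`.
    settings.height = height
    settings.width = width
    settings.color = color
    settings.num_class = num_class
    if settings.color:
        settings.data_size = settings.height * settings.width * 3
    else:
        settings.data_size = settings.height * settings.width
    settings.is_infer = kwargs.get('is_infer', False)
    if settings.is_infer:
        # Inference: a single dense image slot, no label.
        settings.slots = [dense_vector(settings.data_size)]
    else:
        # Training: dense image plus an integer class label.
        settings.slots = [dense_vector(settings.data_size), integer_value(1)]


@provider(
    init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_list):
    # Synthetic data: 2560 samples for inference (the benchmark script measures
    # the last 1280 and treats the rest as warm-up), 1024 for training.
    for i in xrange(2560 if settings.is_infer else 1024):
        img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten()
        if settings.is_infer:
            yield img.astype('float32')
        else:
            lab = random.randint(0, settings.num_class - 1)
            yield img.astype('float32'), int(lab)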

benchmark/paddle/image/resnet.py

Lines changed: 24 additions & 9 deletions
@@ -6,11 +6,21 @@
 num_class = 1000
 batch_size = get_config_arg('batch_size', int, 64)
 layer_num = get_config_arg("layer_num", int, 50)
-is_test = get_config_arg("is_test", bool, False)
-
-args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+is_infer = get_config_arg("is_infer", bool, False)
+
+args = {
+    'height': height,
+    'width': width,
+    'color': True,
+    'num_class': num_class,
+    'is_infer': is_infer
+}
 define_py_data_sources2(
-    "train.list", None, module="provider", obj="process", args=args)
+    "train.list" if not is_infer else None,
+    "test.list" if is_infer else None,
+    module="provider",
+    obj="process",
+    args=args)

 settings(
     batch_size=batch_size,
@@ -45,7 +55,10 @@ def conv_bn_layer(name,
         act=LinearActivation(),
         bias_attr=False)
     return batch_norm_layer(
-        name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test)
+        name=name + "_bn",
+        input=tmp,
+        act=active_type,
+        use_global_stats=is_infer)


 def bottleneck_block(name, input, num_filters1, num_filters2):
@@ -207,7 +220,9 @@ def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
 else:
     print("Wrong layer number.")

-lbl = data_layer(name="label", size=num_class)
-loss = cross_entropy(name='loss', input=resnet, label=lbl)
-inputs(img, lbl)
-outputs(loss)
+if is_infer:
+    outputs(resnet)
+else:
+    lbl = data_layer(name="label", size=num_class)
+    loss = cross_entropy(name='loss', input=resnet, label=lbl)
+    outputs(loss)

benchmark/paddle/image/run_mkldnn_infer.sh (new file)

Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
+set -e
+
+function clock_to_seconds() {
+  hours=`echo $1 | awk -F ':' '{print $1}'`
+  mins=`echo $1 | awk -F ':' '{print $2}'`
+  secs=`echo $1 | awk -F ':' '{print $3}'`
+  echo `awk 'BEGIN{printf "%.2f",('$secs' + '$mins' * 60 + '$hours' * 3600)}'`
+}
+
+function infer() {
+  unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
+  topology=$1
+  layer_num=$2
+  bs=$3
+  use_mkldnn=$4
+  if [ $4 == "True" ]; then
+    thread=1
+    log="logs/infer-${topology}-${layer_num}-mkldnn-${bs}.log"
+  elif [ $4 == "False" ]; then
+    thread=`nproc`
+    if [ $thread -gt $bs ]; then
+      thread=$bs
+    fi
+    log="logs/infer-${topology}-${layer_num}-${thread}mklml-${bs}.log"
+  else
+    echo "Wrong input $4, use True or False."
+    exit 0
+  fi
+
+  models_in="models/${topology}-${layer_num}/pass-00000/"
+  if [ ! -d $models_in ]; then
+    echo "Training model ${topology}_${layer_num}"
+    paddle train --job=train \
+      --config="${topology}.py" \
+      --use_mkldnn=True \
+      --use_gpu=False \
+      --trainer_count=1 \
+      --num_passes=1 \
+      --save_dir="models/${topology}-${layer_num}" \
+      --config_args="batch_size=128,layer_num=${layer_num}" \
+      > /dev/null 2>&1
+    echo "Done"
+  fi
+  log_period=$((256 / bs))
+  paddle train --job=test \
+    --config="${topology}.py" \
+    --use_mkldnn=$use_mkldnn \
+    --use_gpu=False \
+    --trainer_count=$thread \
+    --log_period=$log_period \
+    --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True" \
+    --init_model_path=$models_in \
+    2>&1 | tee ${log}
+
+  # calculate the last 5 logs period time of 1280 samples,
+  # the time before are burning time.
+  start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
+  end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
+  start_sec=`clock_to_seconds $start`
+  end_sec=`clock_to_seconds $end`
+  fps=`awk 'BEGIN{printf "%.2f",(1280 / ('$end_sec' - '$start_sec'))}'`
+  echo "Last 1280 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log}
+  echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
+}
+
+if [ ! -f "train.list" ]; then
+  echo " " > train.list
+fi
+if [ ! -f "test.list" ]; then
+  echo " " > test.list
+fi
+if [ ! -d "logs" ]; then
+  mkdir logs
+fi
+if [ ! -d "models" ]; then
+  mkdir -p models
+fi
+
+# inference benchmark
+for use_mkldnn in True False; do
+  for batchsize in 1 2 4 8 16; do
+    infer googlenet v1 $batchsize $use_mkldnn
+    infer resnet 50 $batchsize $use_mkldnn
+    infer vgg 19 $batchsize $use_mkldnn
+  done
+done
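
The awk pipeline at the end of infer() is plain timestamp arithmetic: it reads the wall-clock times bracketing the last five log periods (256 samples each, 1280 samples in total, after warm-up) and divides the sample count by the elapsed seconds. A pure-Python equivalent of that calculation, using made-up example timestamps:

# Python equivalent of clock_to_seconds() and the FPS line in infer().
# The timestamps are made-up examples of what the paddle train log prints.

def clock_to_seconds(hms):
    """Convert an HH:MM:SS(.ss) wall-clock string to seconds."""
    hours, mins, secs = hms.split(":")
    return int(hours) * 3600 + int(mins) * 60 + float(secs)

start = "12:03:10.50"  # log line five periods (1280 samples) before the end
end = "12:03:52.90"    # last per-period log line

elapsed = clock_to_seconds(end) - clock_to_seconds(start)
fps = 1280.0 / elapsed  # 5 log periods x 256 samples each
print("FPS: %.2f images/sec" % fps)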

benchmark/paddle/image/run_mkldnn.sh renamed to benchmark/paddle/image/run_mkldnn_train.sh

Lines changed: 5 additions & 4 deletions
@@ -8,13 +8,13 @@ function train() {
   use_mkldnn=$4
   if [ $4 == "True" ]; then
     thread=1
-    log="logs/${topology}-${layer_num}-mkldnn-${bs}.log"
+    log="logs/train-${topology}-${layer_num}-mkldnn-${bs}.log"
   elif [ $4 == "False" ]; then
     thread=`nproc`
     # each trainer_count use only 1 core to avoid conflict
-    log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log"
+    log="logs/train-${topology}-${layer_num}-${thread}mklml-${bs}.log"
   else
-    echo "Wrong input $3, use True or False."
+    echo "Wrong input $4, use True or False."
     exit 0
   fi
   args="batch_size=${bs},layer_num=${layer_num}"
@@ -30,13 +30,14 @@ function train() {
     2>&1 | tee ${log}
 }

-if [ ! -d "train.list" ]; then
+if [ ! -f "train.list" ]; then
   echo " " > train.list
 fi
 if [ ! -d "logs" ]; then
   mkdir logs
 fi

+# training benchmark
 for use_mkldnn in True False; do
   for batchsize in 64 128 256; do
     train vgg 19 $batchsize $use_mkldnn

benchmark/paddle/image/vgg.py

Lines changed: 19 additions & 5 deletions
@@ -6,10 +6,21 @@
 num_class = 1000
 batch_size = get_config_arg('batch_size', int, 64)
 layer_num = get_config_arg('layer_num', int, 19)
+is_infer = get_config_arg("is_infer", bool, False)

-args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+args = {
+    'height': height,
+    'width': width,
+    'color': True,
+    'num_class': num_class,
+    'is_infer': is_infer
+}
 define_py_data_sources2(
-    "train.list", None, module="provider", obj="process", args=args)
+    "train.list" if not is_infer else None,
+    "test.list" if is_infer else None,
+    module="provider",
+    obj="process",
+    args=args)

 settings(
     batch_size=batch_size,
@@ -98,6 +109,9 @@ def vgg_network(vgg_num=3):
 else:
     print("Wrong layer number.")

-lab = data_layer('label', num_class)
-loss = cross_entropy(input=vgg, label=lab)
-outputs(loss)
+if is_infer:
+    outputs(vgg)
+else:
+    lab = data_layer('label', num_class)
+    loss = cross_entropy(input=vgg, label=lab)
+    outputs(loss)

cmake/external/cares.cmake

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
 # limitations under the License.
 #

-IF(MOBILE_INFERENCE)
+IF(MOBILE_INFERENCE OR NOT WITH_DISTRIBUTE)
     return()
 ENDIF()

cmake/external/glog.cmake

Lines changed: 11 additions & 2 deletions
@@ -26,12 +26,21 @@ ENDIF(WIN32)

 INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})

+IF(ANDROID AND ${CMAKE_SYSTEM_VERSION} VERSION_LESS "21")
+    # Using the unofficial glog for Android API < 21
+    SET(GLOG_REPOSITORY "https://github.com/Xreki/glog.git")
+    SET(GLOG_TAG "8a547150548b284382ccb6582408e9140ff2bea8")
+ELSE()
+    SET(GLOG_REPOSITORY "https://github.com/google/glog.git")
+    SET(GLOG_TAG "v0.3.5")
+ENDIF()
+
 ExternalProject_Add(
     extern_glog
     ${EXTERNAL_PROJECT_LOG_ARGS}
     DEPENDS gflags
-    GIT_REPOSITORY "https://github.com/google/glog.git"
-    GIT_TAG v0.3.5
+    GIT_REPOSITORY ${GLOG_REPOSITORY}
+    GIT_TAG ${GLOG_TAG}
     PREFIX ${GLOG_SOURCES_DIR}
     UPDATE_COMMAND ""
     CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}

cmake/external/grpc.cmake

Lines changed: 11 additions & 3 deletions
@@ -13,7 +13,7 @@
 # limitations under the License.
 #

-IF(MOBILE_INFERENCE)
+IF(MOBILE_INFERENCE OR NOT WITH_DISTRIBUTE)
     return()
 ENDIF()

@@ -23,6 +23,11 @@ SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc)
 SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc)
 SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE)
 SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE)
+IF(APPLE)
+    SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
+ELSE()
+    SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin)
+ENDIF()

 ExternalProject_Add(
     extern_grpc
@@ -33,7 +38,11 @@ ExternalProject_Add(
     UPDATE_COMMAND ""
     CONFIGURE_COMMAND ""
     BUILD_IN_SOURCE 1
-    BUILD_COMMAND make
+    # NOTE(yuyang18):
+    # Disable -Werror, otherwise the compile will fail in MacOS.
+    # It seems that we cannot configure that by make command.
+    # Just dry run make command and remove `-Werror`, then use a shell to run make commands
+    BUILD_COMMAND ${BUILD_CMD}
     INSTALL_COMMAND make prefix=${GRPC_INSTALL_DIR} install
 )

@@ -55,4 +64,3 @@ SET_PROPERTY(TARGET grpc_unsecure PROPERTY IMPORTED_LOCATION

 include_directories(${GRPC_INCLUDE_DIR})
 ADD_DEPENDENCIES(grpc++_unsecure extern_grpc)
-
