Commit ee3483b

Merge branch 'develop' of github.com:PaddlePaddle/Paddle into fix_404_dist_train

2 parents: 10acacf + 3644446

192 files changed: +6401 / -2408 lines. Only a subset of the changed files is shown below.

CMakeLists.txt

Lines changed: 5 additions & 3 deletions
@@ -16,6 +16,8 @@ cmake_minimum_required(VERSION 3.0)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
+SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
+SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")

 include(system)

@@ -54,6 +56,7 @@ option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
 option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
 option(GLIDE_INSTALL "Download and install go dependencies " ON)
 option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
+option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
 option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)

 # CMAKE_BUILD_TYPE
@@ -67,9 +70,6 @@ if(ANDROID OR IOS)
     if(ANDROID)
         if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "16")
             message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 16")
-        elseif(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21")
-            # TODO: support glog for Android api 16 ~ 19 in the future
-            message(WARNING "Using the unofficial git repository <https://github.com/Xreki/glog.git> instead")
         endif()
     endif()

@@ -83,6 +83,8 @@ if(ANDROID OR IOS)
         "Disable RDMA when cross-compiling for Android and iOS" FORCE)
     set(WITH_MKL OFF CACHE STRING
         "Disable MKL when cross-compiling for Android and iOS" FORCE)
+    set(WITH_GOLANG OFF CACHE STRING
+        "Disable golang when cross-compiling for Android and iOS" FORCE)

     # Compile PaddlePaddle mobile inference library
     if (NOT WITH_C_API)

benchmark/paddle/image/googlenet.py

Lines changed: 20 additions & 6 deletions
@@ -6,10 +6,21 @@
 num_class = 1000
 batch_size = get_config_arg('batch_size', int, 128)
 use_gpu = get_config_arg('use_gpu', bool, True)
-
-args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+is_infer = get_config_arg("is_infer", bool, False)
+
+args = {
+    'height': height,
+    'width': width,
+    'color': True,
+    'num_class': num_class,
+    'is_infer': is_infer
+}
 define_py_data_sources2(
-    "train.list", None, module="provider", obj="process", args=args)
+    "train.list" if not is_infer else None,
+    "test.list" if is_infer else None,
+    module="provider",
+    obj="process",
+    args=args)

 settings(
     batch_size=batch_size,
@@ -146,7 +157,6 @@ def inception(name, input, channels, \
     return cat


-lab = data_layer(name="label", size=1000)
 data = data_layer(name="input", size=3 * height * width)

 # stage 1
@@ -224,6 +234,10 @@ def inception(name, input, channels, \
 dropout = dropout_layer(name="dropout", input=pool5, dropout_rate=0.4)
 out3 = fc_layer(
     name="output3", input=dropout, size=1000, act=SoftmaxActivation())
-loss3 = cross_entropy(name='loss3', input=out3, label=lab)

-outputs(loss3)
+if is_infer:
+    outputs(out3)
+else:
+    lab = data_layer(name="label", size=num_class)
+    loss3 = cross_entropy(name='loss3', input=out3, label=lab)
+    outputs(loss3)
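
The same train-versus-inference switch is applied to resnet.py and vgg.py below: a new is_infer config argument is read with get_config_arg, forwarded to provider.py through args, used to choose between train.list and test.list, and used to decide whether the config ends in a cross_entropy loss or exposes the network output directly. A minimal sketch of that shared pattern follows; it assumes the `from paddle.trainer_config_helpers import *` preamble these legacy configs already use, and build_network() is a hypothetical stand-in for the topology-specific layers.

# Minimal sketch of the shared is_infer pattern (not the literal benchmark config).
# Assumes the legacy PaddlePaddle v1 config API from paddle.trainer_config_helpers.
from paddle.trainer_config_helpers import *

num_class = 1000
is_infer = get_config_arg("is_infer", bool, False)

# Forward is_infer to provider.py so it can drop the label slot (see below).
args = {'height': 224, 'width': 224, 'color': True,
        'num_class': num_class, 'is_infer': is_infer}
define_py_data_sources2(
    "train.list" if not is_infer else None,  # train list only when training
    "test.list" if is_infer else None,       # test list only when inferring
    module="provider", obj="process", args=args)

settings(batch_size=128, learning_rate=0.01)


def build_network():
    # Hypothetical stand-in for the GoogLeNet/ResNet/VGG layer definitions.
    img = data_layer(name="input", size=3 * 224 * 224)
    return fc_layer(name="output", input=img, size=num_class,
                    act=SoftmaxActivation())


out = build_network()
if is_infer:
    outputs(out)  # inference: expose the softmax output, no label layer
else:
    lab = data_layer(name="label", size=num_class)
    outputs(cross_entropy(input=out, label=lab))  # training: attach the loss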

benchmark/paddle/image/provider.py

Lines changed: 11 additions & 5 deletions
@@ -13,14 +13,20 @@ def initHook(settings, height, width, color, num_class, **kwargs):
         settings.data_size = settings.height * settings.width * 3
     else:
         settings.data_size = settings.height * settings.width
-
-    settings.slots = [dense_vector(settings.data_size), integer_value(1)]
+    settings.is_infer = kwargs.get('is_infer', False)
+    if settings.is_infer:
+        settings.slots = [dense_vector(settings.data_size)]
+    else:
+        settings.slots = [dense_vector(settings.data_size), integer_value(1)]


 @provider(
     init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
 def process(settings, file_list):
-    for i in xrange(1024):
+    for i in xrange(2560 if settings.is_infer else 1024):
         img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten()
-        lab = random.randint(0, settings.num_class - 1)
-        yield img.astype('float32'), int(lab)
+        if settings.is_infer:
+            yield img.astype('float32')
+        else:
+            lab = random.randint(0, settings.num_class - 1)
+            yield img.astype('float32'), int(lab)
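
Pieced together, the updated provider looks roughly as follows. This is a readability sketch, assuming the paddle.trainer.PyDataProvider2 helpers and the numpy/random imports the original file already relies on; the assignments at the top of initHook are reconstructed from the hook's arguments rather than copied from the diff.

# Consolidated sketch of the updated provider.py (assumed imports shown explicitly).
import random

import numpy as np
from paddle.trainer.PyDataProvider2 import *


def initHook(settings, height, width, color, num_class, **kwargs):
    # Reconstructed: the hook presumably stores its arguments on `settings`.
    settings.height = height
    settings.width = width
    settings.color = color
    settings.num_class = num_class
    if settings.color:
        settings.data_size = settings.height * settings.width * 3
    else:
        settings.data_size = settings.height * settings.width
    settings.is_infer = kwargs.get('is_infer', False)
    if settings.is_infer:
        # Inference: a single dense image slot, no label.
        settings.slots = [dense_vector(settings.data_size)]
    else:
        # Training: dense image plus an integer class label.
        settings.slots = [dense_vector(settings.data_size), integer_value(1)]


@provider(
    init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_list):
    # Synthetic data: 2560 samples for inference (the benchmark script measures
    # the last 1280 and treats the rest as warm-up), 1024 for training.
    for i in xrange(2560 if settings.is_infer else 1024):
        img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten()
        if settings.is_infer:
            yield img.astype('float32')
        else:
            lab = random.randint(0, settings.num_class - 1)
            yield img.astype('float32'), int(lab)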

benchmark/paddle/image/resnet.py

Lines changed: 24 additions & 9 deletions
@@ -6,11 +6,21 @@
 num_class = 1000
 batch_size = get_config_arg('batch_size', int, 64)
 layer_num = get_config_arg("layer_num", int, 50)
-is_test = get_config_arg("is_test", bool, False)
-
-args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+is_infer = get_config_arg("is_infer", bool, False)
+
+args = {
+    'height': height,
+    'width': width,
+    'color': True,
+    'num_class': num_class,
+    'is_infer': is_infer
+}
 define_py_data_sources2(
-    "train.list", None, module="provider", obj="process", args=args)
+    "train.list" if not is_infer else None,
+    "test.list" if is_infer else None,
+    module="provider",
+    obj="process",
+    args=args)

 settings(
     batch_size=batch_size,
@@ -45,7 +55,10 @@ def conv_bn_layer(name,
         act=LinearActivation(),
         bias_attr=False)
     return batch_norm_layer(
-        name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test)
+        name=name + "_bn",
+        input=tmp,
+        act=active_type,
+        use_global_stats=is_infer)


 def bottleneck_block(name, input, num_filters1, num_filters2):
@@ -207,7 +220,9 @@ def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
 else:
     print("Wrong layer number.")

-lbl = data_layer(name="label", size=num_class)
-loss = cross_entropy(name='loss', input=resnet, label=lbl)
-inputs(img, lbl)
-outputs(loss)
+if is_infer:
+    outputs(resnet)
+else:
+    lbl = data_layer(name="label", size=num_class)
+    loss = cross_entropy(name='loss', input=resnet, label=lbl)
+    outputs(loss)

benchmark/paddle/image/run_mkldnn_infer.sh (new file)

Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
+set -e
+
+function clock_to_seconds() {
+  hours=`echo $1 | awk -F ':' '{print $1}'`
+  mins=`echo $1 | awk -F ':' '{print $2}'`
+  secs=`echo $1 | awk -F ':' '{print $3}'`
+  echo `awk 'BEGIN{printf "%.2f",('$secs' + '$mins' * 60 + '$hours' * 3600)}'`
+}
+
+function infer() {
+  unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
+  topology=$1
+  layer_num=$2
+  bs=$3
+  use_mkldnn=$4
+  if [ $4 == "True" ]; then
+    thread=1
+    log="logs/infer-${topology}-${layer_num}-mkldnn-${bs}.log"
+  elif [ $4 == "False" ]; then
+    thread=`nproc`
+    if [ $thread -gt $bs ]; then
+      thread=$bs
+    fi
+    log="logs/infer-${topology}-${layer_num}-${thread}mklml-${bs}.log"
+  else
+    echo "Wrong input $4, use True or False."
+    exit 0
+  fi
+
+  models_in="models/${topology}-${layer_num}/pass-00000/"
+  if [ ! -d $models_in ]; then
+    echo "Training model ${topology}_${layer_num}"
+    paddle train --job=train \
+      --config="${topology}.py" \
+      --use_mkldnn=True \
+      --use_gpu=False \
+      --trainer_count=1 \
+      --num_passes=1 \
+      --save_dir="models/${topology}-${layer_num}" \
+      --config_args="batch_size=128,layer_num=${layer_num}" \
+      > /dev/null 2>&1
+    echo "Done"
+  fi
+  log_period=$((256 / bs))
+  paddle train --job=test \
+    --config="${topology}.py" \
+    --use_mkldnn=$use_mkldnn \
+    --use_gpu=False \
+    --trainer_count=$thread \
+    --log_period=$log_period \
+    --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True" \
+    --init_model_path=$models_in \
+    2>&1 | tee ${log}
+
+  # calculate the last 5 logs period time of 1280 samples,
+  # the time before are burning time.
+  start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
+  end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
+  start_sec=`clock_to_seconds $start`
+  end_sec=`clock_to_seconds $end`
+  fps=`awk 'BEGIN{printf "%.2f",(1280 / ('$end_sec' - '$start_sec'))}'`
+  echo "Last 1280 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log}
+  echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
+}
+
+if [ ! -f "train.list" ]; then
+  echo " " > train.list
+fi
+if [ ! -f "test.list" ]; then
+  echo " " > test.list
+fi
+if [ ! -d "logs" ]; then
+  mkdir logs
+fi
+if [ ! -d "models" ]; then
+  mkdir -p models
+fi
+
+# inference benchmark
+for use_mkldnn in True False; do
+  for batchsize in 1 2 4 8 16; do
+    infer googlenet v1 $batchsize $use_mkldnn
+    infer resnet 50 $batchsize $use_mkldnn
+    infer vgg 19 $batchsize $use_mkldnn
+  done
+done
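
The awk pipeline at the end of infer() is plain timestamp arithmetic: it reads the wall-clock times bracketing the last five log periods (256 samples each, 1280 samples in total, after warm-up) and divides the sample count by the elapsed seconds. A pure-Python equivalent of that calculation, using made-up example timestamps:

# Python equivalent of clock_to_seconds() and the FPS line in infer().
# The timestamps are made-up examples of what the paddle train log prints.

def clock_to_seconds(hms):
    """Convert an HH:MM:SS(.ss) wall-clock string to seconds."""
    hours, mins, secs = hms.split(":")
    return int(hours) * 3600 + int(mins) * 60 + float(secs)

start = "12:03:10.50"  # log line five periods (1280 samples) before the end
end = "12:03:52.90"    # last per-period log line

elapsed = clock_to_seconds(end) - clock_to_seconds(start)
fps = 1280.0 / elapsed  # 5 log periods x 256 samples each
print("FPS: %.2f images/sec" % fps)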

benchmark/paddle/image/run_mkldnn.sh renamed to benchmark/paddle/image/run_mkldnn_train.sh

Lines changed: 5 additions & 4 deletions
@@ -8,13 +8,13 @@ function train() {
   use_mkldnn=$4
   if [ $4 == "True" ]; then
     thread=1
-    log="logs/${topology}-${layer_num}-mkldnn-${bs}.log"
+    log="logs/train-${topology}-${layer_num}-mkldnn-${bs}.log"
   elif [ $4 == "False" ]; then
     thread=`nproc`
     # each trainer_count use only 1 core to avoid conflict
-    log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log"
+    log="logs/train-${topology}-${layer_num}-${thread}mklml-${bs}.log"
   else
-    echo "Wrong input $3, use True or False."
+    echo "Wrong input $4, use True or False."
     exit 0
   fi
   args="batch_size=${bs},layer_num=${layer_num}"
@@ -30,13 +30,14 @@ function train() {
     2>&1 | tee ${log}
 }

-if [ ! -d "train.list" ]; then
+if [ ! -f "train.list" ]; then
   echo " " > train.list
 fi
 if [ ! -d "logs" ]; then
   mkdir logs
 fi

+# training benchmark
 for use_mkldnn in True False; do
   for batchsize in 64 128 256; do
     train vgg 19 $batchsize $use_mkldnn

benchmark/paddle/image/vgg.py

Lines changed: 19 additions & 5 deletions
@@ -6,10 +6,21 @@
 num_class = 1000
 batch_size = get_config_arg('batch_size', int, 64)
 layer_num = get_config_arg('layer_num', int, 19)
+is_infer = get_config_arg("is_infer", bool, False)

-args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+args = {
+    'height': height,
+    'width': width,
+    'color': True,
+    'num_class': num_class,
+    'is_infer': is_infer
+}
 define_py_data_sources2(
-    "train.list", None, module="provider", obj="process", args=args)
+    "train.list" if not is_infer else None,
+    "test.list" if is_infer else None,
+    module="provider",
+    obj="process",
+    args=args)

 settings(
     batch_size=batch_size,
@@ -98,6 +109,9 @@ def vgg_network(vgg_num=3):
 else:
     print("Wrong layer number.")

-lab = data_layer('label', num_class)
-loss = cross_entropy(input=vgg, label=lab)
-outputs(loss)
+if is_infer:
+    outputs(vgg)
+else:
+    lab = data_layer('label', num_class)
+    loss = cross_entropy(input=vgg, label=lab)
+    outputs(loss)

cmake/external/cares.cmake

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
 # limitations under the License.
 #

-IF(MOBILE_INFERENCE)
+IF(MOBILE_INFERENCE OR NOT WITH_DISTRIBUTE)
     return()
 ENDIF()

cmake/external/glog.cmake

Lines changed: 11 additions & 2 deletions
@@ -26,12 +26,21 @@ ENDIF(WIN32)

 INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})

+IF(ANDROID AND ${CMAKE_SYSTEM_VERSION} VERSION_LESS "21")
+    # Using the unofficial glog for Android API < 21
+    SET(GLOG_REPOSITORY "https://github.com/Xreki/glog.git")
+    SET(GLOG_TAG "8a547150548b284382ccb6582408e9140ff2bea8")
+ELSE()
+    SET(GLOG_REPOSITORY "https://github.com/google/glog.git")
+    SET(GLOG_TAG "v0.3.5")
+ENDIF()
+
 ExternalProject_Add(
     extern_glog
     ${EXTERNAL_PROJECT_LOG_ARGS}
     DEPENDS gflags
-    GIT_REPOSITORY "https://github.com/google/glog.git"
-    GIT_TAG v0.3.5
+    GIT_REPOSITORY ${GLOG_REPOSITORY}
+    GIT_TAG ${GLOG_TAG}
     PREFIX ${GLOG_SOURCES_DIR}
     UPDATE_COMMAND ""
     CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}

cmake/external/grpc.cmake

Lines changed: 11 additions & 3 deletions
@@ -13,7 +13,7 @@
 # limitations under the License.
 #

-IF(MOBILE_INFERENCE)
+IF(MOBILE_INFERENCE OR NOT WITH_DISTRIBUTE)
     return()
 ENDIF()

@@ -23,6 +23,11 @@ SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc)
 SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc)
 SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE)
 SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE)
+IF(APPLE)
+    SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
+ELSE()
+    SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin)
+ENDIF()

 ExternalProject_Add(
     extern_grpc
@@ -33,7 +38,11 @@ ExternalProject_Add(
     UPDATE_COMMAND ""
     CONFIGURE_COMMAND ""
     BUILD_IN_SOURCE 1
-    BUILD_COMMAND make
+    # NOTE(yuyang18):
+    # Disable -Werror, otherwise the compile will fail in MacOS.
+    # It seems that we cannot configure that by make command.
+    # Just dry run make command and remove `-Werror`, then use a shell to run make commands
+    BUILD_COMMAND ${BUILD_CMD}
     INSTALL_COMMAND make prefix=${GRPC_INSTALL_DIR} install
 )

@@ -55,4 +64,3 @@ SET_PROPERTY(TARGET grpc_unsecure PROPERTY IMPORTED_LOCATION

 include_directories(${GRPC_INCLUDE_DIR})
 ADD_DEPENDENCIES(grpc++_unsecure extern_grpc)
-
