Skip to content

Commit c43edff

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into whs_doc1
2 parents 297a169 + d516ace commit c43edff

File tree

353 files changed

+12125
-3936
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

353 files changed

+12125
-3936
lines changed

CMakeLists.txt

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,14 @@ option(WITH_FLUID_ONLY "Compile PaddlePaddle fluid only" OFF)
5555
option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
5656
option(GLIDE_INSTALL "Download and install go dependencies " ON)
5757
option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
58-
option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
58+
option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
5959
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
6060
option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF)
6161
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
6262
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
6363
option(WITH_CONTRIB "Compile the third-party contribution" OFF)
64+
option(WITH_ANAKIN "Compile with Anakin library" OFF)
65+
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
6466

6567
# CMAKE_BUILD_TYPE
6668
if(NOT CMAKE_BUILD_TYPE)
@@ -147,7 +149,16 @@ include(external/any) # download libn::any
147149
include(external/eigen) # download eigen3
148150
include(external/pybind11) # download pybind11
149151
include(external/cares)
150-
include(external/grpc)
152+
153+
if(WITH_DISTRIBUTE)
154+
if(WITH_GRPC)
155+
include(external/grpc)
156+
else()
157+
include(external/leveldb)
158+
include(external/brpc)
159+
endif()
160+
endif()
161+
151162
include(external/snappy) # download snappy
152163
include(external/snappystream)
153164
include(external/threadpool)
@@ -183,7 +194,10 @@ set(EXTERNAL_LIBS
183194
if(WITH_GPU)
184195
include(cuda)
185196
include(tensorrt)
186-
endif(WITH_GPU)
197+
include(external/anakin)
198+
else()
199+
set(WITH_ANAKIN OFF CACHE STRING "Anakin is valid only when GPU is set." FORCE)
200+
endif()
187201

188202
if(WITH_AMD_GPU)
189203
find_package(HIP)

Dockerfile

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ COPY ./paddle/scripts/docker/root/ /root/
2424

2525
RUN apt-get update && \
2626
apt-get install -y --allow-downgrades \
27-
git python-pip python-dev openssh-server bison \
27+
git python-pip python-dev python-opencv openssh-server bison \
2828
libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \
2929
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
3030
curl sed grep graphviz libjpeg-dev zlib1g-dev \
@@ -76,8 +76,7 @@ RUN easy_install -U pip && \
7676
pip install sphinx-rtd-theme==0.1.9 recommonmark
7777

7878
RUN pip install pre-commit 'ipython==5.3.0' && \
79-
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
80-
pip install opencv-python
79+
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0'
8180

8281
#For docstring checker
8382
RUN pip install pylint pytest astroid isort

benchmark/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,6 @@ paddle/rnn/imdb.pkl
77
caffe/image/logs
88
tensorflow/image/logs
99
tensorflow/rnn/logs
10+
fluid/models/*.pyc
11+
fluid/logs
12+
fluid/nohup.out

benchmark/fluid/Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
2-
RUN apt-get update && apt-get install -y python python-pip iputils-ping libgtk2.0-dev wget vim net-tools iftop
2+
RUN apt-get update && apt-get install -y python python-pip iputils-ping libgtk2.0-dev wget vim net-tools iftop python-opencv
33
RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so && ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/libnccl.so
44
RUN pip install -U pip
5-
RUN pip install -U kubernetes opencv-python paddlepaddle
5+
RUN pip install -U kubernetes paddlepaddle
66

77
# IMPORTANT:
88
# Add "ENV http_proxy=http://ip:port" if your download is slow, and don't forget to unset it at runtime.
@@ -19,4 +19,4 @@ ADD *.whl /
1919
RUN pip install /*.whl && rm -f /*.whl && chmod +x /usr/bin/paddle_k8s
2020

2121
ENV LD_LIBRARY_PATH=/usr/local/lib
22-
ADD fluid_benchmark.py dataset.py models/ /workspace/
22+
ADD fluid_benchmark.py recordio_converter.py models/ /workspace/

benchmark/fluid/README.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,18 @@ Currently supported `--model` argument include:
2424

2525
* Run the following command to start a benchmark job locally:
2626
```bash
27-
python fluid_benchmark.py --model mnist --device GPU
27+
python fluid_benchmark.py --model mnist --device GPU
2828
```
2929
You can choose to use GPU/CPU training. With GPU training, you can specify
3030
`--gpus <gpu_num>` to run multi GPU training.
31+
You can enable async mode for the parameter server. With async mode, you can specify
32+
`--async_mode` to train the model asynchronously.
3133
* Run distributed training with parameter servers:
34+
* see [run_fluid_benchmark.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/fluid/run_fluid_benchmark.sh) as an example.
3235
* start parameter servers:
3336
```bash
3437
PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver
38+
sleep 15
3539
```
3640
* start trainers:
3741
```bash
@@ -42,6 +46,16 @@ Currently supported `--model` argument include:
4246
PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method nccl2
4347
```
4448

49+
## Prepare the RecordIO file to Achieve Better Performance
50+
51+
Running the following command will generate RecordIO files like "mnist.recordio" under the path
52+
and batch_size you choose; you can use batch_size=1 so that the reader can change the batch_size
53+
at any time using `fluid.batch`.
54+
55+
```bash
56+
python -c 'from recordio_converter import *; prepare_mnist("data", 1)'
57+
```
58+
4559
## Run Distributed Benchmark on Kubernetes Cluster
4660

4761
You may need to build a Docker image before submitting a cluster job onto Kubernetes, or you will

benchmark/fluid/args.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import argparse
16+
17+
__all__ = ['parse_args', ]
18+
19+
BENCHMARK_MODELS = [
20+
"machine_translation", "resnet", "vgg", "mnist", "stacked_dynamic_lstm"
21+
]
22+
23+
24+
def parse_args():
25+
parser = argparse.ArgumentParser('Fluid model benchmarks.')
26+
parser.add_argument(
27+
'--model',
28+
type=str,
29+
choices=BENCHMARK_MODELS,
30+
default='resnet',
31+
help='The model to run benchmark with.')
32+
parser.add_argument(
33+
'--batch_size', type=int, default=32, help='The minibatch size.')
34+
# args related to learning rate
35+
parser.add_argument(
36+
'--learning_rate', type=float, default=0.001, help='The learning rate.')
37+
# TODO(wuyi): add "--use_fake_data" option back.
38+
parser.add_argument(
39+
'--skip_batch_num',
40+
type=int,
41+
default=5,
42+
help='The first num of minibatch num to skip, for better performance test'
43+
)
44+
parser.add_argument(
45+
'--iterations', type=int, default=80, help='The number of minibatches.')
46+
parser.add_argument(
47+
'--pass_num', type=int, default=100, help='The number of passes.')
48+
parser.add_argument(
49+
'--data_format',
50+
type=str,
51+
default='NCHW',
52+
choices=['NCHW', 'NHWC'],
53+
help='The data format; currently only NCHW is supported.')
54+
parser.add_argument(
55+
'--device',
56+
type=str,
57+
default='GPU',
58+
choices=['CPU', 'GPU'],
59+
help='The device type.')
60+
parser.add_argument(
61+
'--gpus',
62+
type=int,
63+
default=1,
64+
help='If gpus > 1, will use ParallelExecutor to run, else use Executor.')
65+
# this option is available only for vgg and resnet.
66+
parser.add_argument(
67+
'--cpus',
68+
type=int,
69+
default=1,
70+
help='If cpus > 1, will use ParallelDo to run, else use Executor.')
71+
parser.add_argument(
72+
'--data_set',
73+
type=str,
74+
default='flowers',
75+
choices=['cifar10', 'flowers'],
76+
help='Optional dataset for benchmark.')
77+
parser.add_argument(
78+
'--infer_only', action='store_true', help='If set, run forward only.')
79+
parser.add_argument(
80+
'--use_cprof', action='store_true', help='If set, use cProfile.')
81+
parser.add_argument(
82+
'--use_nvprof',
83+
action='store_true',
84+
help='If set, use nvprof for CUDA.')
85+
parser.add_argument(
86+
'--no_test',
87+
action='store_true',
88+
help='If set, do not test the testset during training.')
89+
parser.add_argument(
90+
'--memory_optimize',
91+
action='store_true',
92+
help='If set, optimize runtime memory before start.')
93+
parser.add_argument(
94+
'--use_fake_data',
95+
action='store_true',
96+
help='If set, omit the actual read data operators.')
97+
parser.add_argument(
98+
'--profile', action='store_true', help='If set, profile a few steps.')
99+
parser.add_argument(
100+
'--update_method',
101+
type=str,
102+
default='local',
103+
choices=['local', 'pserver', 'nccl2'],
104+
help='Choose parameter update method, can be local, pserver, nccl2.')
105+
parser.add_argument(
106+
'--no_split_var',
107+
action='store_true',
108+
default=False,
109+
help='Whether split variables into blocks when update_method is pserver')
110+
parser.add_argument(
111+
'--async_mode',
112+
action='store_true',
113+
default=False,
114+
help='Whether start pserver in async mode to support ASGD')
115+
parser.add_argument(
116+
'--use_reader_op',
117+
action='store_true',
118+
help='Whether to use reader op, and must specify the data path if set this to true.'
119+
)
120+
parser.add_argument(
121+
'--data_path',
122+
type=str,
123+
default="",
124+
help='Directory that contains all the training recordio files.')
125+
args = parser.parse_args()
126+
return args

0 commit comments

Comments
 (0)