Skip to content

Commit 0aa6f9e

Browse files
committed
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into overlap_send_op
2 parents fc06222 + d736fb8 commit 0aa6f9e

File tree

197 files changed

+8023
-3211
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

197 files changed

+8023
-3211
lines changed

CMakeLists.txt

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO
4141
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
4242
option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
4343
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
44-
option(WITH_STYLE_CHECK "Compile PaddlePaddle with style check" ON)
4544
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
4645
option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
4746
option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
@@ -59,7 +58,6 @@ option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
5958
option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
6059
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
6160
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
62-
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
6361

6462
# CMAKE_BUILD_TYPE
6563
if(NOT CMAKE_BUILD_TYPE)
@@ -100,6 +98,9 @@ endif()
10098
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
10199
"A path setting third party libraries download & build directories.")
102100

101+
set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING
102+
"A path setting fluid shared and static libraries")
103+
103104
if (WITH_C_API AND WITH_PYTHON)
104105
message(WARNING "It is suggest not embedded a python interpreter in Paddle "
105106
"when using C-API. It will give an unpredictable behavior when using a "
@@ -117,13 +118,14 @@ else()
117118
endif()
118119

119120
set(WITH_MKLML ${WITH_MKL})
120-
if (WITH_MKL AND AVX2_FOUND)
121-
set(WITH_MKLDNN ON)
122-
else()
123-
message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN")
124-
set(WITH_MKLDNN OFF)
121+
if (NOT DEFINED WITH_MKLDNN)
122+
if (WITH_MKL AND AVX2_FOUND)
123+
set(WITH_MKLDNN ON)
124+
else()
125+
message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN")
126+
set(WITH_MKLDNN OFF)
127+
endif()
125128
endif()
126-
127129
########################################################################################
128130

129131
include(external/mklml) # download mklml package
@@ -152,7 +154,6 @@ include(cupti)
152154
include(configure) # add paddle env configuration
153155
include(generic) # simplify cmake module
154156
include(package) # set paddle packages
155-
include(cpplint) # set paddle c++ style
156157
include(ccache) # set ccache for compilation
157158
include(util) # set unittest and link libs
158159
include(rdma) # set rdma libraries

benchmark/cluster/vgg16/vgg16_fluid.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def str2bool(v):
3838

3939
parser = argparse.ArgumentParser(description=__doc__)
4040
parser.add_argument(
41-
'--batch_size', type=int, default=128, help="Batch size for training.")
41+
'--batch_size', type=int, default=16, help="Batch size for training.")
4242
parser.add_argument(
4343
'--learning_rate',
4444
type=float,
@@ -61,7 +61,7 @@ def str2bool(v):
6161
parser.add_argument(
6262
'--data_set',
6363
type=str,
64-
default='cifar10',
64+
default='flowers',
6565
choices=['cifar10', 'flowers'],
6666
help='Optional dataset for benchmark.')
6767
parser.add_argument(
@@ -200,26 +200,30 @@ def run_step(batch_id, data):
200200
fetch_list=[avg_cost, batch_acc, batch_size])
201201
return loss, acc, b_size
202202

203-
if args.profile and args.task_index == 0:
204-
# warmup.
205-
for batch_id, data in enumerate(train_reader()):
206-
if batch_id > 5: break
207-
run_step(batch_id, data)
208-
with profiler.profiler('All', 'total', '/tmp/profile_vgg'):
203+
if args.profile:
204+
with profiler.profiler('All', 'total',
205+
'/tmp/profile_vgg_%d' % args.task_index):
209206
for batch_id, data in enumerate(train_reader()):
210207
if batch_id > 5: break
211208
run_step(batch_id, data)
212209

210+
total_time = 0.0
211+
count = 0
213212
for batch_id, data in enumerate(train_reader()):
214213
ts = time.time()
215214
loss, acc, b_size = run_step(batch_id, data)
216215
iters += 1
217216
num_samples += len(data)
218217
train_pass_acc.add(value=acc, weight=b_size)
218+
219+
duration = time.time() - ts
220+
total_time += duration
221+
count += len(data)
219222
print(
220223
"Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, "
221-
"Speed = %.2f img/s" % (pass_id, iters, loss, acc,
222-
len(data) / (time.time() - ts))
224+
"Speed = %.2f (%.2f) img/s" % (pass_id, iters, loss, acc,
225+
len(data) / duration,
226+
count / total_time)
223227
) # The accuracy is the accumulation of batches, but not the current batch.
224228

225229
pass_elapsed = time.time() - start_time

benchmark/fluid/README.md

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Fluid Benchmark
2+
3+
This directory contains several model configurations and tools that are used to run
4+
Fluid benchmarks for local and distributed training.
5+
6+
7+
## Run the Benchmark
8+
9+
To start, run the following command to get the full help message:
10+
11+
```bash
12+
python fluid_benchmark.py --help
13+
```
14+
15+
Currently supported `--model` argument include:
16+
17+
* mnist
18+
* resnet
19+
* you can choose to use a different dataset using `--data_set cifar10` or
20+
`--data_set flowers`.
21+
* vgg
22+
* stacked_dynamic_lstm
23+
* machine_translation
24+
25+
* Run the following command to start a benchmark job locally:
26+
```bash
27+
python fluid_benchmark.py --model mnist --parallel 1 --device GPU --with_test
28+
```
29+
You can choose to use GPU/CPU training. With GPU training, you can specify
30+
`--parallel 1` to run multi-GPU training.
31+
* Run distributed training with parameter servers:
32+
* start parameter servers:
33+
```bash
34+
PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method pserver
35+
```
36+
* start trainers:
37+
```bash
38+
PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method pserver
39+
```
40+
* Run distributed training using NCCL2
41+
```bash
42+
PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method nccl2
43+
```
44+
45+
## Run Distributed Benchmark on Kubernetes Cluster
46+
47+
We provide a script `kube_gen_job.py` to generate Kubernetes yaml files to submit
48+
distributed benchmark jobs to your cluster. To generate a job yaml, just run:
49+
50+
```bash
51+
python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --parallel 1 --device GPU --update_method pserver --with_test" --disttype pserver
52+
```
53+
54+
Then the yaml files are generated under directory `myjob`, you can run:
55+
56+
```bash
57+
kubectl create -f myjob/
58+
```
59+
60+
The job should then start.

0 commit comments

Comments
 (0)