Skip to content

Commit 5b9d09d

Browse files
authored
Merge branch 'develop' into high_level_api_machine_translation
2 parents b0868af + 87ff95d commit 5b9d09d

File tree

184 files changed

+8071
-3245
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

184 files changed

+8071
-3245
lines changed

CMakeLists.txt

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
5959
option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
6060
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
6161
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
62-
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
6362

6463
# CMAKE_BUILD_TYPE
6564
if(NOT CMAKE_BUILD_TYPE)
@@ -100,6 +99,9 @@ endif()
10099
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
101100
"A path setting third party libraries download & build directories.")
102101

102+
set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING
103+
"A path setting fluid shared and static libraries")
104+
103105
if (WITH_C_API AND WITH_PYTHON)
104106
message(WARNING "It is suggest not embedded a python interpreter in Paddle "
105107
"when using C-API. It will give an unpredictable behavior when using a "
@@ -117,13 +119,14 @@ else()
117119
endif()
118120

119121
set(WITH_MKLML ${WITH_MKL})
120-
if (WITH_MKL AND AVX2_FOUND)
121-
set(WITH_MKLDNN ON)
122-
else()
123-
message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN")
124-
set(WITH_MKLDNN OFF)
122+
if (NOT DEFINED WITH_MKLDNN)
123+
if (WITH_MKL AND AVX2_FOUND)
124+
set(WITH_MKLDNN ON)
125+
else()
126+
message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN")
127+
set(WITH_MKLDNN OFF)
128+
endif()
125129
endif()
126-
127130
########################################################################################
128131

129132
include(external/mklml) # download mklml package

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
7070
# specify sphinx version as 1.5.6 and remove -U option for [pip install -U
7171
# sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
7272
# version(1.7.1 for now), which causes building documentation failed.
73-
RUN pip install --upgrade pip==9.0.3 && \
73+
RUN easy_install -U pip && \
7474
pip install -U wheel && \
7575
pip install -U docopt PyYAML sphinx==1.5.6 && \
7676
pip install sphinx-rtd-theme==0.1.9 recommonmark

benchmark/cluster/vgg16/vgg16_fluid.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def str2bool(v):
3838

3939
parser = argparse.ArgumentParser(description=__doc__)
4040
parser.add_argument(
41-
'--batch_size', type=int, default=128, help="Batch size for training.")
41+
'--batch_size', type=int, default=16, help="Batch size for training.")
4242
parser.add_argument(
4343
'--learning_rate',
4444
type=float,
@@ -61,7 +61,7 @@ def str2bool(v):
6161
parser.add_argument(
6262
'--data_set',
6363
type=str,
64-
default='cifar10',
64+
default='flowers',
6565
choices=['cifar10', 'flowers'],
6666
help='Optional dataset for benchmark.')
6767
parser.add_argument(
@@ -200,26 +200,30 @@ def run_step(batch_id, data):
200200
fetch_list=[avg_cost, batch_acc, batch_size])
201201
return loss, acc, b_size
202202

203-
if args.profile and args.task_index == 0:
204-
# warmup.
205-
for batch_id, data in enumerate(train_reader()):
206-
if batch_id > 5: break
207-
run_step(batch_id, data)
208-
with profiler.profiler('All', 'total', '/tmp/profile_vgg'):
203+
if args.profile:
204+
with profiler.profiler('All', 'total',
205+
'/tmp/profile_vgg_%d' % args.task_index):
209206
for batch_id, data in enumerate(train_reader()):
210207
if batch_id > 5: break
211208
run_step(batch_id, data)
212209

210+
total_time = 0.0
211+
count = 0
213212
for batch_id, data in enumerate(train_reader()):
214213
ts = time.time()
215214
loss, acc, b_size = run_step(batch_id, data)
216215
iters += 1
217216
num_samples += len(data)
218217
train_pass_acc.add(value=acc, weight=b_size)
218+
219+
duration = time.time() - ts
220+
total_time += duration
221+
count += len(data)
219222
print(
220223
"Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, "
221-
"Speed = %.2f img/s" % (pass_id, iters, loss, acc,
222-
len(data) / (time.time() - ts))
224+
"Speed = %.2f (%.2f) img/s" % (pass_id, iters, loss, acc,
225+
len(data) / duration,
226+
count / total_time)
223227
) # The accuracy is the accumulation of batches, but not the current batch.
224228

225229
pass_elapsed = time.time() - start_time

benchmark/fluid/README.md

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Fluid Benchmark
2+
3+
This directory contains several model configurations and tools that are used to run
4+
Fluid benchmarks for local and distributed training.
5+
6+
7+
## Run the Benchmark
8+
9+
To start, run the following command to get the full help message:
10+
11+
```bash
12+
python fluid_benchmark.py --help
13+
```
14+
15+
Currently supported `--model` arguments include:
16+
17+
* mnist
18+
* resnet
19+
* you can chose to use different dataset using `--data_set cifar10` or
20+
`--data_set flowers`.
21+
* vgg
22+
* stacked_dynamic_lstm
23+
* machine_translation
24+
25+
* Run the following command to start a benchmark job locally:
26+
```bash
27+
python fluid_benchmark.py --model mnist --parallel 1 --device GPU --with_test
28+
```
29+
You can choose to use GPU/CPU training. With GPU training, you can specify
30+
`--parallel 1` to run multi-GPU training.
31+
* Run distributed training with parameter servers:
32+
* start parameter servers:
33+
```bash
34+
PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method pserver
35+
```
36+
* start trainers:
37+
```bash
38+
PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method pserver
39+
```
40+
* Run distributed training using NCCL2
41+
```bash
42+
PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method nccl2
43+
```
44+
45+
## Run Distributed Benchmark on Kubernetes Cluster
46+
47+
We provide a script `kube_gen_job.py` to generate Kubernetes yaml files to submit
48+
distributed benchmark jobs to your cluster. To generate a job yaml, just run:
49+
50+
```bash
51+
python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --parallel 1 --device GPU --update_method pserver --with_test" --disttype pserver
52+
```
53+
54+
Then the yaml files are generated under directory `myjob`, you can run:
55+
56+
```bash
57+
kubectl create -f myjob/
58+
```
59+
60+
The job should then start.

0 commit comments

Comments
 (0)