Skip to content

Commit 9942a30

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/non_layer_api_doc
2 parents 1c19f1a + 1d7e60f commit 9942a30

File tree

92 files changed

+3564
-1670
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+3564
-1670
lines changed

benchmark/fluid/fluid_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def dist_transpile(trainer_id, args):
9797
return train_program, fluid.default_startup_program()
9898
else:
9999
raise ValueError(
100-
'TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
100+
'PADDLE_TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
101101
)
102102

103103

benchmark/fluid/kube_gen_job.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,10 @@ def gen_job():
108108
tn_container["ports"][0]["containerPort"] = spreadport
109109

110110
envs.append({"name": "PADDLE_JOB_NAME", "value": args.jobname})
111-
envs.append({"name": "TRAINERS", "value": str(args.trainers)})
111+
envs.append({"name": "PADDLE_TRAINERS", "value": str(args.trainers)})
112112
envs.append({"name": "PSERVERS", "value": str(args.pservers)})
113113
envs.append({"name": "ENTRY", "value": args.entry})
114-
envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)})
114+
envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
115115
envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
116116
# NOTE: these directories below are cluster specific, please modify
117117
# this settings before you run on your own cluster.
@@ -167,16 +167,22 @@ def gen_job():
167167
tn_container["volumeMounts"] = volumeMounts
168168

169169
ps_container["env"] = envs
170-
ps_container["env"].append({"name": "TRAINING_ROLE", "value": "PSERVER"})
170+
ps_container["env"].append({
171+
"name": "PADDLE_TRAINING_ROLE",
172+
"value": "PSERVER"
173+
})
171174
tn_container["env"] = envs
172175
if args.disttype == "pserver":
173176
tn_container["env"].append({
174-
"name": "TRAINING_ROLE",
177+
"name": "PADDLE_TRAINING_ROLE",
175178
"value": "TRAINER"
176179
})
177180
elif args.disttype == "nccl2" or args.disttype == "local":
178181
# NCCL2 have no training role, set to plain WORKER
179-
tn_container["env"].append({"name": "TRAINING_ROLE", "value": "WORKER"})
182+
tn_container["env"].append({
183+
"name": "PADDLE_TRAINING_ROLE",
184+
"value": "WORKER"
185+
})
180186

181187
os.mkdir(args.jobname)
182188
if args.disttype == "pserver":

cmake/external/mkldnn.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
4545
ELSE()
4646
MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN")
4747
ENDIF()
48-
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-unused-result")
48+
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result")
49+
SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value")
4950
SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
5051
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
5152
ExternalProject_Add(

doc/fluid/howto/cluster/fluid_cluster_train_cn.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,13 +168,13 @@ cd /paddle/python/paddle/fluid/tests/book
168168

169169
第二步,启动Parameter Server:
170170
```bash
171-
PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.2 TRAINERS=2 POD_IP=192.168.1.2 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=PSERVER python test_fit_a_line.py
171+
PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.2 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=192.168.1.2 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=PSERVER python test_fit_a_line.py
172172
```
173173
执行命令后请等待出现提示: ```Server listening on 192.168.1.2:6174 ```, 表示Parameter Server已经正常启动。
174174

175175
第三步,启动Trainer:
176176
```bash
177-
PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.3 TRAINERS=2 POD_IP=192.168.1.3 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=TRAINER python test_fit_a_line.py
177+
PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.3 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=192.168.1.3 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=TRAINER python test_fit_a_line.py
178178
```
179179
由于我们定义的Trainer的数量是2个,因此需要在另外一个计算节点上再启动一个Trainer。
180180

doc/fluid/howto/cluster/fluid_recordio.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@ def gen_train_list(file_pattern, trainers, trainer_id):
114114
ret_list.append(f)
115115
return ret_list
116116

117-
trainers = int(os.getenv("TRAINERS"))
118-
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
117+
trainers = int(os.getenv("PADDLE_TRAINERS"))
118+
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
119119
data_file = fluid.layers.io.open_files(
120120
filenames=gen_train_list("./mnist-[0-9]*.recordio", 2, 0),
121121
thread_num=1,

paddle/contrib/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,3 @@
1414
#
1515

1616
add_subdirectory(inference)
17-
add_subdirectory(tape)

paddle/contrib/tape/README.md

Lines changed: 0 additions & 252 deletions
This file was deleted.
-94.4 KB
Binary file not shown.

0 commit comments

Comments
 (0)