Skip to content

Commit 47999c4

Browse files
author
Yan Xu
authored
cherry pick: multiple process launch utility test=release/1.3 (#15741)
1 parent 687fb40 commit 47999c4

File tree

5 files changed

+37
-17
lines changed

5 files changed

+37
-17
lines changed

python/paddle/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@
2525
import paddle.dataset
2626
import paddle.batch
2727
import paddle.compat
28+
import paddle.distributed
2829
batch = batch.batch

python/paddle/distributed/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

tools/run_mp.py renamed to python/paddle/distributed/launch.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
GPUS = 8
3838

3939

40-
def start_procs(gpus, cmd, log_dir):
40+
def start_procs(gpus, entrypoint, entrypoint_args, log_dir):
4141
procs = []
4242
log_fns = []
4343
os.system("mkdir -p %s" % log_dir)
@@ -73,12 +73,11 @@ def start_procs(gpus, cmd, log_dir):
7373
"PADDLE_TRAINER_ENDPOINTS": all_nodes_devices_endpoints
7474
})
7575

76-
print("starting process ", i, cmd, curr_env)
76+
print("starting process ", i, entrypoint, entrypoint_args, curr_env)
7777
fn = open("%s/workerlog.%d" % (log_dir, i), "w")
7878
log_fns.append(fn)
79-
procs.append(
80-
subprocess.Popen(
81-
cmd.strip().split(" "), stdout=fn, stderr=fn, env=curr_env))
79+
cmd = [sys.executable, "-u", entrypoint] + entrypoint_args
80+
procs.append(subprocess.Popen(cmd, stdout=fn, stderr=fn, env=curr_env))
8281

8382
for i in range(gpus):
8483
try:
@@ -89,7 +88,8 @@ def start_procs(gpus, cmd, log_dir):
8988
pass
9089

9190

92-
def main():
91+
def parse_args():
92+
9393
parser = argparse.ArgumentParser(
9494
description='''start paddle training using multi-process mode.
9595
NOTE: your train program ***must*** run as distributed nccl2 mode,
@@ -108,21 +108,27 @@ def main():
108108
type=int,
109109
default=8,
110110
help='start number of processes for every gpu')
111-
parser.add_argument(
112-
'--cmd',
113-
type=str,
114-
default="",
115-
help='command to run for each process, e.g. python train.py --lr 0.1')
116111
parser.add_argument(
117112
'--log_dir',
118113
type=str,
119114
default="mylog",
120115
help='directory to put logs per process.')
121-
args = parser.parse_args()
122-
if args.cmd == "":
123-
parser.print_help()
124-
exit(0)
125-
start_procs(args.gpus, args.cmd, args.log_dir)
116+
parser.add_argument(
117+
'entrypoint_script',
118+
type=str,
119+
help="The entrypoint script to be launched in parallel,"
120+
"followed by all the arguments for each process,"
121+
"e.g. train.py --lr 0.1")
122+
parser.add_argument('entrypoint_args', nargs=argparse.REMAINDER)
123+
return parser.parse_args()
124+
125+
126+
def main():
127+
args = parse_args()
128+
129+
# launch multiple training process
130+
start_procs(args.gpus, args.entrypoint_script, args.entrypoint_args,
131+
args.log_dir)
126132

127133

128134
if __name__ == "__main__":

python/paddle/fluid/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,6 @@ def __bootstrap__():
161161
'times_excess_than_required_tmp_allocation',
162162
'enable_inplace_whitelist'
163163
]
164-
165164
core.init_gflags([sys.argv[0]] +
166165
["--tryfromenv=" + ",".join(read_env_flags)])
167166
core.init_glog(sys.argv[0])

python/setup.py.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ packages=['paddle',
100100
'paddle.utils',
101101
'paddle.dataset',
102102
'paddle.reader',
103+
'paddle.distributed',
103104
'paddle.fluid',
104105
'paddle.fluid.imperative',
105106
'paddle.fluid.proto',

0 commit comments

Comments
 (0)