@@ -23,7 +23,7 @@ class Parallel:
     provided ``backend`` (useful for standalone scripts).

     2) Only initialize a processing group given the ``backend``
-    (useful with tools like `torch.distributed.launch`_, `horovodrun`_, etc).
+    (useful with tools like `torchrun`_, `horovodrun`_, etc).

     Args:
         backend: backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`. If None, no distributed
@@ -50,14 +50,14 @@ class Parallel:
         spawn_kwargs: kwargs to ``idist.spawn`` function.

     Examples:
-        1) Single node or Multi-node, Multi-GPU training launched with `torch.distributed.launch`_ or `horovodrun`_
+        1) Single node or Multi-node, Multi-GPU training launched with `torchrun`_ or `horovodrun`_
         tools

         Single node option with 4 GPUs

         .. code-block:: bash

-            python -m torch.distributed.launch --nproc_per_node=4 --use_env main.py
+            torchrun --nproc_per_node=4 main.py
             # or if installed horovod
             horovodrun -np=4 python main.py

@@ -66,15 +66,15 @@ class Parallel:
         .. code-block:: bash

             ## node 0
-            python -m torch.distributed.launch --nnodes=2 --node_rank=0 --master_addr=master \
-                --master_port=3344 --nproc_per_node=8 --use_env main.py
+            torchrun --nnodes=2 --node_rank=0 --master_addr=master --master_port=3344 \
+                --nproc_per_node=8 main.py

             # or if installed horovod
             horovodrun -np 16 -H hostname1:8,hostname2:8 python main.py

             ## node 1
-            python -m torch.distributed.launch --nnodes=2 --node_rank=1 --master_addr=master \
-                --master_port=3344 --nproc_per_node=8 --use_env main.py
+            torchrun --nnodes=2 --node_rank=1 --master_addr=master --master_port=3344 \
+                --nproc_per_node=8 main.py


         User code is the same for both options:
@@ -92,6 +92,8 @@ def training(local_rank, config, **kwargs):

             backend = "nccl"  # or "horovod" if package is installed

+            config = {"key": "value"}
+
             with idist.Parallel(backend=backend) as parallel:
                 parallel.run(training, config, a=1, b=2)

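Taken together with the launch commands above, the documented pattern corresponds to a small standalone script. Below is a minimal sketch, assuming only the public ``ignite.distributed`` API shown in this diff; the ``config`` payload and the printed message are placeholders.

```python
# main.py: sketch of the user script the docstring describes; launch it with
#   torchrun --nproc_per_node=4 main.py
# or, if horovod is installed,
#   horovodrun -np=4 python main.py
import ignite.distributed as idist


def training(local_rank, config, **kwargs):
    # each worker reports its rank, the config it received, and the active backend
    print(idist.get_rank(), ": run with config:", config, "- backend=", idist.backend())


if __name__ == "__main__":
    backend = "nccl"  # or "horovod" if the package is installed
    config = {"key": "value"}  # placeholder payload forwarded to `training`

    # torchrun/horovodrun already created the worker processes, so Parallel only
    # initializes the process group here (option 2 in the docstring)
    with idist.Parallel(backend=backend) as parallel:
        parallel.run(training, config, a=1, b=2)
```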
@@ -152,6 +154,8 @@ def training(local_rank, config, **kwargs):
                 print(idist.get_rank(), ": run with config:", config, "- backend=", idist.backend())
                 # ...

+            config = {"key": "value"}
+
             with idist.Parallel(backend="xla-tpu", nproc_per_node=8) as parallel:
                 parallel.run(training, config, a=1, b=2)

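The hunk above documents the other launch mode (option 1 in the docstring): passing ``nproc_per_node`` makes ``Parallel`` spawn the worker processes itself, so the script is started with a plain ``python main.py``. A hedged sketch of that path, using the ``gloo`` backend listed earlier in the docstring as a CPU-friendly stand-in for ``xla-tpu``:

```python
# main.py: sketch of the spawn path; Parallel creates the workers itself,
# so this is started with a plain `python main.py`
import ignite.distributed as idist


def training(local_rank, config, **kwargs):
    print(idist.get_rank(), ": run with config:", config, "- backend=", idist.backend())


if __name__ == "__main__":
    config = {"key": "value"}  # placeholder

    # "gloo" with 4 CPU processes stands in for the backend="xla-tpu",
    # nproc_per_node=8 example shown in the hunk above
    with idist.Parallel(backend="gloo", nproc_per_node=4) as parallel:
        parallel.run(training, config, a=1, b=2)
```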
@@ -188,12 +192,13 @@ def training(local_rank, config, **kwargs):
188192 "master_port": 15000
189193 }
190194
195+ config = {"key": "value"}
196+
191197 with idist.Parallel(backend="nccl", **dist_config) as parallel:
192198 parallel.run(training, config, a=1, b=2)
193199
194200
195-
196- .. _torch.distributed.launch: https://pytorch.org/docs/stable/distributed.html#launch-utility
201+ .. _torchrun: https://pytorch.org/docs/stable/elastic/run.html#launcher-api
197202 .. _horovodrun: https://horovod.readthedocs.io/en/latest/api.html#module-horovod.run
198203 .. _dist.init_process_group: https://pytorch.org/docs/stable/distributed.html#torch.distributed.init_process_group
199204 .. versionchanged:: 0.4.2
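This hunk shows only the tail of ``dist_config``. Based on the ``Parallel`` keyword arguments and the ``torchrun`` flags that appear earlier in the diff, a full multi-node spawn configuration could look roughly like the sketch below; the key names mirror those flags and the concrete values are placeholders.

```python
# main.py: hedged sketch of the multi-node spawn variant; run `python main.py`
# on each node with the appropriate node_rank
import ignite.distributed as idist


def training(local_rank, config, **kwargs):
    print(idist.get_rank(), ": run with config:", config, "- backend=", idist.backend())


if __name__ == "__main__":
    config = {"key": "value"}  # placeholder

    dist_config = {
        "nproc_per_node": 8,      # workers spawned on this node
        "nnodes": 2,              # total number of nodes
        "node_rank": 0,           # 0 on the first node, 1 on the second
        "master_addr": "master",  # hostname of node 0
        "master_port": 15000,     # matches the value visible in the hunk above
    }

    with idist.Parallel(backend="nccl", **dist_config) as parallel:
        parallel.run(training, config, a=1, b=2)
```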
@@ -294,6 +299,8 @@ def training(local_rank, config, **kwargs):
                 print(idist.get_rank(), ": run with config:", config, "- backend=", idist.backend())
                 # ...

+            config = {"key": "value"}
+
             with idist.Parallel(backend=backend) as parallel:
                 parallel.run(training, config, a=1, b=2)
