File tree Expand file tree Collapse file tree 2 files changed +23
-0
lines changed
examples/new_project_templates/multi_node_examples Expand file tree Collapse file tree 2 files changed +23
-0
lines changed Original file line number Diff line number Diff line change @@ -53,6 +53,13 @@ source activate YourEnv
5353export NCCL_DEBUG=INFO
5454export PYTHONFAULTHANDLER=1
5555
56+ # on your cluster you might need these:
57+ # set the network interface
58+ export NCCL_SOCKET_IFNAME=^docker0,lo
59+
60+ # might need to load an NCCL module built for your CUDA version
61+ module load NCCL/2.4.7-1-cuda.10.0
62+
5663# random port between 12000 and 31999
5764export MASTER_PORT=$(( 12000 + RANDOM % 20000 )) $
5865
Original file line number Diff line number Diff line change 1010# activate conda env
1111conda activate my_env
1212
13+ # -------------------------
14+ # debugging flags (optional)
15+ # export NCCL_DEBUG=INFO
16+ # export PYTHONFAULTHANDLER=1
17+
18+ # on your cluster you might need these:
19+ # set the network interface
20+ # export NCCL_SOCKET_IFNAME=^docker0,lo
21+
22+ # might need to load an NCCL module built for your CUDA version
23+ # module load NCCL/2.4.7-1-cuda.10.0
24+ # -------------------------
25+
26+ # random port between 12000 and 31999
27+ export MASTER_PORT=$(( 12000 + RANDOM % 20000 )) $
28+
1329# run script from above
1430python minimal_multi_node_demo.py
You can’t perform that action at this time.
0 commit comments