Skip to content

Commit cf7dbf6

Browse files
committed
changed examples scripts
1 parent 30b25c8 commit cf7dbf6

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

examples/new_project_templates/multi_node_examples/demo_script.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,13 @@ source activate YourEnv
5353
export NCCL_DEBUG=INFO
5454
export PYTHONFAULTHANDLER=1
5555

56+
# on your cluster you might need these:
57+
# choose the network interface NCCL uses (a leading ^ excludes the listed interfaces)
58+
export NCCL_SOCKET_IFNAME=^docker0,lo
59+
60+
# might need to load your cluster's NCCL/CUDA module
61+
module load NCCL/2.4.7-1-cuda.10.0
62+
5663
# random port between 12k and 20k
5764
# RANDOM % 8000 yields 0..7999, so the port lands in 12000..19999.
# (No trailing characters after the expansion: the value must be a plain integer.)
export MASTER_PORT=$((12000 + RANDOM % 8000))
5865

examples/new_project_templates/multi_node_examples/minimal_multi_node_demo_script.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,21 @@
1010
# activate conda env
1111
conda activate my_env
1212

13+
# -------------------------
14+
# debugging flags (optional)
15+
# export NCCL_DEBUG=INFO
16+
# export PYTHONFAULTHANDLER=1
17+
18+
# on your cluster you might need these:
19+
# choose the network interface NCCL uses (a leading ^ excludes the listed interfaces)
20+
# export NCCL_SOCKET_IFNAME=^docker0,lo
21+
22+
# might need to load your cluster's NCCL/CUDA module
23+
# module load NCCL/2.4.7-1-cuda.10.0
24+
# -------------------------
25+
26+
# random port between 12k and 20k
27+
# RANDOM % 8000 yields 0..7999, so the port lands in 12000..19999.
# (No trailing characters after the expansion: the value must be a plain integer.)
export MASTER_PORT=$((12000 + RANDOM % 8000))
28+
1329
# run script from above
1430
python minimal_multi_node_demo.py

0 commit comments

Comments
 (0)