Skip to content

Commit 0d31c53

Browse files
Merge pull request #111 from oci-hpc/2.10.2_ds_nccl_location_update
Update nccl test location
2 parents 85de596 + 8f5379d commit 0d31c53

9 files changed, with 40 additions (+40) and 24 deletions (-24).

samples/gpu/nccl_run_allreduce.sbatch

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,10 +22,8 @@ cat $MACHINEFILE
2222
source /etc/os-release
2323
if [ $ID == "ol" ] || [ $ID == "centos" ] ; then
2424
python3 /home/opc/node_ordering_by_rack.py --input_file $MACHINEFILE > /dev/null
25-
USER=opc
2625
elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then
2726
python3 /home/ubuntu/node_ordering_by_rack.py --input_file $MACHINEFILE > /dev/null
28-
USER=ubuntu
2927
fi
3028

3129

@@ -74,6 +72,6 @@ fi
7472
-x NCCL_IB_GID_INDEX=3 \
7573
-x NCCL_ALGO=Ring \
7674
-x NCCL_IB_HCA="${var_NCCL_IB_HCA}" \
77-
--np $((SLURM_NNODES*SLURM_NTASKS_PER_NODE)) --rankfile $ORDEREDRANKMACHINEFILE /home/$USER/nccl-tests/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n 100
75+
--np $((SLURM_NNODES*SLURM_NTASKS_PER_NODE)) --rankfile $ORDEREDRANKMACHINEFILE /opt/oci-hpc/nccl-test/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n 100
7876

7977

samples/gpu/nccl_run_allreduce.sh

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,13 @@ echo INPUTFILE
1818
cat $hostfile
1919

2020
# will generate rack-aware ordered host file
21-
python3 /home/opc/node_ordering_by_rack.py --input_file $hostfile > /dev/null
21+
source /etc/os-release
22+
if [ $ID == "ol" ] || [ $ID == "centos" ] ; then
23+
python3 /home/opc/node_ordering_by_rack.py --input_file $hostfile > /dev/null
24+
elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then
25+
python3 /home/ubuntu/node_ordering_by_rack.py --input_file $hostfile > /dev/null
26+
fi
27+
2228
hostfile=$ORDEREDMACHINEFILE
2329

2430
echo ORDEREDMACHINEFILE
@@ -73,7 +79,7 @@ fi
7379
-x NCCL_IB_GID_INDEX=3 \
7480
-x NCCL_ALGO=Ring \
7581
-x NCCL_IB_HCA="${var_NCCL_IB_HCA}" \
76-
--np $np --hostfile $hostfile -N 8 /home/opc/nccl-tests/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n $iter >> $logfile
82+
--np $np --hostfile $hostfile -N 8 /opt/oci-hpc/nccl-test/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n $iter >> $logfile
7783

7884
tail -n 32 $logfile
7985

samples/gpu/nccl_run_allreduce_containers_with_ordering.sbatch

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,10 +23,8 @@ cat $MACHINEFILE
2323
source /etc/os-release
2424
if [ $ID == "ol" ] || [ $ID == "centos" ] ; then
2525
python3 /home/opc/node_ordering_by_rack.py --input_file $MACHINEFILE > /dev/null
26-
USER=opc
2726
elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then
2827
python3 /home/ubuntu/node_ordering_by_rack.py --input_file $MACHINEFILE > /dev/null
29-
USER=ubuntu
3028
fi
3129

3230
echo ORDEREDMACHINEFILE
@@ -80,6 +78,7 @@ export RX_QUEUE_LEN=8192 \
8078
NCCL_IB_QPS_PER_CONNECTION=4
8179

8280
env | grep "SLURMD_NODENAME="
81+
USER=`whoami`
8382

8483
CONTAINER_IMAGE="/nfs/scratch/nvcr.io+nvidia+pytorch+22.12-py3.sqsh"
8584
CONTAINER_MOUNTS="/home/$USER/nccl-tests:/nccl,$LOCAL_MPI:$LOCAL_MPI"

samples/gpu/nccl_run_allreduce_srun.sbatch

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,6 @@ if [[ "$mpivars_path" == "" ]]; then
2929
source $mpivars_path
3030
echo $mpivars_path
3131

32-
USER=`whoami`
33-
3432
shape=`curl -sH "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/ | jq .shape`
3533
if [ $shape == \"BM.GPU.B4.8\" ] || [ $shape == \"BM.GPU.A100-v2.8\" ]
3634
then
@@ -57,4 +55,4 @@ export NCCL_DEBUG=WARN \
5755
NCCL_IB_GID_INDEX=3 \
5856
NCCL_ALGO=Ring \
5957
NCCL_IB_HCA="${var_NCCL_IB_HCA}"
60-
srun --mpi=pmix_v3 --gpus-per-node=$SLURM_GPUS_PER_NODE --ntasks-per-node=$SLURM_NTASKS_PER_NODE /home/$USER/nccl-tests/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n 100
58+
srun --mpi=pmix_v3 --gpus-per-node=$SLURM_GPUS_PER_NODE --ntasks-per-node=$SLURM_NTASKS_PER_NODE /opt/oci-hpc/nccl-test/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n 100

samples/gpu/nccl_run_allreduce_srun.sh

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,6 @@ do
4343
source $mpivars_path
4444
echo $mpivars_path
4545

46-
USER=`whoami`
47-
4846
first_node=`head $hostfile -n 1`
4947
shape=`ssh $first_node 'curl -sH "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/' | jq .shape`
5048
if [ $shape == \"BM.GPU.B4.8\" ] || [ $shape == \"BM.GPU.A100-v2.8\" ]
@@ -72,7 +70,7 @@ do
7270
NCCL_IB_GID_INDEX=3 \
7371
NCCL_ALGO=Ring \
7472
NCCL_IB_HCA="${var_NCCL_IB_HCA}"
75-
srun --mpi=pmix_v3 --nodefile=$hostfile --gpus-per-node=8 --ntasks-per-node=8 /home/$USER/nccl-tests/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n 100 >> $logfile
73+
srun --mpi=pmix_v3 --nodefile=$hostfile --gpus-per-node=8 --ntasks-per-node=8 /opt/oci-hpc/nccl-test/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n 100 >> $logfile
7674

7775

7876

samples/gpu/nccl_run_alltoall.sh

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,12 @@ echo INPUTFILE
2222
cat $hostfile
2323

2424
# will generate rack-aware ordered host file
25-
python3 /home/opc/node_ordering_by_rack.py --input_file $hostfile > /dev/null
25+
if [ $ID == "ol" ] || [ $ID == "centos" ] ; then
26+
python3 /home/opc/node_ordering_by_rack.py --input_file $hostfile > /dev/null
27+
elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then
28+
python3 /home/ubuntu/node_ordering_by_rack.py --input_file $hostfile > /dev/null
29+
fi
30+
2631
hostfile=$ORDEREDMACHINEFILE
2732

2833
echo ORDEREDMACHINEFILE
@@ -82,7 +87,7 @@ fi
8287
-x NCCL_IB_GID_INDEX=3 \
8388
-x NCCL_ALGO=Ring \
8489
-x NCCL_IB_HCA="${var_NCCL_IB_HCA}" \
85-
--np $np --hostfile $hostfile -N 8 /home/opc/nccl-tests/build/alltoall_perf -f 2 -g 1 -c 0 -n $iter >> $logfile
90+
--np $np --hostfile $hostfile -N 8 /opt/oci-hpc/nccl-test/build/alltoall_perf -f 2 -g 1 -c 0 -n $iter >> $logfile
8691

8792
tail -n 15 $logfile
8893

samples/gpu/qfabv1_nccl_run_allreduce.sbatch

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,12 @@ scontrol show hostnames $SLURM_JOB_NODELIST > $MACHINEFILE
1919
echo MACHINEFILE
2020
cat $MACHINEFILE
2121

22-
python3 /home/opc/node_ordering_by_rack.py --input_file $MACHINEFILE > /dev/null
22+
source /etc/os-release
23+
if [ $ID == "ol" ] || [ $ID == "centos" ] ; then
24+
python3 /home/opc/node_ordering_by_rack.py --input_file $MACHINEFILE > /dev/null
25+
elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then
26+
python3 /home/ubuntu/node_ordering_by_rack.py --input_file $MACHINEFILE > /dev/null
27+
fi
2328

2429
echo ORDEREDMACHINEFILE
2530
cat $ORDEREDMACHINEFILE
@@ -31,9 +36,6 @@ source $mpivars_path
3136

3237
if [[ "$mpivars_path" == "" ]]; then echo "Could not find MPIPATH"; exit; fi
3338

34-
#source /usr/mpi/gcc/openmpi-4.1.0rc5/bin/mpivars.sh
35-
#source /usr/mpi/gcc/openmpi-4.0.3rc4/bin/mpivars.sh
36-
3739
export NCCL_DEBUG=WARN
3840

3941

@@ -64,6 +66,6 @@ fi
6466
-x NCCL_IB_GID_INDEX=3 \
6567
-x NCCL_ALGO=Ring \
6668
-x NCCL_IB_HCA="${var_NCCL_IB_HCA}" \
67-
--np $((SLURM_NNODES*SLURM_NTASKS_PER_NODE)) --rankfile $ORDEREDRANKMACHINEFILE /home/opc/nccl-tests/build/all_reduce_perf -b8 -e 4G -f 2 -n 100
69+
--np $((SLURM_NNODES*SLURM_NTASKS_PER_NODE)) --rankfile $ORDEREDRANKMACHINEFILE /opt/oci-hpc/nccl-test/build/all_reduce_perf -b8 -e 4G -f 2 -n 100
6870

6971

samples/gpu/qfabv1_nccl_run_allreduce.sh

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,12 @@ echo INPUTFILE
1818
cat $hostfile
1919

2020
# will generate rack-aware ordered host file
21-
python3 /home/opc/node_ordering_by_rack.py --input_file $hostfile > /dev/null
21+
if [ $ID == "ol" ] || [ $ID == "centos" ] ; then
22+
python3 /home/opc/node_ordering_by_rack.py --input_file $hostfile > /dev/null
23+
elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then
24+
python3 /home/ubuntu/node_ordering_by_rack.py --input_file $hostfile > /dev/null
25+
fi
26+
2227
hostfile=$ORDEREDMACHINEFILE
2328

2429
echo ORDEREDMACHINEFILE
@@ -75,7 +80,7 @@ fi
7580
-x NCCL_IB_GID_INDEX=3 \
7681
-x NCCL_ALGO=Ring \
7782
-x NCCL_IB_HCA="${var_NCCL_IB_HCA}" \
78-
--np $np --hostfile $hostfile -N 8 /home/opc/nccl-tests/build/all_reduce_perf -b8 -e 4G -f 2 -n $iter >> $logfile
83+
--np $np --hostfile $hostfile -N 8 /opt/oci-hpc/nccl-test/build/all_reduce_perf -b8 -e 4G -f 2 -n $iter >> $logfile
7984

8085
tail -n 32 $logfile
8186

samples/gpu/qfabv1_nccl_run_alltoall.sh

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,12 @@ echo INPUTFILE
2424
cat $hostfile
2525

2626
# will generate rack-aware ordered host file
27-
python3 /home/opc/node_ordering_by_rack.py --input_file $hostfile > /dev/null
27+
if [ $ID == "ol" ] || [ $ID == "centos" ] ; then
28+
python3 /home/opc/node_ordering_by_rack.py --input_file $hostfile > /dev/null
29+
elif [ $ID == "debian" ] || [ $ID == "ubuntu" ] ; then
30+
python3 /home/ubuntu/node_ordering_by_rack.py --input_file $hostfile > /dev/null
31+
fi
32+
2833
hostfile=$ORDEREDMACHINEFILE
2934

3035
echo ORDEREDMACHINEFILE
@@ -87,7 +92,7 @@ fi
8792
-x NCCL_IB_GID_INDEX=3 \
8893
-x NCCL_ALGO=Ring \
8994
-x NCCL_IB_HCA="${var_NCCL_IB_HCA}" \
90-
--np $np --hostfile $hostfile -N 8 /home/opc/nccl-tests/build/alltoall_perf -f 2 -g 1 -c 0 -n $iter >> $logfile
95+
--np $np --hostfile $hostfile -N 8 /opt/oci-hpc/nccl-test/build/alltoall_perf -f 2 -g 1 -c 0 -n $iter >> $logfile
9196

9297
tail -n 15 $logfile
9398

0 commit comments

Comments (0)