#!/bin/bash
set -e

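# Usage (assuming this file is saved as nccl_run_allreduce.sh, matching the
# log file name used below):
#   ./nccl_run_allreduce.sh <number_of_runs> [hostfile] [number_of_gpus]
# Example: ./nccl_run_allreduce.sh 10 /tmp/ordered_hostfile_system_name 16
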
# Number of times to run the NCCL test, to stress the GPUs and the RDMA network.
# This is different from the -n iterations parameter of the NCCL allreduce
# benchmark, which is set below via $iter.
max=$1

# This assumes the hostfile passed in is already ordered by rackId.
if [ -n "$2" ]; then
  hostfile=$2
else
  hostfile="/tmp/ordered_hostfile_system_name"
fi

ORDEREDMACHINEFILE="ordered_hostfile_system_name"
ORDEREDRANKMACHINEFILE="rankfile_system_name"
echo INPUTFILE
cat $hostfile

# Generate the rack-aware ordered hostfile.
source /etc/os-release
if [ "$ID" == "ol" ] || [ "$ID" == "centos" ] ; then
  python3 /home/opc/node_ordering_by_rack.py --input_file $hostfile > /dev/null
elif [ "$ID" == "debian" ] || [ "$ID" == "ubuntu" ] ; then
  python3 /home/ubuntu/node_ordering_by_rack.py --input_file $hostfile > /dev/null
fi
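# node_ordering_by_rack.py is expected to write the rack-ordered hostfile
# (ordered_hostfile_system_name) and the matching MPI rankfile
# (rankfile_system_name) into the current working directory.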

hostfile=$ORDEREDMACHINEFILE
rankfile=$ORDEREDRANKMACHINEFILE

echo ORDEREDMACHINEFILE
cat $ORDEREDMACHINEFILE
echo ORDEREDRANKMACHINEFILE
cat $ORDEREDRANKMACHINEFILE

# The number of GPUs to use for the test. Must be a multiple of 8. If not
# passed, all GPUs are used.
if [ -n "$3" ]; then
  np=$3
else
  np=$(( $(wc -l < $hostfile) * 8 ))
fi
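# Assumption: each of the BM GPU shapes handled below has 8 GPUs per node, and
# the rankfile is expected to place 8 ranks per host, hence the multiple-of-8
# requirement.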

logfile="nccl_run_allreduce.sh.log"

for x in $(seq 1 1 $max)
do

  echo $x
  echo $x >> $logfile
  date >> $logfile

  iter=20

  mpivars_path=$(ls /usr/mpi/gcc/openmpi-*/bin/mpivars.sh 2> /dev/null || true)
  if [[ "$mpivars_path" == "" ]]; then echo "Could not find MPIPATH"; exit 1; fi
  source $mpivars_path

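  # Query the OCI instance metadata service (IMDSv2, hence the
  # "Authorization: Bearer Oracle" header) on the first node to determine the
  # bare-metal shape, and pick the RDMA NIC that UCX should use accordingly.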
  first_node=$(head -n 1 $hostfile)
  shape=$(ssh $first_node 'curl -sH "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/' | jq -r .shape)
  if [ "$shape" == "BM.GPU.B4.8" ] || [ "$shape" == "BM.GPU.A100-v2.8" ]
  then
    var_UCX_NET_DEVICES=mlx5_0:1
  elif [ "$shape" == "BM.GPU4.8" ]
  then
    var_UCX_NET_DEVICES=mlx5_4:1
  fi

  # final version
  # All NCCL parameters are set in /etc/nccl.conf on each compute node.
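  # Flag summary (standard Open MPI and nccl-tests options):
  #   --mca pml ucx            use the UCX point-to-point messaging layer
  #   --mca coll ^hcoll        disable the hcoll collective component
  #   -x VAR[=val]             export the variable to every rank
  #   -b1G -e10G               sweep message sizes from 1 GiB to 10 GiB
  #   -i$((1024*1024*1024*9))  9 GiB step, so only 1 GiB and 10 GiB are measured
  #   -n $iter                 iterations timed per message size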
  mpirun --mca pml ucx \
    --bind-to numa \
    --mca coll ^hcoll \
    -x UCX_TLS=ud,self,sm \
    -x UCX_NET_DEVICES=${var_UCX_NET_DEVICES} \
    -x HCOLL_ENABLE_MCAST_ALL=0 \
    -x coll_hcoll_enable=0 \
    -x NCCL_ALGO=Ring \
    --np $np --rankfile $rankfile /opt/oci-hpc/nccl-test/build/all_reduce_perf -b1G -e10G -i$((1024*1024*1024*9)) -n $iter >> $logfile

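  # Show the tail of the log (the output appended by the run that just
  # finished) on stdout.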
  tail -n 32 $logfile

done