@@ -80,6 +80,8 @@ function usage(){
8080 fails when not enough memory available on these nodes.
8181 -l use numactl localalloc to CPUs in same NUMA domain. Note: If
8282 memory cannot be allocated, alloc falls back to other nodes.
83+ -nr use numactl ROCR_VISIBLE_DEVICES
84+ -nm use numactl OMPI_COMM_WORLD_LOCAL_RANK
8385 --version Print version of gpurun and exit
8486
8587 Optional Input environment variables:
@@ -161,6 +163,8 @@ while [ "$_end_gpurun_opts" == "0" ] ; do
161163 -vv) GPURUN_VERBOSE=2;;
162164 -m) _use_numactl_membind=1;;
163165 -md) shift ; _devices_per_mdset=$1 ; _uses_multi_device=1;;
166+ -nr) _use_numactl_rocr=1;;
167+ -nm) _use_numactl_ompi=1;;
164168 -l) _use_numactl_localalloc=1;;
165169 -nomask) GPURUN_MASK_POLICY=" nomask" ;;
166170 * ) _end_gpurun_opts=1; break ;;
@@ -199,6 +203,14 @@ if [ -z "$_num_local_ranks" ] && [ ! -z $SLURM_CPUS_ON_NODE ] ; then
199203 _num_local_ranks=$SLURM_CPUS_ON_NODE
200204 _local_rank_num=$SLURM_LOCALID
201205fi
# Run a command under numactl with both its CPUs and its memory bound to the
# NUMA node spec stored in a named environment variable.
# Arguments: $1   - NAME of the variable holding the node spec
#            $2.. - command and arguments to run
# Outputs:   an error message on stderr when the variable is unset or empty
# Returns:   numactl's exit status, or 1 when the node spec is empty
function _gpurun_numactl_bind(){
  local _node_var="$1"
  local _node="${!_node_var}"
  shift
  # An empty spec would collapse the command line to
  # "--cpunodebind --membind ...", which numactl rejects with an unrelated
  # usage error; fail early with a message that names the variable instead.
  if [ -z "$_node" ] ; then
    echo "ERROR: gpurun: $_node_var is not set; cannot bind with numactl" >&2
    return 1
  fi
  # "$@" keeps every argument of the wrapped command intact (embedded spaces,
  # glob characters); an unquoted $* would re-split and re-expand them.
  numactl --cpunodebind "$_node" --membind "$_node" "$@"
}

# -nr: bind to the NUMA node named by ROCR_VISIBLE_DEVICES, then exit with the
# status of the wrapped command.
# NOTE(review): this treats the value of ROCR_VISIBLE_DEVICES as a NUMA node
# spec, i.e. it assumes GPU index == NUMA node number -- confirm for the
# target systems.
if [ "$_use_numactl_rocr" == "1" ] ; then
  _gpurun_numactl_bind ROCR_VISIBLE_DEVICES "$@"
  exit $?
fi

# -nm: bind to the NUMA node whose number equals this process's
# OMPI_COMM_WORLD_LOCAL_RANK, then exit with the status of the wrapped command.
if [ "$_use_numactl_ompi" == "1" ] ; then
  _gpurun_numactl_bind OMPI_COMM_WORLD_LOCAL_RANK "$@"
  exit $?
fi
202214# If none of the above MPIs, assume gpurun is wrapper for single process on single GPU
203215if [ -z " $_num_local_ranks " ] ; then
204216 _num_local_ranks=1
0 commit comments