|
40 | 40 | BIND_LOCATION="${BIND_INFO%;*}" |
41 | 41 | CUDA_DEVICES="${BIND_INFO#*;}" |
42 | 42 |
|
| 43 | +# check for nvidia-smi vs. rocm-smi command |
43 | 44 | if [[ "$CUDA_DEVICES" != "NONE" ]]; then |
44 | | - export CUDA_VISIBLE_DEVICES=$CUDA_DEVICES |
45 | | - export ROCR_VISIBLE_DEVICES=$CUDA_DEVICES |
46 | | - export HIP_VISIBLE_DEVICES=$CUDA_DEVICES |
| 45 | + if command -v nvidia-smi &> /dev/null && nvidia-smi -L &> /dev/null; then |
| 46 | + export CUDA_VISIBLE_DEVICES="$CUDA_DEVICES" |
| 47 | + elif command -v rocm-smi &> /dev/null && rocm-smi &> /dev/null; then |
| 48 | + export ROCR_VISIBLE_DEVICES="$CUDA_DEVICES" |
| 49 | + else |
| 50 | + echo "Warning: GPU binding requested, but neither nvidia-smi nor rocm-smi found. GPU assignment may not work." >&2 |
| 51 | + fi |
47 | 52 | fi |
48 | 53 |
|
49 | 54 | if [[ "${BIND_LOCATION}" == "UNBOUND" ]]; then |
@@ -87,8 +92,11 @@ if [[ "$FLUXBIND_QUIET" != "1" ]] |
87 | 92 | echo -e "${prefix}: Effective Cpuset Mask: ${CYAN}$cpuset_mask${RESET}" |
88 | 93 | echo -e "${prefix}: Logical CPUs (PUs): ${BLUE}${logical_cpu_list:-none}${RESET}" |
89 | 94 | echo -e "${prefix}: Physical Cores: ${ORANGE}${physical_core_list:-none}${RESET}" |
90 | | - if [[ "$CUDA_DEVICES" != "NONE" ]]; then |
91 | | - echo -e "${prefix}: CUDA Devices: ${YELLOW}${CUDA_DEVICES}${RESET}" |
| 95 | + if [[ ! -z "$CUDA_VISIBLE_DEVICES" ]]; then |
| 96 | + echo -e "${prefix}: CUDA Devices: ${YELLOW}${CUDA_VISIBLE_DEVICES}${RESET}" |
| 97 | + fi |
| 98 | + if [[ ! -z "$ROCR_VISIBLE_DEVICES" ]]; then |
| 99 | + echo -e "${prefix}: ROCR Devices: ${YELLOW}${ROCR_VISIBLE_DEVICES}${RESET}" |
92 | 100 | fi |
93 | 101 | echo |
94 | 102 | fi |
|
0 commit comments