@@ -378,38 +378,39 @@ module load intelmpi && mpirun --help | grep '#{node['cfncluster']['intelmpi']['
378378 TESTNVIDIA
379379end
380380
381- bash 'test CUDA install' do
382- cwd Chef ::Config [ :file_cache_path ]
383- code <<-TESTCUDA
384- has_gpu=$(lspci | grep -o "NVIDIA")
385- if [ -z "$has_gpu" ]; then
386- echo "No GPU detected, no test needed."
387- exit 0
388- fi
389-
390- set -e
391- cuda_ver="#{ node [ 'cfncluster' ] [ 'nvidia' ] [ 'cuda_version' ] } "
392- # Test CUDA installation
393- echo "Testing CUDA install with nvcc..."
394- export PATH=/usr/local/cuda-$cuda_ver/bin:$PATH
395- export LD_LIBRARY_PATH=/usr/local/cuda-$cuda_ver/lib64:$LD_LIBRARY_PATH
396- # grep CUDA version from nvcc output. If CUDA is not installed nvcc command will fail
397- cuda_output=$(nvcc -V | grep -E -o "release [0-9]+.[0-9]+")
398- if [ "$cuda_output" != "release $cuda_ver" ]; then
399- echo "CUDA installed incorrectly! Installed $cuda_output but expected $cuda_ver"
400- exit 1
401- else
402- echo "CUDA nvcc test passed, $cuda_output"
403- fi
404-
405- # Test deviceQuery
406- echo "Testing CUDA install with deviceQuery..."
407- /usr/local/cuda-$cuda_ver/extras/demo_suite/deviceQuery | grep -o "Result = PASS"
408- echo "CUDA deviceQuery test passed"
409- echo "Correctly installed CUDA $cuda_output"
410- TESTCUDA
381+ unless node [ 'cfncluster' ] [ 'cfn_base_os' ] == 'alinux' && get_nvswitches > 1
382+ bash 'test CUDA install' do
383+ cwd Chef ::Config [ :file_cache_path ]
384+ code <<-TESTCUDA
385+ has_gpu=$(lspci | grep -o "NVIDIA")
386+ if [ -z "$has_gpu" ]; then
387+ echo "No GPU detected, no test needed."
388+ exit 0
389+ fi
390+
391+ set -e
392+ cuda_ver="#{ node [ 'cfncluster' ] [ 'nvidia' ] [ 'cuda_version' ] } "
393+ # Test CUDA installation
394+ echo "Testing CUDA install with nvcc..."
395+ export PATH=/usr/local/cuda-$cuda_ver/bin:$PATH
396+ export LD_LIBRARY_PATH=/usr/local/cuda-$cuda_ver/lib64:$LD_LIBRARY_PATH
397+ # grep CUDA version from nvcc output. If CUDA is not installed nvcc command will fail
398+ cuda_output=$(nvcc -V | grep -E -o "release [0-9]+.[0-9]+")
399+ if [ "$cuda_output" != "release $cuda_ver" ]; then
400+ echo "CUDA installed incorrectly! Installed $cuda_output but expected $cuda_ver"
401+ exit 1
402+ else
403+ echo "CUDA nvcc test passed, $cuda_output"
404+ fi
405+
406+ # Test deviceQuery
407+ echo "Testing CUDA install with deviceQuery..."
408+ /usr/local/cuda-$cuda_ver/extras/demo_suite/deviceQuery | grep -o "Result = PASS"
409+ echo "CUDA deviceQuery test passed"
410+ echo "Correctly installed CUDA $cuda_output"
411+ TESTCUDA
412+ end
411413end
412-
413414###################
414415# FabricManager
415416###################
0 commit comments