File tree — 2 files changed, +13 −2 lines changed
cookbooks/aws-parallelcluster-platform
resources/nvidia_imex/partial Expand file tree Collapse file tree 2 files changed +13
-2
lines changed Original file line number Diff line number Diff line change 5555 end
5656end
5757
# Starts and enables the 'nvidia-imex' service.
#
# The action is a no-op unless all of the following hold, checked in order:
# * IMEX is installed (see imex_installed),
# * more than one NVSwitch is counted for the 'gb200' device ids,
# * the run is not happening inside Docker.
action :configure do
  return unless imex_installed

  # Start nvidia-imex on p6e-gb200
  # NOTE(review): get_device_ids / get_nvswitch_count are defined outside this
  # block; presumably they inspect the instance hardware — confirm.
  return unless get_nvswitch_count(get_device_ids['gb200']) > 1
  return if on_docker?

  service 'nvidia-imex' do
    supports status: true
    action %i(start enable)
  end
end
68+
# Reports whether NVIDIA IMEX is present on this node.
#
# @return [Boolean] true when either /usr/bin/nvidia-imex or
#   /usr/bin/nvidia-imex-ctl exists on disk, false otherwise
def imex_installed
  %w(/usr/bin/nvidia-imex /usr/bin/nvidia-imex-ctl).any? { |binary| ::File.exist?(binary) }
end
Original file line number Diff line number Diff line change 2828
2929 nvidia_imex_dir = "#{ node [ 'cluster' ] [ 'shared_dir' ] } /nvidia-imex"
3030
31- %w( "#{nvidia_imex_dir}/config.cfg" "#{nvidia_imex_dir}/nodes_config.cfg" ) . each do |conf_files |
31+ [ "#{ nvidia_imex_dir } /config.cfg" , "#{ nvidia_imex_dir } /nodes_config.cfg" ] . each do |conf_files |
3232 describe file ( conf_files ) do
3333 it { should exist }
3434 its ( 'owner' ) { should eq 'root' }
3939end
4040
4141control 'tag:config_nvidia_fabric_manager_enabled' do
42- only_if { instance . nvs_switch_enabled? }
42+ only_if { instance . nvs_switch_enabled? && node [ 'cluster' ] [ 'node_type' ] == "ComputeFleet" }
4343
4444 describe service ( 'nvidia-imex' ) do
4545 it { should be_enabled }
You can’t perform that action at this time.
0 commit comments