Skip to content

Commit f9a2c4c

Browse files
author
Himani Anil Deshpande
committed
[NVIDIA-IMEX] Configure nvidia-imex only on GB200 instances
1 parent 11cd7c7 commit f9a2c4c

File tree

2 files changed

+13
-2
lines changed

2 files changed

+13
-2
lines changed

cookbooks/aws-parallelcluster-platform/resources/nvidia_imex/partial/_nvidia_imex_common.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,17 @@
5555
end
5656
end
5757

58+
action :configure do
59+
return unless imex_installed
60+
# Start and enable nvidia-imex only when the GB200 NVSwitch count is greater than 1 (p6e-gb200)
61+
if get_nvswitch_count(get_device_ids['gb200']) > 1
62+
service 'nvidia-imex' do
63+
action %i(start enable)
64+
supports status: true
65+
end unless on_docker?
66+
end
67+
end
68+
5869
def imex_installed
5970
::File.exist?('/usr/bin/nvidia-imex') || ::File.exist?('/usr/bin/nvidia-imex-ctl')
6071
end

cookbooks/aws-parallelcluster-platform/test/controls/nvidia_imex_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
nvidia_imex_dir = "#{node['cluster']['shared_dir']}/nvidia-imex"
3030

31-
%w("#{nvidia_imex_dir}/config.cfg" "#{nvidia_imex_dir}/nodes_config.cfg").each do |conf_files|
31+
["#{nvidia_imex_dir}/config.cfg", "#{nvidia_imex_dir}/nodes_config.cfg"].each do |conf_files|
3232
describe file(conf_files) do
3333
it { should exist }
3434
its('owner') { should eq 'root' }
@@ -39,7 +39,7 @@
3939
end
4040

4141
control 'tag:config_nvidia_fabric_manager_enabled' do
42-
only_if { instance.nvs_switch_enabled? }
42+
only_if { instance.nvs_switch_enabled? && node['cluster']['node_type'] == "ComputeFleet" }
4343

4444
describe service('nvidia-imex') do
4545
it { should be_enabled }

0 commit comments

Comments
 (0)