Skip to content

Commit a7f0576

Browse files
author
Himani Anil Deshpande
committed
[NVIDIA-IMEX] Adding Unit tests for Configuration of nvidia-imex
1 parent a4ca747 commit a7f0576

File tree

4 files changed

+98
-11
lines changed

4 files changed

+98
-11
lines changed

cookbooks/aws-parallelcluster-platform/resources/nvidia_imex/nvidia_imex_amazon2.rb

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,3 +20,7 @@ def imex_installed
2020
# We do not install NVIDIA-Imex for Alinux2 due to restriction on NVIDIA driver
2121
true
2222
end
23+
24+
action :configure do
25+
# Intentionally a no-op: NVIDIA-Imex is not installed on Alinux2 (NVIDIA driver restriction), so there is nothing to configure.
26+
end

cookbooks/aws-parallelcluster-platform/resources/nvidia_imex/partial/_nvidia_imex_common.rb

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -24,23 +24,23 @@
2424
action :add
2525
end
2626

27-
directory "#{node['cluster']['shared_dir']}/nvidia-imex"
27+
directory "#{node['cluster']['shared_dir']}/#{nvidia_imex_service}"
2828

29-
template "#{node['cluster']['shared_dir']}/nvidia-imex/config.cfg" do
29+
template "#{node['cluster']['shared_dir']}/#{nvidia_imex_service}/config.cfg" do
3030
source 'nvidia-imex/nvidia-imex-config.erb'
3131
owner 'root'
3232
group 'root'
3333
mode '0755'
3434
end
3535

36-
template "#{node['cluster']['shared_dir']}/nvidia-imex/nodes_config.cfg" do
36+
template "#{node['cluster']['shared_dir']}/#{nvidia_imex_service}/nodes_config.cfg" do
3737
source 'nvidia-imex/nvidia-imex-nodes.erb'
3838
owner 'root'
3939
group 'root'
4040
mode '0755'
4141
end
4242

43-
template "/etc/systemd/system/nvidia-imex.service" do
43+
template "/etc/systemd/system/#{nvidia_imex_service}.service" do
4444
source 'nvidia-imex/nvidia-imex.service.erb'
4545
owner 'root'
4646
group 'root'
@@ -49,7 +49,7 @@
4949
end
5050

5151
install_packages 'Install nvidia-imex' do
52-
packages "nvidia-imex-#{_nvidia_imex_version}"
52+
packages "#{nvidia_imex_service}-#{_nvidia_imex_version}"
5353
action :install
5454
end
5555
# Save Imex version in Node Attributes for InSpec Tests
@@ -61,15 +61,19 @@
6161
return unless imex_installed
6262
# Start nvidia-imex on p6e-gb200
6363
if get_nvswitch_count(get_device_ids['gb200']) > 1
64-
service 'nvidia-imex' do
64+
service nvidia_imex_service do
6565
action %i(start enable)
6666
supports status: true
67-
end unless on_docker?
67+
end
6868
end
6969
end
7070

71+
# Single source of truth for the 'nvidia-imex' name. In this file it is used to build
# the shared config directory, the systemd unit file name, the package name prefix,
# the service resource name and the binary paths under /usr/bin.
def nvidia_imex_service
72+
'nvidia-imex'
73+
end
74+
7175
# Reports whether NVIDIA IMEX looks installed: true when either the service binary
# or its companion "-ctl" binary exists under /usr/bin.
def imex_installed
72-
::File.exist?('/usr/bin/nvidia-imex') || ::File.exist?('/usr/bin/nvidia-imex-ctl')
76+
::File.exist?("/usr/bin/#{nvidia_imex_service}") || ::File.exist?("/usr/bin/#{nvidia_imex_service}-ctl")
7377
end
7478

7579
def nvidia_enabled_or_installed?

cookbooks/aws-parallelcluster-platform/spec/unit/resources/nvidia_imex_spec.rb

Lines changed: 77 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,14 @@ def self.install(chef_run)
1414
end
1515
end
1616
end
17+
18+
# Test helper: converges an `nvidia_imex` resource named 'configure' with the
# :configure action by calling converge_dsl on the given chef_run.
# Note that the resource name is 'configure', not 'nvidia-imex'; matchers that
# look the resource up by name must use 'configure'.
def self.configure(chef_run)
19+
chef_run.converge_dsl('aws-parallelcluster-platform') do
20+
nvidia_imex 'configure' do
21+
action :configure
22+
end
23+
end
24+
end
1725
end
1826

1927
describe 'nvidia_imex:nvidia_enabled_or_installed?' do
@@ -245,3 +253,72 @@ def self.install(chef_run)
245253
end
246254
end
247255
end
256+
257+
# Unit tests for the :configure action of the nvidia_imex resource.
#
# Expected behavior under test: the nvidia-imex service is started and enabled only
# when the IMEX binary is reported as installed AND more than one NVSwitch is reported
# for the 'gb200' device ids. On Amazon Linux 2 the action is a no-op, so the service
# must never be started there.
describe 'nvidia_imex:configure' do
  for_all_oses do |platform, version|
    context "on #{platform}#{version}" do
      context 'when nvidia-imex binary is not installed' do
        cached(:chef_run) do
          stubs_for_resource('nvidia_imex') do |res|
            allow(res).to receive(:imex_installed).and_return(false)
          end
          runner = runner(platform: platform, version: version, step_into: ['nvidia_imex'])
          ConvergeNvidiaImex.configure(runner)
        end
        cached(:node) { chef_run.node }

        it 'does not configure nvidia-imex' do
          # The converged resource is named 'configure', so the previous expectation
          # `not_to configure_nvidia_imex('nvidia-imex')` could never match a resource
          # and passed vacuously. Assert on the observable effect instead: the service
          # must be neither started nor enabled.
          is_expected.not_to start_service('nvidia-imex')
          is_expected.not_to enable_service('nvidia-imex')
        end
      end

      context 'when get_nvswitch_count > 1' do
        cached(:chef_run) do
          stubs_for_provider('nvidia_imex[configure]') do |pro|
            allow(pro).to receive(:imex_installed).and_return(true)
            allow(pro).to receive(:get_device_ids).and_return({ 'gb200' => 'test' })
            allow(pro).to receive(:get_nvswitch_count).with('test').and_return(4)
          end
          runner = runner(platform: platform, version: version, step_into: ['nvidia_imex'])
          ConvergeNvidiaImex.configure(runner)
        end
        cached(:node) { chef_run.node }

        before do
          chef_run.node.override['cluster']['region'] = 'aws_region'
        end

        if platform == 'amazon' && version == '2'
          it 'does not configure nvidia-imex' do
            # Negative expectation kept unconstrained on purpose: adding
            # with_action/with_supports would let a start with different
            # properties slip through unnoticed.
            is_expected.not_to start_service('nvidia-imex')
          end
        else
          it 'starts nvidia-imex service' do
            is_expected.to start_service('nvidia-imex').with_action(%i(start enable)).with_supports({ status: true })
          end
        end
      end

      context 'when get_nvswitch_count <= 1' do
        cached(:chef_run) do
          stubs_for_provider('nvidia_imex[configure]') do |pro|
            allow(pro).to receive(:imex_installed).and_return(true)
            allow(pro).to receive(:get_device_ids).and_return({ 'gb200' => 'test' })
            allow(pro).to receive(:get_nvswitch_count).with('test').and_return(1)
          end
          runner = runner(platform: platform, version: version, step_into: ['nvidia_imex'])
          ConvergeNvidiaImex.configure(runner)
        end
        cached(:node) { chef_run.node }

        before do
          chef_run.node.override['cluster']['region'] = 'aws_region'
        end

        it 'does not configure nvidia-imex' do
          # Unconstrained negative expectation: any start of the service is a failure.
          is_expected.not_to start_service('nvidia-imex')
        end
      end
    end
  end
end

cookbooks/aws-parallelcluster-platform/test/controls/nvidia_imex_spec.rb

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,14 @@
1212
control 'tag:install_expected_versions_of_nvidia_imex_installed' do
1313
only_if { ['yes', true, 'true'].include?(node['cluster']['nvidia']['enabled']) }
1414

15-
describe package('nvidia-imex') do
15+
nvidia_imex_service = 'nvidia-imex'
16+
17+
describe package(nvidia_imex_service) do
1618
it { should be_installed }
1719
its('version') { should match /#{node['cluster']['nvidia']['imex']['version']}/ }
1820
end
1921

20-
%w(/usr/bin/nvidia-imex /usr/bin/nvidia-imex-ctl).each do |path|
22+
["/usr/bin/#{nvidia_imex_service}", "/usr/bin/#{nvidia_imex_service}-ctl"].each do |path|
2123
describe file(path) do
2224
it { should exist }
2325
its('owner') { should eq 'root' }
@@ -26,7 +28,7 @@
2628
end
2729
end
2830

29-
nvidia_imex_dir = "#{node['cluster']['shared_dir']}/nvidia-imex"
31+
nvidia_imex_dir = "#{node['cluster']['shared_dir']}/#{nvidia_imex_service}"
3032

3133
["#{nvidia_imex_dir}/config.cfg", "#{nvidia_imex_dir}/nodes_config.cfg"].each do |conf_files|
3234
describe file(conf_files) do

0 commit comments

Comments
 (0)