Skip to content

Commit 1251c23

Browse files
author
Himani Anil Deshpande
committed
[NVIDIA_IMEX] Update unit tests
1 parent b6544a2 commit 1251c23

File tree

2 files changed

+137
-112
lines changed

2 files changed

+137
-112
lines changed

cookbooks/aws-parallelcluster-platform/spec/unit/resources/nvidia_imex_spec.rb

Lines changed: 129 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
require 'spec_helper'
22

33
nvidia_version = "1.2.3"
4+
SOURCE_DIR = 'SOURCE_DIR'.freeze
45
nvidia_imex_shared_dir = "SHARED_DIR/nvidia-imex"
56
imex_binary = '/usr/bin/nvidia-imex'
67
imex_ctl_binary = '/usr/bin/nvidia-imex-ctl'
8+
launch_template_id = 'lt-123456789012'
9+
cluster_artifacts_s3_url = 'https://aws_region-aws-parallelcluster.s3.aws_region.AWS_DOMAIN'
710

811
class ConvergeNvidiaImex
912
def self.install(chef_run)
@@ -146,7 +149,6 @@ def self.configure(chef_run)
146149
ConvergeNvidiaImex.install(runner)
147150
end
148151
cached(:node) { chef_run.node }
149-
150152
it 'does not install nvidia-imex' do
151153
is_expected.not_to install_package('nvidia-imex')
152154
end
@@ -167,99 +169,103 @@ def self.configure(chef_run)
167169
end
168170
end
169171

170-
context 'when nvidia is enabled' do
171-
cached(:chef_run) do
172-
stubs_for_resource('nvidia_imex') do |res|
173-
allow(res).to receive(:nvidia_enabled_or_installed?).and_return(true)
174-
allow(File).to receive(:exist?).with(imex_ctl_binary).and_return(false)
175-
allow(File).to receive(:exist?).with(imex_binary).and_return(false)
172+
%w(aarch64 x86_64).each do |arm_or_x86|
173+
context "when nvidia is enabled on #{arm_or_x86}" do
174+
cached(:nvidia_imex_version) { "1.2.3-1" }
175+
cached(:nvidia_imex_package) { "nvidia-imex-1" }
176+
cached(:nvidia_imex_name) do
177+
if %(redhat rocky).include?(platform) || platform == 'amazon' && version == '2023'
178+
"#{nvidia_imex_package}-#{nvidia_imex_version}"
179+
else
180+
"#{nvidia_imex_package}_#{nvidia_imex_version}"
181+
end
176182
end
177-
runner(platform: platform, version: version, step_into: ['nvidia_imex'])
178-
end
179-
cached(:nvidia_imex_version) { "1.2.3-1" }
180-
cached(:nvidia_imex_package) { "nvidia-imex-1" }
181-
cached(:nvidia_imex_name) do
182-
if %(redhat rocky).include?(platform) || platform == 'amazon' && version == '2023'
183-
"#{nvidia_imex_package}-#{nvidia_imex_version}"
184-
else
185-
"#{nvidia_imex_package}_#{nvidia_imex_version}"
183+
cached(:url_arch) do
184+
if %(redhat rocky amazon).include?(platform)
185+
arm_or_x86
186+
elsif platform == 'ubuntu'
187+
arm_or_x86 == 'x86_64' ? 'amd64' : 'arm64'
188+
else
189+
arm_or_x86 == 'x86_64' ? 'x86_64' : 'aarch64'
190+
end
191+
end
192+
cached(:url_suffix) do
193+
if %(redhat rocky).include?(platform)
194+
"rhel#{version}/#{nvidia_imex_name}.#{url_arch}"
195+
elsif platform == 'amazon' && version == '2023'
196+
"amzn2023/#{nvidia_imex_name}.#{url_arch}"
197+
else
198+
"#{platform}#{version.delete('.')}/#{nvidia_imex_name}_#{url_arch}"
199+
end
186200
end
187-
end
188-
cached(:node) { chef_run.node }
189-
190-
before do
191-
chef_run.node.override['cluster']['nvidia']['imex']['shared_dir'] = nvidia_imex_shared_dir
192-
chef_run.node.override['cluster']['region'] = 'aws_region'
193-
chef_run.node.override['cluster']['nvidia']['driver_version'] = nvidia_version
194-
ConvergeNvidiaImex.install(chef_run)
195-
end
196201

197-
if platform == 'amazon' && version == '2'
198-
it 'does not install nvidia-imex' do
199-
is_expected.not_to add_nvidia_repo('add nvidia repository')
200-
is_expected.not_to create_directory(nvidia_imex_shared_dir)
201-
is_expected.not_to create_template("#{nvidia_imex_shared_dir}/config.cfg")
202-
.with(source: 'nvidia-imex/nvidia-imex-config.erb')
203-
.with(user: 'root')
204-
.with(group: 'root')
205-
.with(mode: '0755')
206-
is_expected.not_to create_template("#{nvidia_imex_shared_dir}/nodes_config.cfg")
207-
.with(source: 'nvidia-imex/nvidia-imex-nodes.erb')
208-
.with(user: 'root')
209-
.with(group: 'root')
210-
.with(mode: '0755')
211-
is_expected.not_to create_template("/etc/systemd/system/nvidia-imex.service")
212-
.with(source: 'nvidia-imex/nvidia-imex.service.erb')
213-
.with(user: 'root')
214-
.with(group: 'root')
215-
.with(mode: '0644')
216-
is_expected.not_to install_install_packages('Install nvidia-imex')
217-
.with(packages: "#{nvidia_imex_name}")
218-
.with(action: %i(install))
202+
cached(:chef_run) do
203+
stubs_for_resource('nvidia_imex') do |res|
204+
allow(res).to receive(:nvidia_enabled_or_installed?).and_return(true)
205+
allow(File).to receive(:exist?).with(imex_ctl_binary).and_return(false)
206+
allow(File).to receive(:exist?).with(imex_binary).and_return(false)
207+
end
208+
runner(platform: platform, version: version, step_into: ['nvidia_imex'])
219209
end
220-
it 'does not set nvidia-imex version' do
221-
expect(node.default['cluster']['nvidia']['imex']['version']).not_to eq(nvidia_imex_version)
222-
expect(node.default['cluster']['nvidia']['imex']['package']).not_to eq(nvidia_imex_package)
223-
is_expected.not_to write_node_attributes('dump node attributes')
224-
is_expected.not_to remove_nvidia_repo('remove nvidia repository')
210+
cached(:node) { chef_run.node }
211+
212+
before do
213+
chef_run.node.override['cluster']['nvidia']['imex']['shared_dir'] = nvidia_imex_shared_dir
214+
chef_run.node.override['cluster']['artifacts_s3_url'] = cluster_artifacts_s3_url
215+
chef_run.node.override['cluster']['region'] = 'aws_region'
216+
chef_run.node.override['cluster']['sources_dir'] = SOURCE_DIR
217+
chef_run.node.automatic['kernel']['machine'] = arm_or_x86
218+
chef_run.node.override['cluster']['nvidia']['driver_version'] = nvidia_version
219+
ConvergeNvidiaImex.install(chef_run)
225220
end
226-
else
227-
it 'installs nvidia-imex' do
228-
is_expected.to add_nvidia_repo('add nvidia repository')
229-
is_expected.to create_directory(nvidia_imex_shared_dir)
230-
231-
is_expected.to create_template("#{nvidia_imex_shared_dir}/config.cfg")
232-
.with(source: 'nvidia-imex/nvidia-imex-config.erb')
233-
.with(user: 'root')
234-
.with(group: 'root')
235-
.with(mode: '0755')
236-
is_expected.to create_template("#{nvidia_imex_shared_dir}/nodes_config.cfg")
237-
.with(source: 'nvidia-imex/nvidia-imex-nodes.erb')
238-
.with(user: 'root')
239-
.with(group: 'root')
240-
.with(mode: '0755')
241-
is_expected.to create_template("/etc/systemd/system/nvidia-imex.service")
242-
.with(source: 'nvidia-imex/nvidia-imex.service.erb')
243-
.with(user: 'root')
244-
.with(group: 'root')
245-
.with(mode: '0644')
246-
if platform == 'ubuntu'
247-
is_expected.to install_apt_package('Install nvidia-imex')
248-
.with(package_name: nvidia_imex_package)
249-
.with(version: nvidia_imex_version)
250-
.with(retries: 10)
251-
.with(retry_delay: 5)
252-
else
253-
is_expected.to install_install_packages('Install nvidia-imex')
254-
.with(packages: nvidia_imex_name)
221+
if platform == 'amazon' && version == '2'
222+
it 'does not install nvidia-imex' do
223+
is_expected.not_to create_directory(nvidia_imex_shared_dir)
224+
is_expected.not_to install_install_packages('Install nvidia-imex')
225+
.with(packages: "#{nvidia_imex_name}")
255226
.with(action: %i(install))
256227
end
257-
end
258-
it 'sets nvidia-imex version' do
259-
expect(node.default['cluster']['nvidia']['imex']['version']).to eq(nvidia_imex_version)
260-
expect(node.default['cluster']['nvidia']['imex']['package']).to eq(nvidia_imex_package)
261-
is_expected.to write_node_attributes('dump node attributes')
262-
is_expected.to remove_nvidia_repo('remove nvidia repository')
228+
it 'does not set nvidia-imex version' do
229+
expect(node.default['cluster']['nvidia']['imex']['version']).not_to eq(nvidia_imex_version)
230+
expect(node.default['cluster']['nvidia']['imex']['package']).not_to eq(nvidia_imex_package)
231+
is_expected.not_to write_node_attributes('dump node attributes')
232+
end
233+
else
234+
235+
it 'installs nvidia-imex' do
236+
is_expected.to create_directory(nvidia_imex_shared_dir)
237+
if platform == 'ubuntu'
238+
is_expected.to create_if_missing_remote_file("#{SOURCE_DIR}/#{nvidia_imex_package}-#{nvidia_imex_version}.deb").with(
239+
source: "#{cluster_artifacts_s3_url}/dependencies/nvidia_imex/#{url_suffix}.deb",
240+
mode: '0644',
241+
retries: 3,
242+
retry_delay: 5
243+
)
244+
is_expected.to run_bash('Install nvidia-imex')
245+
.with(user: 'root')
246+
.with_retries(3)
247+
.with_retry_delay(5)
248+
.with_code(/ set -e\n dpkg -i #{nvidia_imex_package}-#{nvidia_imex_version}.deb && apt-mark hold #{nvidia_imex_package}/)
249+
else
250+
is_expected.to create_if_missing_remote_file("#{SOURCE_DIR}/#{nvidia_imex_package}-#{nvidia_imex_version}.rpm").with(
251+
source: "#{cluster_artifacts_s3_url}/dependencies/nvidia_imex/#{url_suffix}.rpm",
252+
mode: '0644',
253+
retries: 3,
254+
retry_delay: 5
255+
)
256+
is_expected.to install_package('yum-plugin-versionlock')
257+
is_expected.to run_bash("Install nvidia-imex")
258+
.with(user: 'root')
259+
.with_retries(3)
260+
.with_retry_delay(5)
261+
.with_code(/yum install -y #{nvidia_imex_name}.rpm/)
262+
end
263+
end
264+
it 'sets nvidia-imex version' do
265+
expect(node.default['cluster']['nvidia']['imex']['version']).to eq(nvidia_imex_version)
266+
expect(node.default['cluster']['nvidia']['imex']['package']).to eq(nvidia_imex_package)
267+
is_expected.to write_node_attributes('dump node attributes')
268+
end
263269
end
264270
end
265271
end
@@ -299,16 +305,52 @@ def self.configure(chef_run)
299305

300306
before do
301307
chef_run.node.override['cluster']['region'] = 'aws_region'
308+
chef_run.node.override['cluster']['nvidia']['imex']['shared_dir'] = nvidia_imex_shared_dir
302309
chef_run.node.override['cluster']['node_type'] = node_type
310+
chef_run.node.override['cluster']['launch_template_id'] = launch_template_id
303311
ConvergeNvidiaImex.configure(chef_run)
304312
end
305313

306314
if (platform == 'amazon' && version == '2') || %w(HeadNode LoginNode).include?(node_type)
307315
it 'does not configure nvidia-imex' do
316+
is_expected.not_to create_template("#{nvidia_imex_shared_dir}/nodes_config_#{launch_template_id}.cfg")
317+
.with(source: 'nvidia-imex/nvidia-imex-nodes.erb')
318+
.with(user: 'root')
319+
.with(group: 'root')
320+
.with(mode: '0755')
321+
is_expected.not_to create_template("#{nvidia_imex_shared_dir}/config_#{launch_template_id}.cfg")
322+
.with(source: 'nvidia-imex/nvidia-imex-config.erb')
323+
.with(user: 'root')
324+
.with(group: 'root')
325+
.with(mode: '0755')
326+
.with(variables: { imex_nodes_config_file_path: "#{nvidia_imex_shared_dir}/nodes_config_#{launch_template_id}.cfg" })
327+
is_expected.not_to create_template("/etc/systemd/system/nvidia-imex.service")
328+
.with(source: 'nvidia-imex/nvidia-imex.service.erb')
329+
.with(user: 'root')
330+
.with(group: 'root')
331+
.with(mode: '0644')
332+
.with(variables: { imex_main_config_file_path: "#{nvidia_imex_shared_dir}/config_#{launch_template_id}.cfg" })
308333
is_expected.not_to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true })
309334
end
310335
else
311336
it 'it starts nvidia-imex service' do
337+
is_expected.to create_template("#{nvidia_imex_shared_dir}/nodes_config_#{launch_template_id}.cfg")
338+
.with(source: 'nvidia-imex/nvidia-imex-nodes.erb')
339+
.with(user: 'root')
340+
.with(group: 'root')
341+
.with(mode: '0755')
342+
is_expected.to create_template("#{nvidia_imex_shared_dir}/config_#{launch_template_id}.cfg")
343+
.with(source: 'nvidia-imex/nvidia-imex-config.erb')
344+
.with(user: 'root')
345+
.with(group: 'root')
346+
.with(mode: '0755')
347+
.with(variables: { imex_nodes_config_file_path: "#{nvidia_imex_shared_dir}/nodes_config_#{launch_template_id}.cfg" })
348+
is_expected.to create_template("/etc/systemd/system/nvidia-imex.service")
349+
.with(source: 'nvidia-imex/nvidia-imex.service.erb')
350+
.with(user: 'root')
351+
.with(group: 'root')
352+
.with(mode: '0644')
353+
.with(variables: { imex_main_config_file_path: "#{nvidia_imex_shared_dir}/config_#{launch_template_id}.cfg" })
312354
is_expected.to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true })
313355
end
314356
end

cookbooks/aws-parallelcluster-platform/test/controls/nvidia_imex_spec.rb

Lines changed: 8 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -22,31 +22,6 @@
2222
end
2323
end
2424

25-
nvidia_imex_dir = "#{node['cluster']['nvidia']['imex']['shared_dir']}"
26-
27-
describe file("#{nvidia_imex_dir}/config.cfg") do
28-
it { should exist }
29-
its('owner') { should eq 'root' }
30-
its('group') { should eq 'root' }
31-
its('mode') { should cmp '0755' }
32-
its('content') { should match %r{IMEX_NODE_CONFIG_FILE=#{nvidia_imex_dir}/nodes_config.cfg} }
33-
end
34-
35-
describe file("#{nvidia_imex_dir}/nodes_config.cfg") do
36-
it { should exist }
37-
its('owner') { should eq 'root' }
38-
its('group') { should eq 'root' }
39-
its('mode') { should cmp '0755' }
40-
end
41-
42-
describe file("/etc/systemd/system/#{nvidia_imex_service}.service") do
43-
it { should exist }
44-
its('owner') { should eq 'root' }
45-
its('group') { should eq 'root' }
46-
its('mode') { should cmp '0644' }
47-
its('content') { should match %r{ExecStart=/usr/bin/nvidia-imex -c #{nvidia_imex_dir}/config.cfg} }
48-
end
49-
5025
describe package("#{node['cluster']['nvidia']['imex']['package']}") do
5126
it { should be_installed }
5227
its('version') { should match /#{node['cluster']['nvidia']['imex']['version']}/ }
@@ -56,6 +31,14 @@
5631
control 'tag:config_nvidia_fabric_manager_enabled' do
5732
only_if { instance.nvs_switch_enabled? && node['cluster']['node_type'] == "ComputeFleet" && !os_properties.alinux2? }
5833

34+
describe file("/etc/systemd/system/nvidia-imex.service") do
35+
it { should exist }
36+
its('owner') { should eq 'root' }
37+
its('group') { should eq 'root' }
38+
its('mode') { should cmp '0644' }
39+
its('content') { should match %r{ExecStart=/usr/bin/nvidia-imex -c #{node['cluster']['nvidia']['imex']['shared_dir']}} }
40+
end
41+
5942
describe service('nvidia-imex') do
6043
it { should be_enabled }
6144
it { should be_running }

0 commit comments

Comments
 (0)