Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@ aws-parallelcluster-cookbook CHANGELOG

This file is used to list changes made in each version of the AWS ParallelCluster cookbook.

3.14.1
------

**CHANGES**
- Add chef attribute `cluster/in_place_update_on_fleet_enabled` to disable in-place updates on compute and login nodes
and achieve better performance at scale.
- Load kernel module `drm_client_lib` before installation of NVIDIA driver, if available on the kernel.

3.14.0
------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@
end
end

# Best-effort kernel module loading: every candidate module is first probed
# with modinfo and is only passed to modprobe when that probe succeeds.
kernel_modules_to_load.each do |module_name|
  execute "Load kernel module if exposed by the kernel: #{module_name}" do
    command "if modinfo #{module_name}; then modprobe #{module_name}; fi"
  end
end

# Install driver
bash 'nvidia.run advanced' do
user 'root'
Expand Down Expand Up @@ -126,3 +133,7 @@ def nvidia_kernel_module
"kernel-open"
end
end

# Names of the kernel modules that are loaded (when the kernel exposes them)
# ahead of the NVIDIA driver installation.
#
# @return [Array<String>] kernel module names
def kernel_modules_to_load
  ['drm_client_lib']
end
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,27 @@ def self.setup(chef_run, nvidia_driver_version: nil)
end
end

# Verifies the list of kernel modules exposed by the nvidia_driver resource helper.
describe 'nvidia_driver:kernel_modules_to_load' do
  cached(:chef_run) { ChefSpec::SoloRunner.new(step_into: ['nvidia_driver']) }

  # Converge first so that the 'setup' nvidia_driver resource can be looked up.
  cached(:resource) do
    ConvergeNvidiaDriver.setup(chef_run)
    chef_run.find_resource('nvidia_driver', 'setup')
  end

  it 'returns expected kernel modules' do
    expected_modules = ['drm_client_lib']
    expect(resource.kernel_modules_to_load).to eq(expected_modules)
  end
end

describe 'nvidia_driver:setup' do
for_all_oses do |platform, version|
cached(:nvidia_arch) { 'nvidia_arch' }
cached(:nvidia_kernel_module) { 'nvidia_kernel_module' }
cached(:nvidia_driver_version) { 'nvidia_driver_version' }
cached(:kernel_modules_to_load) { %w(module1 module2) }
cached(:nvidia_driver_url) { "https://us.download.nvidia.com/tesla/#{nvidia_driver_version}/NVIDIA-Linux-#{nvidia_arch}-#{nvidia_driver_version}.run" }

context "on #{platform}#{version} when nvidia_driver not enabled" do
Expand Down Expand Up @@ -176,6 +192,7 @@ def self.setup(chef_run, nvidia_driver_version: nil)
allow(res).to receive(:nvidia_arch).and_return(nvidia_arch)
allow(res).to receive(:nvidia_kernel_module).and_return(kernel_module)
allow(res).to receive(:gcc_major_version_used_by_kernel).and_return(kernel_compiler_version)
allow(res).to receive(:kernel_modules_to_load).and_return(kernel_modules_to_load)
end

stub_command("lsinitramfs /boot/initrd.img-$(uname -r) | grep nouveau").and_return(true)
Expand Down Expand Up @@ -220,6 +237,14 @@ def self.setup(chef_run, nvidia_driver_version: nil)
)
end

# Every module in the stubbed kernel_modules_to_load list must get its own
# execute resource that guards modprobe behind a modinfo check.
it 'loads kernel modules if they are exposed by the kernel' do
  kernel_modules_to_load.each do |km|
    is_expected.to run_execute("Load kernel module if exposed by the kernel: #{km}").with(
      command: "if modinfo #{km}; then modprobe #{km}; fi"
    )
  end
end

if platform == 'amazon'
compiler_version = version == '2023' ? 'gcc' : 'gcc10'
compiler_path = version == '2023' ? 'CC=/usr/bin/gcc' : 'CC=/usr/bin/gcc10-gcc'
Expand Down
Loading