Skip to content

Commit 74ffc4d

Browse files
committed
[nvidia] On Ubuntu22, install the NVIDIA driver using the gcc version used to compile the kernel.
This is required because the NVIDIA driver must be compiled with the same gcc version that was used to compile the kernel; otherwise, the NVIDIA driver installation fails its compiler version check. On newer versions of Ubuntu 22.04 (kernel 6.8+), the kernel is compiled with gcc-12, but gcc-11 is installed as the default version by build-essential, making this change necessary. Signed-off-by: Giacomo Marciani <[email protected]>
1 parent f3219fb commit 74ffc4d

File tree

4 files changed

+50
-3
lines changed

4 files changed

+50
-3
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# See the License for the specific language governing permissions and limitations under the License.
1414

1515
provides :nvidia_driver, platform: 'ubuntu' do |node|
16-
node['platform_version'].to_i >= 20
16+
node['platform_version'].to_i == 20
1717
end
1818

1919
use 'partial/_nvidia_driver_common.rb'
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License").
6+
# You may not use this file except in compliance with the License.
7+
# A copy of the License is located at
8+
#
9+
# http://aws.amazon.com/apache2.0/
10+
#
11+
# or in the "LICENSE.txt" file accompanying this file.
12+
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
13+
# See the License for the specific language governing permissions and limitations under the License.
14+
15+
# Register this implementation of the nvidia_driver resource for Ubuntu nodes
# whose major platform version is exactly 22.
provides :nvidia_driver, platform: 'ubuntu' do |node|
  major_release = node['platform_version'].to_i
  major_release == 22
end
18+
19+
use 'partial/_nvidia_driver_common.rb'
20+
21+
# Platform hook for the shared nvidia_driver partial.
# NOTE(review): presumably tells the common code to regenerate the initramfs
# after the driver is installed - confirm against _nvidia_driver_common.rb.
#
# @return [Boolean] always true on Ubuntu 22
def rebuild_initramfs?
  # Unconditional on this platform.
  true
end
24+
25+
# Builds the environment prefix placed in front of the NVIDIA installer
# command so the driver is compiled with the same gcc major version that was
# used to compile the running kernel.
#
# @return [String] "CC=/usr/bin/gcc-<major>" when the kernel's gcc major
#   version is known, or an empty string so that the installer falls back to
#   the system default compiler.
def compiler_path
  gcc_major_version = get_gcc_major_version_used_by_kernel

  # Treat nil AND any non-positive value as "not detected": a failed parse of
  # /proc/version can surface as 0 (String#to_i on empty/garbage output), and
  # "CC=/usr/bin/gcc-0" would point the installer at a compiler that cannot
  # exist instead of falling back to the default one.
  return '' unless gcc_major_version.to_i.positive?

  "CC=/usr/bin/gcc-#{gcc_major_version}"
end

cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,13 @@
7373
end
7474

7575
# Install driver
76-
# TODO remove --no-cc-version-check when we can update ubuntu 22 images
7776
bash 'nvidia.run advanced' do
7877
user 'root'
7978
group 'root'
8079
cwd '/tmp'
8180
code <<-NVIDIA
8281
set -e
83-
#{compiler_path} ./nvidia.run --silent --dkms --disable-nouveau --no-cc-version-check -m=#{nvidia_kernel_module}
82+
#{compiler_path} ./nvidia.run --silent --dkms --disable-nouveau -m=#{nvidia_kernel_module}
8483
rm -f /tmp/nvidia.run
8584
NVIDIA
8685
creates '/usr/bin/nvidia-smi'

cookbooks/aws-parallelcluster-shared/libraries/helpers.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,17 @@ def wait_sync_file(path)
106106
timeout 5
107107
end
108108
end
109+
110+
# Detects the major version of the gcc used to compile the running kernel
# (e.g. 12) by parsing the eighth whitespace-separated field of /proc/version.
#
# @return [Integer, nil] the gcc major version, or nil when the command fails
#   or its output is not a positive integer.
def get_gcc_major_version_used_by_kernel
  raw_version = shell_out!("awk '{print $8}' /proc/version | tr -d ',' | cut -d '.' -f 1").stdout.strip

  # The pipeline's exit status only reflects its last command, so a failure of
  # awk/tr yields empty output rather than an exception. String#to_i would turn
  # that (or any non-numeric token) into 0, hence the explicit validation.
  major_digits = raw_version[/\A[1-9]\d*(?=\.|\z)/]
  if major_digits.nil?
    Chef::Log.error("Cannot detect gcc version used to compile the kernel: unexpected value '#{raw_version}'")
    return nil
  end

  gcc_major_version = major_digits.to_i
  Chef::Log.info("Detected version of gcc used to compile the kernel is: #{gcc_major_version}")
  gcc_major_version
rescue StandardError => error
  Chef::Log.error("Cannot detect gcc version used to compile the kernel: #{error}")
  nil
end

0 commit comments

Comments
 (0)