Skip to content

Commit 3e25057

Browse files
committed
DONOTMERGE Force NVIDIA driver to use the same gcc version used to compile the kernel
Signed-off-by: Giacomo Marciani <[email protected]>
1 parent f3219fb commit 3e25057

File tree

3 files changed

+43
-2
lines changed

3 files changed

+43
-2
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# See the License for the specific language governing permissions and limitations under the License.
1414

1515
provides :nvidia_driver, platform: 'ubuntu' do |node|
16-
node['platform_version'].to_i >= 20
16+
node['platform_version'].to_i == 20
1717
end
1818

1919
use 'partial/_nvidia_driver_common.rb'
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License").
6+
# You may not use this file except in compliance with the License.
7+
# A copy of the License is located at
8+
#
9+
# http://aws.amazon.com/apache2.0/
10+
#
11+
# or in the "LICENSE.txt" file accompanying this file.
12+
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
13+
# See the License for the specific language governing permissions and limitations under the License.
14+
15+
# Registers this resource as the :nvidia_driver implementation for the 'ubuntu' platform,
# but only on nodes whose platform_version converts (via to_i) to exactly 22.
provides :nvidia_driver, platform: 'ubuntu' do |node|
16+
node['platform_version'].to_i == 22
17+
end
18+
19+
use 'partial/_nvidia_driver_common.rb'
20+
21+
# Tells whether the NVIDIA driver build needs an explicit compiler override.
#
# Ubuntu 22.04 with kernel 6.8.x needs CC set to /usr/bin/gcc-12 (using a dkms override).
#
# @return [Boolean] true only when the kernel release reported by the node is 6.8.x
def set_compiler?
  major, minor = node['kernel']['release'].split('.').first(2).map(&:to_i)
  # Both components must match: testing the minor version alone would also accept
  # unrelated kernels such as 5.8.x.
  major == 6 && minor == 8
end
26+
27+
# Always answers true for this Ubuntu 22 resource.
# NOTE(review): presumably read by the shared partial to decide whether the initramfs
# is rebuilt after the driver install — the caller is not visible here; confirm.
def rebuild_initramfs?
28+
true
29+
end
30+
31+
# Returns the literal string 'gcc'.
# NOTE(review): presumably the compiler name/package consumed by the shared partial;
# it carries no version suffix even though compiler_path points at gcc-12 — confirm intended.
def compiler_version
32+
'gcc'
33+
end
34+
35+
# Returns the environment assignment 'CC=/usr/bin/gcc-12'.
# The shared partial interpolates this value directly in front of `./nvidia.run` in its
# install script, so the installer is launched with CC set to gcc-12.
def compiler_path
36+
'CC=/usr/bin/gcc-12'
37+
end

cookbooks/aws-parallelcluster-platform/resources/nvidia_driver/partial/_nvidia_driver_common.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
cwd '/tmp'
8181
code <<-NVIDIA
8282
set -e
83-
#{compiler_path} ./nvidia.run --silent --dkms --disable-nouveau --no-cc-version-check -m=#{nvidia_kernel_module}
83+
#{compiler_path} ./nvidia.run --silent --dkms --disable-nouveau -m=#{nvidia_kernel_module}
8484
rm -f /tmp/nvidia.run
8585
NVIDIA
8686
creates '/usr/bin/nvidia-smi'
@@ -120,6 +120,10 @@ def compiler_path
120120
""
121121
end
122122

123+
# Default implementation: returns an empty array.
# NOTE(review): presumably a hook that platform-specific resources override to list
# additional packages — no override or caller is visible in this diff; confirm.
def extra_packages
124+
[]
125+
end
126+
123127
def nvidia_kernel_module
124128
if ['false', 'no', false].include?(node['cluster']['nvidia']['kernel_open'])
125129
"kernel"

0 commit comments

Comments
 (0)