Skip to content

Commit df1b861

Browse files
authored
Merge branch 'develop' into versionbumpdevelop3.14.0
2 parents ebe3beb + cc40fa4 commit df1b861

File tree

25 files changed

+98
-44
lines changed

25 files changed

+98
-44
lines changed

CHANGELOG.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,10 @@ This file is used to list changes made in each version of the AWS ParallelCluste
1313
------
1414
**ENHANCEMENTS**
1515
- Add support for Ubuntu 24.04.
16-
Notice that ParallelCluster official AMI for Ubuntu 24.04 does not support Lustre.
1716
- Disable unused services like cups and wpa_supplicant from Official ParallelCluster AMIs to improve security.
1817

1918
**CHANGES**
20-
- Upgrade Slurm to version 24.05.6.
19+
- Upgrade Slurm to version 24.05.7.
2120
- Upgrade NVIDIA driver to version 570.86.15 (from 550.127.08) for all OSs except AL2.
2221
- Upgrade CUDA Toolkit to version 12.8.0 (from 12.4.1) for all OSs except AL2.
2322
- Upgrade Python to 3.12.8 for all OSs except AL2 (from 3.9.20).
@@ -43,6 +42,10 @@ This file is used to list changes made in each version of the AWS ParallelCluste
4342
- Remove generation of DSA keys for login nodes, as DSA became unsupported in OpenSSH 9.7+.
4443
- Set instance ID and instance type information in Slurm upon compute nodes launch.
4544
- Install NVIDIA drivers without the option 'no-cc-version-check', which is now deprecated in the NVIDIA installer.
45+
- Reduce RHEL/Rocky Linux boot time with the following network customizations:
46+
- Configuring higher priority to IPv4 than IPv6
47+
- Disabling Internet connectivity check
48+
- Configuring only the IPv4 IMDS endpoint in cloud-init
4649

4750
**BUG FIXES**
4851
- Remove usage of cfn-init for compute node bootstrapping to reduce node scale-up time.

cookbooks/aws-parallelcluster-environment/libraries/fsx.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@ def aws_domain_for_fsx(region)
1010
end
1111

1212
def lustre_enabled?
13-
['yes', true].include?(node['cluster']['lustre']['enabled'])
13+
['yes', true, 'true'].include?(node['cluster']['lustre']['enabled'])
1414
end

cookbooks/aws-parallelcluster-environment/resources/lustre/partial/_install_lustre_debian.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
# See the License for the specific language governing permissions and limitations under the License.
1515

1616
action :setup do
17-
return if node['platform_version'].to_i == 24
1817
apt_repository 'fsxlustreclientrepo' do
1918
uri "https://fsx-lustre-client-repo.s3.amazonaws.com/ubuntu"
2019
components ['main']

cookbooks/aws-parallelcluster-environment/test/controls/cloudwatch_spec.rb

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,13 @@
2020

2121
describe 'Check the presence of the cloudwatch package gpg key'
2222
# On Ubuntu 20.04 and later, due to an environment variable, the keyring is placed under the home of the user ubuntu with root permissions
23-
ubuntu2004 = os_properties.ubuntu2004?
24-
ubuntu2204 = os_properties.ubuntu2204?
25-
keyring = (ubuntu2004 || ubuntu2204) && !os_properties.on_docker? ? '--keyring /home/ubuntu/.gnupg/pubring.kbx' : ''
23+
is_ubuntu = os_properties.ubuntu?
24+
keyring = is_ubuntu && !os_properties.on_docker? ? '--keyring /home/ubuntu/.gnupg/pubring.kbx' : ''
2625
sudo = os_properties.redhat_on_docker? ? '' : 'sudo'
2726
describe bash("#{sudo} gpg --list-keys #{keyring}") do
2827
# Don't check exit status on Ubuntu: gpg returns 2 (first observed on Ubuntu 20.04) when executed in the validate phase of a created AMI
2928
# os_properties cannot be used in the describe block level. It can be used within an it{} block
30-
its('exit_status') { should eq 0 } unless ubuntu2004 || ubuntu2204
29+
its('exit_status') { should eq 0 } unless is_ubuntu
3130
its('stdout') { should match /3B789C72/ }
3231
its('stdout') { should match /Amazon CloudWatch Agent/ }
3332
end

cookbooks/aws-parallelcluster-environment/test/controls/lustre_spec.rb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
end
2828
end
2929

30-
if os_properties.redhat? && inspec.os.release.to_f >= 8.2 && !os_properties.on_docker? && !os_properties.ubuntu2404?
30+
if os_properties.redhat? && inspec.os.release.to_f >= 8.2 && !os_properties.on_docker?
3131
# TODO: restore installation and check on docker when Lustre is available for RH8.9
3232
# See: https://docs.aws.amazon.com/fsx/latest/LustreGuide/install-lustre-client.html
3333
unless inspec.os.release.to_f == 8.7 && (node['cluster']['kernel_release'].include?("4.18.0-425.3.1.el8") || node['cluster']['kernel_release'].include?("4.18.0-425.13.1.el8_7"))
@@ -55,7 +55,7 @@
5555
end
5656
end
5757

58-
if os_properties.debian_family? && !os_properties.ubuntu2404?
58+
if os_properties.debian_family?
5959
describe apt('https://fsx-lustre-client-repo.s3.amazonaws.com/ubuntu') do
6060
it { should exist }
6161
it { should be_enabled }
@@ -89,7 +89,7 @@
8989

9090
control 'tag:install_lustre_lnet_kernel_module_enabled' do
9191
title "Verify that lnet kernel module is enabled"
92-
only_if { !os_properties.on_docker? && !os_properties.alinux? && !os_properties.ubuntu2404? }
92+
only_if { !os_properties.on_docker? && !os_properties.alinux? }
9393
describe kernel_module("lnet") do
9494
it { should be_loaded }
9595
it { should_not be_disabled }
@@ -98,15 +98,15 @@
9898
end
9999

100100
control 'lustre_mounted' do
101-
only_if { !os_properties.on_docker? && !os_properties.ubuntu2404? }
101+
only_if { !os_properties.on_docker? }
102102
describe mount('/shared_dir') do
103103
it { should be_mounted }
104104
its('type') { should eq 'lustre' }
105105
end
106106
end
107107

108108
control 'lustre_unmounted' do
109-
only_if { !os_properties.on_docker? && !os_properties.ubuntu2404? }
109+
only_if { !os_properties.on_docker? }
110110

111111
describe mount('/shared_dir') do
112112
it { should_not be_mounted }

cookbooks/aws-parallelcluster-platform/files/ami_cleanup.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#!/bin/bash
22

3+
IS_OFFICIAL_AMI_BUILD=${1:-"false"}
4+
35
# clean up cloud init artifacts https://cloudinit.readthedocs.io/en/latest/topics/cli.html#clean
46
cloud-init clean -s
57

@@ -20,5 +22,12 @@ if [ "${ID}${VERSION_ID}" == "centos7" ]; then
2022
rm -f /etc/sysconfig/network-scripts/ifcfg-eth0
2123
fi
2224

25+
# Clean resolv.conf if it's not managed by system
26+
if [ "${IS_OFFICIAL_AMI_BUILD}" == "true" ]; then
27+
echo "Clean resolv.conf for official AMIs"
28+
echo -n > /etc/resolv.conf
29+
rm -f /run/systemd/resolve/resolv.conf
30+
fi
31+
2332
find /var/log -type f -exec /bin/rm -v {} \;
2433
touch /var/log/lastlog

cookbooks/aws-parallelcluster-platform/libraries/nvidia.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
def nvidia_enabled?
2-
['yes', true].include?(node['cluster']['nvidia']['enabled'])
2+
['yes', true, 'true'].include?(node['cluster']['nvidia']['enabled'])
33
end
44

55
#

cookbooks/aws-parallelcluster-platform/recipes/config/cluster_user.rb

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,9 @@
7373
bash "copy_auth_file" do
7474
code <<-PERMS
7575
set -e
76-
cp -p #{node['cluster']['shared_dir']}/authorized_keys #{node['cluster']['cluster_user_home']}/.ssh/authorized_keys
76+
cp #{node['cluster']['shared_dir']}/authorized_keys #{node['cluster']['cluster_user_home']}/.ssh/authorized_keys
77+
chmod --reference=#{node['cluster']['shared_dir']}/authorized_keys #{node['cluster']['cluster_user_home']}/.ssh/authorized_keys
78+
chown --reference=#{node['cluster']['shared_dir']}/authorized_keys #{node['cluster']['cluster_user_home']}/.ssh/authorized_keys
7779
PERMS
7880
only_if { node['cluster']['default_user_home'] == 'local' }
7981
end
@@ -90,7 +92,9 @@
9092
bash "copy_auth_file" do
9193
code <<-PERMS
9294
set -e
93-
cp -p #{node['cluster']['shared_dir_login_nodes']}/authorized_keys #{node['cluster']['cluster_user_home']}/.ssh/authorized_keys
95+
cp #{node['cluster']['shared_dir_login_nodes']}/authorized_keys #{node['cluster']['cluster_user_home']}/.ssh/authorized_keys
96+
chmod --reference=#{node['cluster']['shared_dir_login_nodes']}/authorized_keys #{node['cluster']['cluster_user_home']}/.ssh/authorized_keys
97+
chown --reference=#{node['cluster']['shared_dir_login_nodes']}/authorized_keys #{node['cluster']['cluster_user_home']}/.ssh/authorized_keys
9498
PERMS
9599
only_if { node['cluster']['default_user_home'] == 'local' }
96100
end

cookbooks/aws-parallelcluster-platform/resources/fabric_manager/partial/_fabric_manager_common.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def _fabric_manager_enabled
4545
end
4646

4747
def _nvidia_enabled
48-
nvidia_enabled.nil? ? ['yes', true].include?(node['cluster']['nvidia']['enabled']) : nvidia_enabled
48+
nvidia_enabled.nil? ? ['yes', true, 'true'].include?(node['cluster']['nvidia']['enabled']) : nvidia_enabled
4949
end
5050

5151
def _nvidia_driver_version

cookbooks/aws-parallelcluster-platform/resources/nvidia_dcgm/partial/_nvidia_dcgm_common.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
end
2525

2626
def _nvidia_enabled
27-
nvidia_enabled.nil? ? ['yes', true].include?(node['cluster']['nvidia']['enabled']) : nvidia_enabled
27+
nvidia_enabled.nil? ? ['yes', true, 'true'].include?(node['cluster']['nvidia']['enabled']) : nvidia_enabled
2828
end
2929

3030
def package_version

0 commit comments

Comments
 (0)