From ca44f898f124829de5fb798e3238cae35bc4af1e Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 13 May 2025 10:31:29 +0000 Subject: [PATCH 1/3] bump OpenHPC snapshots to v3.1.1 (slurm 24.11.5) and v2.9.1 (slurm 23.11.11) for CVE-2025-43904 --- environments/common/inventory/group_vars/all/timestamps.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/common/inventory/group_vars/all/timestamps.yml b/environments/common/inventory/group_vars/all/timestamps.yml index 2f31ee1a3..8d046437a 100644 --- a/environments/common/inventory/group_vars/all/timestamps.yml +++ b/environments/common/inventory/group_vars/all/timestamps.yml @@ -63,10 +63,10 @@ appliances_pulp_repos: openhpc_updates: '8': path: OpenHPC/2/updates/EL_8 - timestamp: 20241218T154614 + timestamp: 20250512T003315 '9': path: OpenHPC/3/updates/EL_9 - timestamp: 20241218T154614 + timestamp: 20250510T003301 grafana: '8': path: grafana/oss/rpm From 929dde27778c0e8d70f39c196c6ca2111e031538 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 13 May 2025 11:13:59 +0000 Subject: [PATCH 2/3] bump CI image --- environments/.stackhpc/tofu/cluster_image.auto.tfvars.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index f9117a26a..614c0adb9 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-250506-1259-abb6394b", - "RL9": "openhpc-RL9-250506-1259-abb6394b" + "RL8": "openhpc-RL8-250513-1045-ca44f898", + "RL9": "openhpc-RL9-250513-1046-ca44f898" } } From a77ef88e9163713d554f2c97217499ce452be067 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 13 May 2025 12:13:15 +0000 Subject: [PATCH 3/3] extend timeout for slurmdbd startup to cope with major version upgrade on startup --- 
.../common/inventory/group_vars/all/openhpc.yml | 5 +++++ .../common/inventory/group_vars/all/systemd.yml | 16 +++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/environments/common/inventory/group_vars/all/openhpc.yml b/environments/common/inventory/group_vars/all/openhpc.yml index bcda89b56..89f0a859c 100644 --- a/environments/common/inventory/group_vars/all/openhpc.yml +++ b/environments/common/inventory/group_vars/all/openhpc.yml @@ -57,3 +57,8 @@ ohpc_openhpc_repos: ohpc_default_extra_repos: "9": [] "8": [] + +# systemd.service.unit.TimeoutStartSec to wait for slurmdbd startup +# Set long enough to avoid problems with a major version upgrade +# Currently implemented in environments/common/inventory/group_vars/all/systemd.yml +openhpc_slurmdbd_timeout_start_sec: '45 minutes' diff --git a/environments/common/inventory/group_vars/all/systemd.yml b/environments/common/inventory/group_vars/all/systemd.yml index 2c5e03e35..4c7538aa6 100644 --- a/environments/common/inventory/group_vars/all/systemd.yml +++ b/environments/common/inventory/group_vars/all/systemd.yml @@ -1,9 +1,11 @@ _systemd_requiresmount_statedir: | + {% if appliances_state_dir is defined %} [Unit] RequiresMountsFor={{ appliances_state_dir | default('') }} + {% endif %} -_systemd_dropins_statedir: - # mysql not included as role handles state dir correctly +systemd_dropins: + # NB: mysql does not need _systemd_requiresmount_statedir as role handles state dir correctly opensearch: group: opensearch content: "{{ _systemd_requiresmount_statedir }}" @@ -12,12 +14,16 @@ _systemd_dropins_statedir: content: "{{ _systemd_requiresmount_statedir }}" slurmdbd: group: openhpc - content: "{{ _systemd_requiresmount_statedir }}" + content: | + {{ _systemd_requiresmount_statedir }} + + [Service] + # Allow slurmdbd to complete major version upgrades + TimeoutStartSec={{ openhpc_slurmdbd_timeout_start_sec }} + slurmctld: group: openhpc content: "{{ _systemd_requiresmount_statedir }}"
prometheus: group: prometheus content: "{{ _systemd_requiresmount_statedir }}" - -systemd_dropins: "{{ _systemd_dropins_statedir if appliances_state_dir is defined else {} }}"