diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index f9117a26a..614c0adb9 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-250506-1259-abb6394b", - "RL9": "openhpc-RL9-250506-1259-abb6394b" + "RL8": "openhpc-RL8-250513-1045-ca44f898", + "RL9": "openhpc-RL9-250513-1046-ca44f898" } } diff --git a/environments/common/inventory/group_vars/all/openhpc.yml b/environments/common/inventory/group_vars/all/openhpc.yml index bcda89b56..89f0a859c 100644 --- a/environments/common/inventory/group_vars/all/openhpc.yml +++ b/environments/common/inventory/group_vars/all/openhpc.yml @@ -57,3 +57,8 @@ ohpc_openhpc_repos: ohpc_default_extra_repos: "9": [] "8": [] + +# systemd.service.unit.TimeoutStartSec to wait for slurmdbd startup +# Set long enough to avoid problems with a major version upgrade +# Currently implemented in environments/common/inventory/group_vars/all/systemd.yml +openhpc_slurmdbd_timeout_start_sec: '45 minutes' diff --git a/environments/common/inventory/group_vars/all/systemd.yml b/environments/common/inventory/group_vars/all/systemd.yml index 2c5e03e35..4c7538aa6 100644 --- a/environments/common/inventory/group_vars/all/systemd.yml +++ b/environments/common/inventory/group_vars/all/systemd.yml @@ -1,9 +1,11 @@ _systemd_requiresmount_statedir: | + {% if appliances_state_dir is defined %} [Unit] RequiresMountsFor={{ appliances_state_dir | default('') }} + {% endif %} -_systemd_dropins_statedir: - # mysql not included as role handles state dir correctly +systemd_dropins: + # NB: mysql does not need _systemd_requiresmount_statedir as role handles state dir correctly opensearch: group: opensearch content: "{{ _systemd_requiresmount_statedir }}" @@ -12,12 +14,16 @@ _systemd_dropins_statedir: content: "{{ _systemd_requiresmount_statedir }}" 
slurmdbd: group: openhpc - content: "{{ _systemd_requiresmount_statedir }}" + content: | + {{ _systemd_requiresmount_statedir }} + + [Service] + # Allow slurmdbd to complete major version upgrades + TimeoutStartSec={{ openhpc_slurmdbd_timeout_start_sec }} + slurmctld: group: openhpc content: "{{ _systemd_requiresmount_statedir }}" prometheus: group: prometheus content: "{{ _systemd_requiresmount_statedir }}" - -systemd_dropins: "{{ _systemd_dropins_statedir if appliances_state_dir is defined else {} }}" diff --git a/environments/common/inventory/group_vars/all/timestamps.yml b/environments/common/inventory/group_vars/all/timestamps.yml index 2f31ee1a3..8d046437a 100644 --- a/environments/common/inventory/group_vars/all/timestamps.yml +++ b/environments/common/inventory/group_vars/all/timestamps.yml @@ -63,10 +63,10 @@ appliances_pulp_repos: openhpc_updates: '8': path: OpenHPC/2/updates/EL_8 - timestamp: 20241218T154614 + timestamp: 20250512T003315 '9': path: OpenHPC/3/updates/EL_9 - timestamp: 20241218T154614 + timestamp: 20250510T003301 grafana: '8': path: grafana/oss/rpm