# Patch summary (text before the first "diff --git" header is not part of any hunk):
#   * CI workflow: add a step that deletes a volume snapshot before the
#     infrastructure is deleted.
#   * Move the RL8/RL9 cluster images to the 250514-1502-5a923b2c builds.
#   * Replace the slurmdbd TimeoutStartSec drop-in with settings for a
#     pre-upgrade accounting-storage backup command (an OpenStack volume
#     snapshot of "<cluster name>-state", run on localhost without become).
#   * Bump the stackhpc.openhpc role from v0.28.0 to v0.30.0.
#
# NOTE(review): the copy this was restored from had all newlines and indentation
# collapsed. Record boundaries were rebuilt from the +/- markers and checked
# against every hunk's line counts; indentation was re-derived from the
# YAML/JSON nesting and is NOT confirmed -- compare with the target files, or
# apply with `git apply --ignore-whitespace`.
# NOTE(review): the new workflow step tests "$SNAPSHOT" but never assigns it
# within the step. Unless one of the sourced activate scripts or the job
# environment provides it, the delete never runs -- confirm.
# NOTE(review): "-" records (including the "enought" typo) are kept verbatim
# because they have to match the pre-image.
diff --git a/.github/workflows/stackhpc.yml b/.github/workflows/stackhpc.yml
index 0d8846501..173b4e797 100644
--- a/.github/workflows/stackhpc.yml
+++ b/.github/workflows/stackhpc.yml
@@ -230,6 +230,16 @@ jobs:
         env:
           DEMO_USER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }}

+      - name: Delete possible volume snapshot from slurm upgrade
+        run: |
+          . venv/bin/activate
+          . environments/.stackhpc/activate
+          if [ -n "$SNAPSHOT" ]
+          then
+            echo Deleting $SNAPSHOT
+            openstack volume snapshot delete $SNAPSHOT
+          fi
+
       - name: Delete infrastructure
         run: |
           . venv/bin/activate
diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
index 614c0adb9..763165f62 100644
--- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
+++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
@@ -1,6 +1,6 @@
 {
     "cluster_image": {
-        "RL8": "openhpc-RL8-250513-1045-ca44f898",
-        "RL9": "openhpc-RL9-250513-1046-ca44f898"
+        "RL8": "openhpc-RL8-250514-1502-5a923b2c",
+        "RL9": "openhpc-RL9-250514-1502-5a923b2c"
     }
 }
diff --git a/environments/common/inventory/group_vars/all/openhpc.yml b/environments/common/inventory/group_vars/all/openhpc.yml
index 89f0a859c..abfece409 100644
--- a/environments/common/inventory/group_vars/all/openhpc.yml
+++ b/environments/common/inventory/group_vars/all/openhpc.yml
@@ -58,7 +58,12 @@ ohpc_default_extra_repos:
   "9": []
   "8": []

-# systemd.service.unit.TimeoutStartSec to wait for slurmdbd startup
-# Set long enought to avoid problems with a major version upgrade
-# Currently implemented in environments/common/inventory/group_vars/all/systemd.yml
-openhpc_slurmdbd_timeout_start_sec: '45 minutes'
+# configure slurm database pre-upgrade backups:
+openhpc_slurm_accounting_storage_service: mysql
+openhpc_slurm_accounting_storage_backup_cmd: >-
+  openstack volume snapshot create
+  --volume {{ openhpc_cluster_name }}-state
+  --force
+  {{ openhpc_cluster_name }}-state-{{ ansible_date_time.iso8601_basic_short }}
+openhpc_slurm_accounting_storage_backup_host: localhost
+openhpc_slurm_accounting_storage_backup_become: false
diff --git a/environments/common/inventory/group_vars/all/systemd.yml b/environments/common/inventory/group_vars/all/systemd.yml
index 4c7538aa6..ae72a7882 100644
--- a/environments/common/inventory/group_vars/all/systemd.yml
+++ b/environments/common/inventory/group_vars/all/systemd.yml
@@ -14,13 +14,7 @@ systemd_dropins:
     content: "{{ _systemd_requiresmount_statedir }}"
   slurmdbd:
     group: openhpc
-    content: |
-      {{ _systemd_requiresmount_statedir }}
-
-      [Service]
-      # Allow slurmdbd to complete major version upgrades
-      TimeoutStartSec={{ openhpc_slurmdbd_timeout_start_sec }}
-
+    content: "{{ _systemd_requiresmount_statedir }}"
   slurmctld:
     group: openhpc
     content: "{{ _systemd_requiresmount_statedir }}"
diff --git a/requirements.yml b/requirements.yml
index 87b2a6263..21c69c39a 100644
--- a/requirements.yml
+++ b/requirements.yml
@@ -4,7 +4,7 @@ roles:
     version: v25.3.2
     name: stackhpc.nfs
   - src: https://github.com/stackhpc/ansible-role-openhpc.git
-    version: v0.28.0
+    version: v0.30.0
     name: stackhpc.openhpc
   - src: https://github.com/stackhpc/ansible-node-exporter.git
     version: stackhpc