Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/stackhpc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,16 @@ jobs:
env:
DEMO_USER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }}

# Workflow step: sources venv/bin/activate and environments/.stackhpc/activate,
# then, only when $SNAPSHOT is non-empty, echoes its value and runs
# `openstack volume snapshot delete` on it.
# NOTE(review): where SNAPSHOT is set is not visible in this hunk — confirm it
# is exported to this step's environment by an earlier step.
- name: Delete possible volume snapshot from slurm upgrade
run: |
. venv/bin/activate
. environments/.stackhpc/activate
if [ -n "$SNAPSHOT" ]
then
echo Deleting $SNAPSHOT
openstack volume snapshot delete $SNAPSHOT
fi

- name: Delete infrastructure
run: |
. venv/bin/activate
Expand Down
4 changes: 2 additions & 2 deletions environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"cluster_image": {
"RL8": "openhpc-RL8-250513-1045-ca44f898",
"RL9": "openhpc-RL9-250513-1046-ca44f898"
"RL8": "openhpc-RL8-250514-1502-5a923b2c",
"RL9": "openhpc-RL9-250514-1502-5a923b2c"
}
}
13 changes: 9 additions & 4 deletions environments/common/inventory/group_vars/all/openhpc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,12 @@ ohpc_default_extra_repos:
"9": []
"8": []

# systemd.service.unit.TimeoutStartSec to wait for slurmdbd startup
# Set long enough to avoid problems with a major version upgrade
# Currently implemented in environments/common/inventory/group_vars/all/systemd.yml
openhpc_slurmdbd_timeout_start_sec: '45 minutes'
# configure slurm database pre-upgrade backups:
openhpc_slurm_accounting_storage_service: mysql
openhpc_slurm_accounting_storage_backup_cmd: >-
openstack volume snapshot create
--volume {{ openhpc_cluster_name }}-state
--force
{{ openhpc_cluster_name }}-state-{{ ansible_date_time.iso8601_basic_short }}
openhpc_slurm_accounting_storage_backup_host: localhost
openhpc_slurm_accounting_storage_backup_become: false
8 changes: 1 addition & 7 deletions environments/common/inventory/group_vars/all/systemd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,7 @@ systemd_dropins:
content: "{{ _systemd_requiresmount_statedir }}"
slurmdbd:
group: openhpc
content: |
{{ _systemd_requiresmount_statedir }}

[Service]
# Allow slurmdbd to complete major version upgrades
TimeoutStartSec={{ openhpc_slurmdbd_timeout_start_sec }}

content: "{{ _systemd_requiresmount_statedir }}"
slurmctld:
group: openhpc
content: "{{ _systemd_requiresmount_statedir }}"
Expand Down
2 changes: 1 addition & 1 deletion requirements.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ roles:
version: v25.3.2
name: stackhpc.nfs
- src: https://github.com/stackhpc/ansible-role-openhpc.git
version: v0.28.0
version: v0.30.0
name: stackhpc.openhpc
- src: https://github.com/stackhpc/ansible-node-exporter.git
version: stackhpc
Expand Down
Loading