Skip to content

Commit 2c22de1

Browse files
committed
Merge branch 'configure-gpus' of github.com:stackhpc/ansible-slurm-appliance into configure-gpus
Apply changes to task names
2 parents d5ac3f7 + fa78672 commit 2c22de1

File tree

38 files changed

+607
-234
lines changed

.ansible-lint.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ skip_list:
66
- jinja[spacing]
77
- galaxy[no-changelog]
88
- meta-runtime[unsupported-version]
9-
10-
warn_list:
119
- name[missing]
1210
- name[play]
1311
- var-naming

.github/workflows/fatimage.yml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ jobs:
3636
build:
3737
- image_name: openhpc-RL8
3838
source_image_name: Rocky-8-GenericCloud-Base-8.10-20240528.0.x86_64.raw
39-
inventory_groups: control,compute,login,update
39+
inventory_groups: fatimage
4040
- image_name: openhpc-RL9
4141
source_image_name: Rocky-9-GenericCloud-Base-9.6-20250531.0.x86_64.qcow2
42-
inventory_groups: control,compute,login,update
42+
inventory_groups: fatimage
4343
env:
4444
ANSIBLE_FORCE_COLOR: True
4545
OS_CLOUD: openstack
@@ -118,6 +118,11 @@ jobs:
118118
. venv/bin/activate
119119
openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-id }}" || true
120120
121+
- name: Set image properties
122+
run: |
123+
. venv/bin/activate
124+
. dev/image-set-properties.sh "${{ steps.manifest.outputs.image-id }}"
125+
121126
- name: Upload manifest artifact
122127
uses: actions/upload-artifact@v4
123128
with:

.github/workflows/nightly-cleanup.yml

Lines changed: 10 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -46,53 +46,20 @@ jobs:
4646
echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml
4747
shell: bash
4848

49-
- name: Find CI clusters
49+
- name: Delete all CI clusters
5050
run: |
5151
. venv/bin/activate
52-
CI_CLUSTERS=$(openstack server list | grep --only-matching 'slurmci-RL.-[0-9]\+' | sort | uniq || true)
53-
echo "DEBUG: Raw CI clusters: $CI_CLUSTERS"
54-
55-
if [[ -z "$CI_CLUSTERS" ]]; then
56-
echo "No matching CI clusters found."
57-
else
58-
# Flatten multiline value so can be passed as env var
59-
CI_CLUSTERS_FORMATTED=$(echo "$CI_CLUSTERS" | tr '\n' ' ' | sed 's/ $//')
60-
echo "DEBUG: Formatted CI clusters: $CI_CLUSTERS_FORMATTED"
61-
echo "ci_clusters=$CI_CLUSTERS_FORMATTED" >> "$GITHUB_ENV"
62-
fi
52+
./dev/delete-cluster.py slurmci-RL --force
6353
shell: bash
64-
65-
- name: Delete CI clusters
54+
55+
- name: Delete all CI extra build VMs and volumes
6656
run: |
6757
. venv/bin/activate
68-
if [[ -z ${ci_clusters} ]]; then
69-
echo "No clusters to delete."
70-
exit 0
71-
fi
72-
73-
for cluster_prefix in ${ci_clusters}
74-
do
75-
echo "Processing cluster: $cluster_prefix"
76-
77-
# Get all servers with the matching name for control node
78-
CONTROL_SERVERS=$(openstack server list --name "${cluster_prefix}-control" --format json)
79-
80-
# Get unique server names to avoid duplicate cleanup
81-
UNIQUE_NAMES=$(echo "$CONTROL_SERVERS" | jq -r '.[].Name' | sort | uniq)
82-
for name in $UNIQUE_NAMES; do
83-
echo "Deleting cluster with control node: $name"
84-
85-
# Get the first matching server ID by name
86-
server=$(echo "$CONTROL_SERVERS" | jq -r '.[] | select(.Name=="'"$name"'") | .ID' | head -n1)
87-
88-
# Make sure server still exists (wasn't deleted earlier)
89-
if ! openstack server show "$server" &>/dev/null; then
90-
echo "Server $server no longer exists, skipping $name."
91-
continue
92-
fi
58+
./dev/delete-cluster.py openhpc-extra-RL --force
59+
shell: bash
9360

94-
echo "Deleting cluster $cluster_prefix (server $server)..."
95-
./dev/delete-cluster.py "$cluster_prefix" --force
96-
done
97-
done
61+
- name: Delete all fatimage build VMs and volumes
62+
run: |
63+
. venv/bin/activate
64+
./dev/delete-cluster.py openhpc-RL --force
9865
shell: bash

.github/workflows/s3-image-sync.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,11 @@ jobs:
168168
. venv/bin/activate
169169
bash .github/bin/get-s3-image.sh ${{ env.TARGET_IMAGE }} ${{ env.S3_BUCKET }}
170170
171+
- name: Set Glance image properties correctly for Slurm images
172+
run: |
173+
. venv/bin/activate
174+
. dev/image-set-properties.sh "${{ env.TARGET_IMAGE }}"
175+
171176
- name: Cleanup OpenStack Image (on error or cancellation)
172177
if: cancelled() || failure()
173178
run: |

.github/workflows/stackhpc.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,12 @@ jobs:
107107
. venv/bin/activate
108108
. environments/.stackhpc/activate
109109
cd "$STACKHPC_TF_DIR"
110-
tofu apply -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
110+
max_retries=3
111+
delay=30
112+
for i in $(seq 1 $max_retries); do
113+
tofu apply -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars" && break
114+
[ "$i" -lt "$max_retries" ] && sleep $delay || exit 1
115+
done
111116
112117
- name: Delete infrastructure if provisioning failed
113118
run: |

ansible/adhoc/sync-pulp.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,5 @@
55
name: pulp_site
66
tasks_from: sync.yml
77
vars:
8-
pulp_site_target_arch: "x86_64"
9-
pulp_site_target_distribution: "rocky"
108
# default distribution to *latest* specified for baseos repo:
119
pulp_site_target_distribution_version: "{{ dnf_repos_repos['baseos'].keys() | map('float') | sort | last }}"

ansible/fatimage.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@
117117
- name: Install OpenHPC
118118
ansible.builtin.import_role:
119119
name: stackhpc.openhpc
120-
tasks_from: install.yml
120+
tasks_from: install-ohpc.yml
121121
when: "'openhpc' in group_names"
122122

123123
# - import_playbook: portal.yml
@@ -206,6 +206,7 @@
206206
ansible.builtin.include_role:
207207
name: hpctests
208208
tasks_from: source-hpl.yml
209+
when: "'hpctests' in group_names"
209210

210211
- hosts: prometheus
211212
become: true

ansible/roles/dnf_repos/tasks/disable_repos.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
loop: "{{ dnf_repos_repos | dict2items }}"
1111
loop_control:
1212
label: "{{ repo_name }}[{{ repo_os }}]: {{ repo_values }}"
13+
when: repo_values | length > 0
1314
vars:
1415
repo_os: "{{ ansible_distribution_version if ansible_distribution_version in item.value else ansible_distribution_major_version }}"
15-
repo_values: "{{ item.value[repo_os] }}"
16+
repo_values: "{{ item.value.get(repo_os, {}) }}"
1617
repo_name: "{{ repo_values.repo_name | default(item.key) }}"
1718
repo_content_url: "{{ repo_values.pulp_content_url | default(dnf_repos_pulp_content_url) }}"
1819

ansible/roles/dnf_repos/tasks/set_repos.yml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@
1212
loop: "{{ dnf_repos_repos | dict2items }}"
1313
loop_control:
1414
label: "{{ repo_name }}[{{ repo_os }}]: {{ repo_values }}"
15-
when: repo_name != 'epel'
15+
when:
16+
- repo_name != 'epel'
17+
- repo_values | length > 0
1618
vars:
1719
repo_os: "{{ ansible_distribution_version if ansible_distribution_version in item.value else ansible_distribution_major_version }}"
18-
repo_values: "{{ item.value[repo_os] }}"
20+
repo_values: "{{ item.value.get(repo_os, {}) }}"
1921
repo_name: "{{ repo_values.repo_name | default(item.key) }}"
2022
repo_content_url: "{{ repo_values.pulp_content_url | default(dnf_repos_pulp_content_url) }}"
2123

@@ -36,9 +38,11 @@
3638
loop: "{{ dnf_repos_repos | dict2items }}"
3739
loop_control:
3840
label: "{{ repo_name }}[{{ repo_os }}]: {{ repo_values }}"
39-
when: repo_name == 'epel'
41+
when:
42+
- repo_name == 'epel'
43+
- repo_values | length > 0
4044
vars:
4145
repo_os: "{{ ansible_distribution_version if ansible_distribution_version in item.value else ansible_distribution_major_version }}"
42-
repo_values: "{{ item.value[repo_os] }}"
46+
repo_values: "{{ item.value.get(repo_os, {}) }}"
4347
repo_name: "{{ repo_values.repo_name | default(item.key) }}"
4448
repo_content_url: "{{ repo_values.pulp_content_url | default(dnf_repos_pulp_content_url) }}"

ansible/roles/eessi/defaults/main.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
---
2+
cvmfs_release_version: "6-3"
3+
24
# Default to 10GB
35
cvmfs_quota_limit_mb: 10000
46

@@ -9,4 +11,4 @@ cvmfs_config_default:
911
cvmfs_config_overrides: {}
1012
cvmfs_config: "{{ cvmfs_config_default | combine(cvmfs_config_overrides) }}"
1113

12-
cvmfs_gpg_checksum: "sha256:4ac81adff957565277cfa6a4a330cdc2ce5a8fdd73b8760d1a5a32bef71c4bd6"
14+
cvmfs_gpg_checksum: "sha256:5c60679d307a96524204c127250e8ebdda66a459659faa1718bdf32dde1d7069"

0 commit comments

Comments
 (0)