43
43
TF_VAR_cluster_name : slurmci-${{ matrix.os_version }}-${{ github.run_number }}
44
44
CI_CLOUD : ${{ vars.CI_CLOUD }} # default from repo settings
45
45
TF_VAR_os_version : ${{ matrix.os_version }}
46
+ STACKHPC_TF_DIR : environments/.stackhpc/tofu
46
47
steps :
47
- - uses : actions/checkout@v2
48
+
49
+ - name : Find the latest release
50
+ run : |
51
+ echo LATEST_RELEASE_TAG=$(curl -s https://api.github.com/repos/stackhpc/ansible-slurm-appliance/releases/latest | jq -r .tag_name) >> "$GITHUB_ENV"
52
+
53
+ - name : Checkout latest release
54
+ uses : actions/checkout@v4
55
+ with :
56
+ ref : ${{ env.LATEST_RELEASE_TAG }}
57
+ fetch-depth : 0
48
58
49
59
- name : Override CI_CLOUD if PR label is present
50
60
if : ${{ github.event_name == 'pull_request' }}
60
70
fi
61
71
done
62
72
63
- - name : Record settings for CI cloud
73
+ - name : Record debug info
64
74
run : |
65
- echo CI_CLOUD: ${{ env.CI_CLOUD }}
75
+ echo LATEST_RELEASE_TAG: $LATEST_RELEASE_TAG
76
+ echo CI_CLOUD: $CI_CLOUD
66
77
67
78
- name : Setup ssh
68
79
run : |
76
87
run : cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts
77
88
shell : bash
78
89
79
- - name : Install ansible etc
90
+ - name : Install ansible, pip and galaxy requirements
80
91
run : dev/setup-env.sh
81
92
82
93
- name : Install OpenTofu
86
97
87
98
- name : Initialise tofu
88
99
run : tofu init
89
- working-directory : ${{ github.workspace }}/environments/.stackhpc/tofu
100
+ working-directory : ${{ env.STACKHPC_TF_DIR }}
90
101
91
102
- name : Write clouds.yaml
92
103
run : |
@@ -103,42 +114,90 @@ jobs:
103
114
env :
104
115
DEMO_USER_PASSWORD : ${{ secrets.TEST_USER_PASSWORD }}
105
116
106
- - name : Provision nodes using fat image
117
+ - name : Provision nodes using latest release image
107
118
id : provision_servers
108
119
run : |
109
120
. venv/bin/activate
110
121
. environments/.stackhpc/activate
111
- cd $APPLIANCES_ENVIRONMENT_ROOT/tofu
122
+ cd $STACKHPC_TF_DIR
112
123
tofu apply -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
113
124
114
125
- name : Delete infrastructure if provisioning failed
115
126
run : |
116
127
. venv/bin/activate
117
128
. environments/.stackhpc/activate
118
- cd $APPLIANCES_ENVIRONMENT_ROOT/tofu
129
+ cd $STACKHPC_TF_DIR
119
130
tofu destroy -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
120
131
if : failure() && steps.provision_servers.outcome == 'failure'
121
132
122
- - name : Configure cluster
133
+ - name : Configure cluster at latest release
123
134
run : |
124
135
. venv/bin/activate
125
136
. environments/.stackhpc/activate
126
137
ansible all -m wait_for_connection
127
138
ansible-playbook -v ansible/site.yml
128
139
ansible-playbook -v ansible/ci/check_slurm.yml
129
140
130
- - name : Run MPI-based tests
141
+ - name : Run MPI-based tests at latest release
131
142
run : |
132
143
. venv/bin/activate
133
144
. environments/.stackhpc/activate
134
- ansible-playbook -vv ansible/adhoc/hpctests.yml
145
+ ansible-playbook -vv ansible/adhoc/hpctests.yml --tags pingpong
135
146
136
147
# - name: Run EESSI tests
137
148
# run: |
138
149
# . venv/bin/activate
139
150
# . environments/.stackhpc/activate
140
151
# ansible-playbook -vv ansible/ci/check_eessi.yml
141
152
153
+ - name : Checkout current branch
154
+ run : git checkout ${{ github.head_ref || github.ref_name }}
155
+
156
+ - name : Update ansible, pip and galaxy requirements
157
+ run : dev/setup-env.sh
158
+
159
+ - name : Reimage login and control nodes to image in current branch
160
+ id : reimage_non_compute
161
+ run : |
162
+ . venv/bin/activate
163
+ . environments/.stackhpc/activate
164
+ cd $STACKHPC_TF_DIR
165
+ tofu init
166
+ tofu apply -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
167
+
168
+ - name : Configure cluster using current branch
169
+ run : |
170
+ . venv/bin/activate
171
+ . environments/.stackhpc/activate
172
+ ansible all -m wait_for_connection
173
+ ansible-playbook -v ansible/site.yml
174
+ ansible-playbook -v ansible/ci/check_slurm.yml
175
+
176
+ - name : Reimage compute nodes to image in current branch using slurm - tests compute-init
177
+ run : |
178
+ . venv/bin/activate
179
+ . environments/.stackhpc/activate
180
+ ansible-playbook -v ansible/adhoc/reboot_via_slurm.yml
181
+ ansible-playbook -v ansible/ci/check_slurm.yml
182
+
183
+ - name : Check sacct state survived reimage to current branch
184
+ run : |
185
+ . venv/bin/activate
186
+ . environments/.stackhpc/activate
187
+ ansible-playbook -vv ansible/ci/check_sacct_hpctests.yml
188
+
189
+ - name : Check MPI-based tests are shown in Grafana
190
+ run : |
191
+ . venv/bin/activate
192
+ . environments/.stackhpc/activate
193
+ ansible-playbook -vv ansible/ci/check_grafana.yml
194
+
195
+ - name : Run MPI-based tests again in current branch
196
+ run : |
197
+ . venv/bin/activate
198
+ . environments/.stackhpc/activate
199
+ ansible-playbook -vv ansible/adhoc/hpctests.yml
200
+
142
201
- name : Confirm Open Ondemand is up (via SOCKS proxy)
143
202
run : |
144
203
. venv/bin/activate
@@ -170,43 +229,10 @@ jobs:
170
229
env :
171
230
DEMO_USER_PASSWORD : ${{ secrets.TEST_USER_PASSWORD }}
172
231
173
- - name : Test reimage of login and control nodes (via rebuild adhoc)
174
- run : |
175
- . venv/bin/activate
176
- . environments/.stackhpc/activate
177
- ansible-playbook -v --limit control,login ansible/adhoc/rebuild.yml
178
- ansible-playbook -v ansible/site.yml
179
- ansible-playbook -v ansible/ci/check_slurm.yml
180
-
181
- - name : Test compute node reboot and compute-init
182
- run : |
183
- . venv/bin/activate
184
- . environments/.stackhpc/activate
185
- ansible-playbook -v ansible/adhoc/reboot_via_slurm.yml
186
- ansible-playbook -v ansible/ci/check_slurm.yml
187
-
188
- - name : Check sacct state survived reimage
189
- run : |
190
- . venv/bin/activate
191
- . environments/.stackhpc/activate
192
- ansible-playbook -vv ansible/ci/check_sacct_hpctests.yml
193
-
194
- - name : Check MPI-based tests are shown in Grafana
195
- run : |
196
- . venv/bin/activate
197
- . environments/.stackhpc/activate
198
- ansible-playbook -vv ansible/ci/check_grafana.yml
199
-
200
232
- name : Delete infrastructure
201
233
run : |
202
234
. venv/bin/activate
203
235
. environments/.stackhpc/activate
204
- cd $APPLIANCES_ENVIRONMENT_ROOT/tofu
205
- tofu destroy -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
236
+ cd $STACKHPC_TF_DIR
237
+ tofu destroy -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars" || echo "tofu failed in $STACKHPC_TF_DIR"
206
238
if : ${{ success() || cancelled() }}
207
-
208
- # - name: Delete images
209
- # run: |
210
- # . venv/bin/activate
211
- # . environments/.stackhpc/activate
212
- # ansible-playbook -vv ansible/ci/delete_images.yml
0 commit comments