2
2
name: Test deployment and reimage on OpenStack

on:
  # Manual runs take no inputs.
  workflow_dispatch: {}
  push:
    branches:
      - main
    # Run for any change except dev tooling, docs and repo metadata.
    # dev/setup-env.sh is re-included after the dev/ exclusion because this
    # workflow executes it. Later patterns override earlier ones, so the
    # order matters.
    paths:
      - '**'
      - '!dev/**'
      - 'dev/setup-env.sh'
      - '!docs/**'
      - '!README.md'
      - '!.gitignore'
  pull_request:
    # Same filter as for push; keep the two lists in sync.
    paths:
      - '**'
      - '!dev/**'
      - 'dev/setup-env.sh'
      - '!docs/**'
      - '!README.md'
      - '!.gitignore'
15
23
jobs:
  openstack:
    name: openstack-ci
    # One active run per workflow + branch/PR + OS version; a newer run in
    # the same group cancels the one still in progress.
    concurrency:
      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}
      cancel-in-progress: true
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false  # allow other matrix jobs to continue even if one fails
      matrix:
        os_version:
          - RL8
          - RL9
    env:
      # Quoted so the value is passed as the literal string rather than
      # depending on how a YAML boolean is stringified.
      ANSIBLE_FORCE_COLOR: "True"
      OS_CLOUD: openstack
      TF_VAR_cluster_name: slurmci-${{ matrix.os_version }}-${{ github.run_number }}
      CI_CLOUD: ${{ vars.CI_CLOUD }}  # default from repo settings
      # Set at job level so every step in the job sees the same value.
      TF_VAR_os_version: ${{ matrix.os_version }}
39
42
steps :
40
43
# v2 targets a Node runtime that hosted runners no longer support; use a
# maintained major version.
- uses: actions/checkout@v4
41
44
45
# Lets a pull request pick the target cloud with a label of the form
# "CI_CLOUD=<name>". If several labels match, the last one wins.
- name: Override CI_CLOUD if PR label is present
  if: ${{ github.event_name == 'pull_request' }}
  env:
    # Passed through the environment instead of being pasted into the script,
    # so quotes or shell metacharacters in a label cannot break or inject
    # into the shell code.
    PR_LABEL_NAMES: ${{ toJSON(github.event.pull_request.labels.*.name) }}
  run: |
    # Read one label name per line so names containing spaces stay intact.
    while IFS= read -r label; do
      echo "PR label: ${label}"
      if [[ $label == CI_CLOUD=* ]]; then
        # Extract the value after 'CI_CLOUD='
        override=${label#CI_CLOUD=}
        # The value is later used to build secret names and a tfvars file
        # name, so only accept characters that are valid in a secret name.
        if [[ ! $override =~ ^[A-Za-z0-9_]+$ ]]; then
          echo "::error::Invalid CI_CLOUD label value: '${override}'"
          exit 1
        fi
        echo "CI_CLOUD=${override}" >> "$GITHUB_ENV"
      fi
    done < <(jq -r '.[]' <<< "$PR_LABEL_NAMES")
  shell: bash
58
+
42
59
# Logs which cloud this run targets.
- name: Record settings for CI cloud
  run: |
    # Use the shell variable rather than interpolating an expression into the
    # script text, so the value is never parsed as shell code.
    echo "CI_CLOUD: ${CI_CLOUD}"
45
62
46
63
# Installs the private key for the selected cloud as the runner's default
# ssh identity.
- name: Setup ssh
  env:
    # Secret name is derived from the selected cloud: <CI_CLOUD>_SSH_KEY.
    SSH_PRIVATE_KEY: ${{ secrets[format('{0}_SSH_KEY', env.CI_CLOUD)] }}
  run: |
    set -x
    # -p: do not fail if the directory already exists
    mkdir -p ~/.ssh
    # Do not trace the command that handles the private key.
    set +x
    printf '%s\n' "$SSH_PRIVATE_KEY" > ~/.ssh/id_rsa
    set -x
    chmod 0600 ~/.ssh/id_rsa
  shell: bash
53
-
70
+
54
71
# Appends the bastion host fingerprints kept in the repository to the
# runner's known_hosts file.
- name: Add bastion's ssh key to known_hosts
  shell: bash
  run: |
    cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts
57
-
74
+
58
75
# Runs the repository's environment setup script.
- name: Install ansible etc
  run: |
    dev/setup-env.sh
60
77
61
78
# Installs a pinned OpenTofu release on the runner.
- name: Install OpenTofu
  uses: opentofu/setup-opentofu@v1
  with:
    # Quoted so the version is always read as a string.
    tofu_version: '1.6.2'
65
-
82
+
66
83
# Initialises the working directory holding the CI environment's
# infrastructure definitions.
# NOTE(review): the preceding step installs OpenTofu but this step invokes
# `terraform`; confirm which binary is intended.
- name: Initialise terraform
  working-directory: ${{ github.workspace }}/environments/.stackhpc/terraform
  run: |
    terraform init
69
-
86
+
70
87
# Writes the OpenStack client configuration for the selected cloud.
- name: Write clouds.yaml
  env:
    # Secret name is derived from the selected cloud: <CI_CLOUD>_CLOUDS_YAML.
    CLOUDS_YAML: ${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}
  run: |
    mkdir -p ~/.config/openstack/
    # Write the secret verbatim from the environment. Pasting it into a
    # double-quoted shell string would corrupt or expand any quotes, '$'
    # or backticks it contains.
    printf '%s\n' "$CLOUDS_YAML" > ~/.config/openstack/clouds.yaml
  shell: bash
75
92
76
93
- name : Setup environment-specific inventory/terraform inputs
@@ -88,19 +105,15 @@ jobs:
88
105
. venv/bin/activate
89
106
. environments/.stackhpc/activate
90
107
cd $APPLIANCES_ENVIRONMENT_ROOT/terraform
91
- terraform apply -auto-approve -var-file="${{ vars.CI_CLOUD }}.tfvars"
92
- env :
93
- TF_VAR_os_version : ${{ matrix.os_version }}
108
+ terraform apply -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
94
109
95
110
# Cleans up partially created infrastructure when the provisioning step
# itself failed.
- name: Delete infrastructure if provisioning failed
  if: failure() && steps.provision_servers.outcome == 'failure'
  run: |
    . venv/bin/activate
    . environments/.stackhpc/activate
    # APPLIANCES_ENVIRONMENT_ROOT is presumably exported by the activate
    # script sourced above - TODO confirm.
    cd "$APPLIANCES_ENVIRONMENT_ROOT/terraform"
    # Use the shell variable rather than interpolating an expression into
    # the command line.
    terraform destroy -auto-approve -var-file="${CI_CLOUD}.tfvars"
104
117
105
118
- name : Configure cluster
106
119
run : |
@@ -126,14 +139,14 @@ jobs:
126
139
run : |
127
140
. venv/bin/activate
128
141
. environments/.stackhpc/activate
129
-
142
+
130
143
# load ansible variables into shell:
131
144
ansible-playbook ansible/ci/output_vars.yml \
132
145
-e output_vars_hosts=openondemand \
133
146
-e output_vars_path=$APPLIANCES_ENVIRONMENT_ROOT/vars.txt \
134
147
-e output_vars_items=bastion_ip,bastion_user,openondemand_servername
135
148
source $APPLIANCES_ENVIRONMENT_ROOT/vars.txt
136
-
149
+
137
150
# setup ssh proxying:
138
151
sudo apt-get --yes install proxychains
139
152
echo proxychains installed
@@ -170,7 +183,7 @@ jobs:
170
183
# ansible login -v -a "sudo scontrol reboot ASAP nextstate=RESUME reason='rebuild image:${{ steps.packer_build.outputs.NEW_COMPUTE_IMAGE_ID }}' ${TF_VAR_cluster_name}-compute-[0-3]"
171
184
# ansible compute -m wait_for_connection -a 'delay=60 timeout=600' # delay allows node to go down
172
185
# ansible-playbook -v ansible/ci/check_slurm.yml
173
-
186
+
174
187
- name : Test reimage of login and control nodes (via rebuild adhoc)
175
188
run : |
176
189
. venv/bin/activate
@@ -179,7 +192,7 @@ jobs:
179
192
ansible all -m wait_for_connection -a 'delay=60 timeout=600' # delay allows node to go down
180
193
ansible-playbook -v ansible/site.yml
181
194
ansible-playbook -v ansible/ci/check_slurm.yml
182
-
195
+
183
196
- name : Check sacct state survived reimage
184
197
run : |
185
198
. venv/bin/activate
@@ -197,10 +210,8 @@ jobs:
197
210
. venv/bin/activate
198
211
. environments/.stackhpc/activate
199
212
cd $APPLIANCES_ENVIRONMENT_ROOT/terraform
200
- terraform destroy -auto-approve -var-file="${{ vars .CI_CLOUD }}.tfvars"
213
+ terraform destroy -auto-approve -var-file="${{ env .CI_CLOUD }}.tfvars"
201
214
if : ${{ success() || cancelled() }}
202
- env :
203
- TF_VAR_os_version : ${{ matrix.os_version }}
204
215
205
216
# - name: Delete images
206
217
# run: |
0 commit comments