Skip to content

Commit 111107c

Browse files
committed
use latest release for initial CI cluster setup
1 parent 9816980 commit 111107c

File tree

1 file changed

+37
-16
lines changed

1 file changed

+37
-16
lines changed

.github/workflows/stackhpc.yml

Lines changed: 37 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,10 @@ jobs:
4444
CI_CLOUD: ${{ vars.CI_CLOUD }} # default from repo settings
4545
TF_VAR_os_version: ${{ matrix.os_version }}
4646
steps:
47-
- uses: actions/checkout@v2
47+
- uses: actions/checkout@v4
48+
with:
49+
fetch-depth: 0
50+
fetch-tags: true
4851

4952
- name: Override CI_CLOUD if PR label is present
5053
if: ${{ github.event_name == 'pull_request' }}
@@ -76,6 +79,14 @@ jobs:
7679
run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts
7780
shell: bash
7881

82+
- name: Find the latest release
83+
run: |
84+
echo LATEST_RELEASE_TAG=$(curl -s https://api.github.com/repos/stackhpc/ansible-slurm-appliance/releases/latest | jq -r .tag_name) >> "$GITHUB_ENV"
85+
echo LATEST_RELEASE_TAG: $LATEST_RELEASE_TAG
86+
87+
- name: Checkout latest release
88+
run: git checkout $LATEST_RELEASE_TAG
89+
7990
- name: Install ansible etc
8091
run: dev/setup-env.sh
8192

@@ -103,7 +114,7 @@ jobs:
103114
env:
104115
DEMO_USER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }}
105116

106-
- name: Provision nodes using fat image
117+
- name: Provision nodes using latest release image
107118
id: provision_servers
108119
run: |
109120
. venv/bin/activate
@@ -119,15 +130,15 @@ jobs:
119130
tofu destroy -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
120131
if: failure() && steps.provision_servers.outcome == 'failure'
121132

122-
- name: Configure cluster
133+
- name: Configure cluster at latest release
123134
run: |
124135
. venv/bin/activate
125136
. environments/.stackhpc/activate
126137
ansible all -m wait_for_connection
127138
ansible-playbook -v ansible/site.yml
128139
ansible-playbook -v ansible/ci/check_slurm.yml
129140
130-
- name: Run MPI-based tests
141+
- name: Run MPI-based tests at latest release
131142
run: |
132143
. venv/bin/activate
133144
. environments/.stackhpc/activate
@@ -170,23 +181,33 @@ jobs:
170181
env:
171182
DEMO_USER_PASSWORD: ${{ secrets.TEST_USER_PASSWORD }}
172183

173-
- name: Test reimage of login and control nodes (via rebuild adhoc)
184+
- name: Switch to current branch
185+
run: git checkout -
186+
187+
- name: Reimage login and control nodes to image in current branch
188+
id: reimage_non_compute
189+
run: |
190+
. venv/bin/activate
191+
. environments/.stackhpc/activate
192+
cd $APPLIANCES_ENVIRONMENT_ROOT/tofu
193+
tofu apply -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars" -var-file=cluster_image.latest.tfvars.json
194+
195+
- name: Configure cluster using current branch
174196
run: |
175197
. venv/bin/activate
176198
. environments/.stackhpc/activate
177-
ansible-playbook -v --limit control,login ansible/adhoc/rebuild.yml
199+
ansible all -m wait_for_connection
178200
ansible-playbook -v ansible/site.yml
179201
ansible-playbook -v ansible/ci/check_slurm.yml
180202
181-
- name: Test compute node reboot and compute-init
203+
- name: Reimage compute nodes to image in current branch using slurm - tests compute-init
182204
run: |
183205
. venv/bin/activate
184206
. environments/.stackhpc/activate
185-
ansible-playbook -v --limit compute ansible/adhoc/rebuild.yml
186-
ansible-playbook -v ansible/ci/check_slurm.yml
187207
ansible-playbook -v ansible/adhoc/reboot_via_slurm.yml
208+
ansible-playbook -v ansible/ci/check_slurm.yml
188209
189-
- name: Check sacct state survived reimage
210+
- name: Check sacct state survived reimage to current branch
190211
run: |
191212
. venv/bin/activate
192213
. environments/.stackhpc/activate
@@ -198,16 +219,16 @@ jobs:
198219
. environments/.stackhpc/activate
199220
ansible-playbook -vv ansible/ci/check_grafana.yml
200221
222+
- name: Run MPI-based tests again in current branch
223+
run: |
224+
. venv/bin/activate
225+
. environments/.stackhpc/activate
226+
ansible-playbook -vv ansible/adhoc/hpctests.yml
227+
201228
- name: Delete infrastructure
202229
run: |
203230
. venv/bin/activate
204231
. environments/.stackhpc/activate
205232
cd $APPLIANCES_ENVIRONMENT_ROOT/tofu
206233
tofu destroy -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
207234
if: ${{ success() || cancelled() }}
208-
209-
# - name: Delete images
210-
# run: |
211-
# . venv/bin/activate
212-
# . environments/.stackhpc/activate
213-
# ansible-playbook -vv ansible/ci/delete_images.yml

0 commit comments

Comments
 (0)