
Commit b5a0298

Merge tag 'tags/0.9.0' into 0.9.0-sync

2 parents: bf75d66 + 78b9435

File tree: 15 files changed, 461 additions and 128 deletions

.github/actions/destroy/action.yml

Lines changed: 8 additions & 2 deletions
@@ -9,10 +9,13 @@ runs:
         set -e
         source ./ci.env
         source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
-        ansible-playbook stackhpc.azimuth_ops.destroy -e @extra-vars.yml -e force_destroy=true
+        ansible-playbook stackhpc.azimuth_ops.destroy \
+          -e @extra-vars.yml \
+          -e force_destroy=true \
+          -e capi_cluster_volumes_policy=delete
       if: ${{ always() }}
 
-    - name: Release ingress floating IP
+    - name: Release floating IPs
       shell: bash
       run: |
         set -eo pipefail
@@ -21,6 +24,9 @@ runs:
         if [ -n "$INGRESS_IP" ]; then
           openstack floating ip delete $INGRESS_IP
         fi
+        if [ -n "$ZENITH_SSHD_IP" ]; then
+          openstack floating ip delete $ZENITH_SSHD_IP
+        fi
       if: ${{ always() }}
 
     - name: Configure S3 lock
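The destroy playbook now passes capi_cluster_volumes_policy=delete, so volumes provisioned for the CAPI cluster are removed rather than left orphaned, and the follow-up step releases both reserved floating IPs. As a sketch of how a run can be checked for leaked resources afterwards (assuming the same ci.env and activate conventions the action uses):

    # Illustrative post-destroy check: list floating IPs that are still
    # allocated but unattached, plus any volumes left behind in the project
    source ./ci.env
    source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
    openstack floating ip list --status DOWN
    openstack volume list --status available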

.github/actions/setup/action.yml

Lines changed: 26 additions & 10 deletions
@@ -143,25 +143,41 @@ runs:
         action: acquire
       if: ${{ steps.s3-lock-config.outputs.host != '' }}
 
-    - name: Allocate floating IP for ingress
+    - name: Allocate floating IPs
       shell: bash
       run: |
         set -eo pipefail
         source ci.env
         source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
-        EXTNET_ID="$(
-          ansible -m debug -a "var=infra_external_network_id" -e @extra-vars.yml all |
-            jq -r ".plays[0].tasks[0].hosts.localhost.infra_external_network_id"
-        )"
-        IP_ADDRESS="$(
+
+        ansible_var() {
+          ANSIBLE_LOAD_CALLBACK_PLUGINS=true \
+          ANSIBLE_STDOUT_CALLBACK=json \
+          ansible -m debug -a "var=$1" -e @extra-vars.yml all | \
+            jq -r ".plays[0].tasks[0].hosts.localhost.$1"
+        }
+
+        EXTNET_ID="$(ansible_var infra_external_network_id)"
+        INSTALL_MODE="$(ansible_var install_mode)"
+
+        INGRESS_IP="$(
           openstack floating ip create $EXTNET_ID \
             --description "ingress IP for $AZIMUTH_ENVIRONMENT" \
             --format value \
             --column floating_ip_address
         )"
         cat >> ci.env <<EOF
-        export INGRESS_IP="$IP_ADDRESS"
+        export INGRESS_IP="$INGRESS_IP"
         EOF
-      env:
-        ANSIBLE_LOAD_CALLBACK_PLUGINS: "true"
-        ANSIBLE_STDOUT_CALLBACK: json
+
+        if [ "$INSTALL_MODE" = "ha" ]; then
+          ZENITH_SSHD_IP="$(
+            openstack floating ip create $EXTNET_ID \
+              --description "Zenith SSHD IP for $AZIMUTH_ENVIRONMENT" \
+              --format value \
+              --column floating_ip_address
+          )"
+          cat >> ci.env <<EOF
+        export ZENITH_SSHD_IP="$ZENITH_SSHD_IP"
+        EOF
+        fi
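The new ansible_var helper deduplicates the variable lookups: it forces Ansible's JSON stdout callback and extracts the resolved value with jq. A minimal sketch of why the jq path looks the way it does, using a hypothetical variable value:

    # The JSON callback nests results under plays/tasks/hosts, hence the
    # path .plays[0].tasks[0].hosts.localhost.<var> (value below is made up)
    echo '{"plays": [{"tasks": [{"hosts": {"localhost": {"infra_external_network_id": "net-1234"}}}]}]}' \
      | jq -r '.plays[0].tasks[0].hosts.localhost.infra_external_network_id'
    # prints: net-1234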

.github/environments/arcus-ha/inventory/group_vars/all/variables.yml

Lines changed: 9 additions & 10 deletions
@@ -24,13 +24,12 @@ capi_cluster_worker_count: 2
 # The risk of failed upgrades is too great, and it is going away soon
 consul_server_replicas: 1
 
-# Enable Velero just to check that installation works
-velero_enabled: true
-velero_s3_url: https://required-but-not-used.com
-velero_bucket_name: not-used
-velero_backup_schedule_enabled: true
-velero_backup_schedule_name: default
-velero_backup_schedule_timings: "0 0 * * *"
-velero_backup_schedule_ttl: "168h"
-velero_aws_access_key_id: required-but-not-used
-velero_aws_secret_access_key : required-but-not-used
+# Pick up the reserved IP for the Zenith SSHD LB
+zenith_sshd_service_load_balancer_ip: "{{ lookup('env', 'ZENITH_SSHD_IP') }}"
+
+# Configure Velero backups
+velero_enabled: "{{ not (not velero_aws_access_key_id) }}"
+velero_s3_url: https://object.arcus.openstack.hpc.cam.ac.uk
+velero_bucket_name: azimuth-ci-backups
+velero_aws_access_key_id: "{{ lookup('env', 'VELERO_S3_ACCESS_KEY') }}"
+velero_aws_secret_access_key: "{{ lookup('env', 'VELERO_S3_SECRET_KEY') }}"
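The velero_enabled expression relies on Jinja2 truthiness: not (not x) collapses an empty string to false and any non-empty key to true, so Velero is only configured when the CI job has generated and exported S3 credentials. A hedged way to see the coercion in isolation (values are illustrative):

    # Double negation as a string-to-boolean cast
    VELERO_S3_ACCESS_KEY="" ansible localhost -m debug \
      -a "msg={{ not (not lookup('env', 'VELERO_S3_ACCESS_KEY')) }}"   # => "msg": false
    VELERO_S3_ACCESS_KEY="abc123" ansible localhost -m debug \
      -a "msg={{ not (not lookup('env', 'VELERO_S3_ACCESS_KEY')) }}"   # => "msg": true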

.github/environments/leafcloud-ha/inventory/group_vars/all/variables.yml

Lines changed: 9 additions & 10 deletions
@@ -35,13 +35,12 @@ capi_cluster_addons_csi_cinder_volume_type: unencrypted
 # The risk of failed upgrades is too great, and it is going away soon
 consul_server_replicas: 1
 
-# Enable Velero just to check that installation works
-velero_enabled: true
-velero_s3_url: https://required-but-not-used.com
-velero_bucket_name: not-used
-velero_backup_schedule_enabled: true
-velero_backup_schedule_name: default
-velero_backup_schedule_timings: "0 0 * * *"
-velero_backup_schedule_ttl: "168h"
-velero_aws_access_key_id: required-but-not-used
-velero_aws_secret_access_key : required-but-not-used
+# Pick up the reserved IP for the Zenith SSHD LB
+zenith_sshd_service_load_balancer_ip: "{{ lookup('env', 'ZENITH_SSHD_IP') }}"
+
+# Configure Velero backups
+velero_enabled: "{{ not (not velero_aws_access_key_id) }}"
+velero_s3_url: https://leafcloud.store
+velero_bucket_name: azimuth-ci-backups
+velero_aws_access_key_id: "{{ lookup('env', 'VELERO_S3_ACCESS_KEY') }}"
+velero_aws_secret_access_key: "{{ lookup('env', 'VELERO_S3_SECRET_KEY') }}"
Lines changed: 191 additions & 0 deletions
@@ -0,0 +1,191 @@
+name: Backup and restore test
+
+on:
+  # Allow manual execution on any branch
+  workflow_dispatch:
+    inputs:
+      target-cloud:
+        description: >-
+          The cloud to target for the run.
+          Leave blank to use the default cloud.
+        type: choice
+        options:
+          - ""
+          - arcus
+          - leafcloud
+
+jobs:
+  # Tests that a backup and restore re-adopts all the existing platforms correctly
+  #
+  # Note that success() and failure() consider *all previous steps*, and continue-on-error
+  # prevents the job from being marked as failed if that step fails
+  # This means that in order to get the execution flow that we want while still resulting in a
+  # failed job when required, we need to use step ids and the conclusions of specific steps
+  test_backup_restore:
+    runs-on: ubuntu-latest
+    steps:
+      # We need to check out the code under test first in order to use local actions
+      - name: Checkout code under test
+        uses: actions/checkout@v3
+
+      - name: Set up Azimuth environment
+        uses: ./.github/actions/setup
+        with:
+          os-clouds: ${{ secrets.OS_CLOUDS }}
+          repository: ${{ github.repository }}
+          ref: ${{ github.ref }}
+          target-cloud: ${{ inputs.target-cloud || vars.TARGET_CLOUD }}
+          install-mode: ha
+          environment-prefix: ci-restore
+        # GitHub terminates jobs after 6 hours
+        # We don't want jobs to acquire the lock then get timed out before they can finish
+        # So wait a maximum of 3 hours to acquire the lock, leaving 3 hours for other tasks in the job
+        timeout-minutes: 180
+
+      - name: Generate S3 credentials for Velero
+        run: |
+          set -e
+          source ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          VELERO_S3_ACCESS_KEY="$(openstack ec2 credentials create -f value -c access)"
+          VELERO_S3_SECRET_KEY="$(openstack ec2 credentials show -f value -c secret $VELERO_S3_ACCESS_KEY)"
+          cat >> ci.env <<EOF
+          export VELERO_S3_ACCESS_KEY="$VELERO_S3_ACCESS_KEY"
+          export VELERO_S3_SECRET_KEY="$VELERO_S3_SECRET_KEY"
+          EOF
+
+      - name: Provision Azimuth
+        uses: ./.github/actions/provision
+
+      - name: Generate test suite
+        id: generate-tests
+        run: |
+          set -e
+          source ./ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          ansible-playbook stackhpc.azimuth_ops.generate_tests -e @extra-vars.yml
+
+      - name: Create test platforms
+        id: tests-create
+        run: |
+          set -e
+          source ./ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          ./bin/run-tests --include create --outputdir reports/create
+
+      - name: Verify test platforms
+        run: |
+          set -e
+          source ./ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          ./bin/run-tests --include verify --outputdir reports/verify-create
+        if: ${{ !cancelled() && contains(fromJSON('["success", "failure"]'), steps.tests-create.conclusion) }}
+
+      - name: Create a backup
+        id: backup-create
+        run: |
+          set -e
+          source ./ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          ./bin/seed-ssh -- \
+            velero backup create $AZIMUTH_ENVIRONMENT \
+              --kubeconfig ./kubeconfig-azimuth-$AZIMUTH_ENVIRONMENT.yaml \
+              --from-schedule default \
+              --wait
+        if: ${{ !cancelled() && steps.generate-tests.conclusion == 'success' }}
+
+      - name: Create pre-restore debug bundle
+        run: |
+          set -e
+          source ./ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          ./bin/create-debug-bundle
+        if: ${{ !cancelled() }}
+
+      - name: Upload pre-restore debug bundle
+        uses: actions/upload-artifact@v3
+        with:
+          name: azimuth-pre-restore-debug-bundle
+          path: debug-bundle.tar.gz
+        if: ${{ !cancelled() }}
+
+      - name: Destroy Azimuth
+        id: azimuth-destroy
+        run: |
+          set -e
+          source ./ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          ansible-playbook stackhpc.azimuth_ops.destroy -e @extra-vars.yml
+        if: ${{ !cancelled() && steps.backup-create.conclusion == 'success' }}
+
+      - name: Restore from backup
+        id: backup-restore
+        run: |
+          set -e
+          source ./ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          ansible-playbook stackhpc.azimuth_ops.restore \
+            -e @extra-vars.yml \
+            -e velero_restore_backup_name=$AZIMUTH_ENVIRONMENT
+        if: ${{ !cancelled() && steps.azimuth-destroy.conclusion == 'success' }}
+
+      - name: Verify test platforms post restore
+        run: |
+          set -e
+          source ./ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          ./bin/run-tests --include verify --outputdir reports/verify-post-restore
+        if: ${{ !cancelled() && steps.backup-restore.conclusion == 'success' }}
+
+      - name: Delete test platforms
+        run: |
+          set -e
+          source ./ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          ./bin/run-tests --include delete --outputdir reports/delete
+        if: ${{ always() }}
+
+      - name: Delete backup
+        run: |
+          set -e
+          source ./ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          ./bin/seed-ssh -- \
+            velero backup delete $AZIMUTH_ENVIRONMENT \
+              --kubeconfig ./kubeconfig-azimuth-$AZIMUTH_ENVIRONMENT.yaml \
+              --confirm
+        if: ${{ always() }}
+
+      - name: Upload test report artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: azimuth-restore-test-reports
+          path: reports/*
+        if: ${{ always() }}
+
+      - name: Create debug bundle
+        run: |
+          set -e
+          source ./ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          ./bin/create-debug-bundle
+        if: ${{ always() }}
+
+      - name: Upload debug bundle
+        uses: actions/upload-artifact@v3
+        with:
+          name: azimuth-restore-debug-bundle
+          path: debug-bundle.tar.gz
+        if: ${{ always() }}
+
+      - name: Destroy Azimuth
+        uses: ./.github/actions/destroy
+        if: ${{ always() }}
+
+      - name: Delete Velero S3 credentials
+        run: |
+          set -e
+          source ./ci.env
+          source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+          openstack ec2 credentials delete $VELERO_S3_ACCESS_KEY
+        if: ${{ always() }}
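If the post-restore verification fails, the first question is whether the backup itself completed and what it captured. A sketch of how to inspect it, reusing the workflow's seed-ssh wrapper and kubeconfig naming convention (standard Velero CLI subcommands):

    # Show backup contents and any restores that ran against it
    ./bin/seed-ssh -- \
      velero backup describe $AZIMUTH_ENVIRONMENT \
        --kubeconfig ./kubeconfig-azimuth-$AZIMUTH_ENVIRONMENT.yaml \
        --details
    ./bin/seed-ssh -- \
      velero restore get \
        --kubeconfig ./kubeconfig-azimuth-$AZIMUTH_ENVIRONMENT.yaml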

.github/workflows/test-ha.yml

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
+name: HA test
+
+on:
+  # Allow manual execution on any branch
+  workflow_dispatch:
+    inputs:
+      target-cloud:
+        description: >-
+          The cloud to target for the run.
+          Leave blank to use the default cloud.
+        type: choice
+        options:
+          - ""
+          - arcus
+          - leafcloud
+
+jobs:
+  # Tests a clean HA deployment + all appliances
+  test_ha:
+    runs-on: ubuntu-latest
+    steps:
+      # We need to check out the code under test first in order to use local actions
+      - name: Checkout code under test
+        uses: actions/checkout@v3
+
+      - name: Set up Azimuth environment
+        uses: ./.github/actions/setup
+        with:
+          os-clouds: ${{ secrets.OS_CLOUDS }}
+          repository: ${{ github.repository }}
+          ref: ${{ github.ref }}
+          target-cloud: ${{ inputs.target-cloud || vars.TARGET_CLOUD }}
+          install-mode: ha
+          environment-prefix: ci-ha
+        # GitHub terminates jobs after 6 hours
+        # We don't want jobs to acquire the lock then get timed out before they can finish
+        # So wait a maximum of 3 hours to acquire the lock, leaving 3 hours for other tasks in the job
+        timeout-minutes: 180
+
+      - name: Provision Azimuth
+        uses: ./.github/actions/provision
+
+      - name: Run Azimuth tests
+        uses: ./.github/actions/test
+
+      - name: Destroy Azimuth
+        uses: ./.github/actions/destroy
+        if: ${{ always() }}
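Since this workflow only has a workflow_dispatch trigger, it never runs automatically. A sketch of kicking it off from the GitHub CLI, referencing the workflow by its display name (the branch and cloud shown are examples):

    # Run the HA test against leafcloud from a given branch
    gh workflow run "HA test" --ref main -f target-cloud=leafcloud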

.github/workflows/test-singlenode.yml

Lines changed: 5 additions & 2 deletions
@@ -29,15 +29,18 @@ on:
       - requirements.txt
       - requirements.yml
       - .github/actions/**
+      - .github/environments/common
+      - .github/environments/arcus
+      - .github/environments/leafcloud
       - .github/workflows/test-singlenode.yml
       - bin/**
-      - "!bin/ci-exec"
+      - "!bin/ci-setup"
       - "!bin/create-merge-branch"
+      - "!bin/port-forward"
       - "!bin/tilt-*"
       - environments/base/**
       - environments/singlenode/**
       - environments/demo/**
-      - environments/ci/**
 
 # Use the head ref for workflow concurrency, with cancellation
 # This should mean that any previous runs of this workflow for the same PR
