Skip to content

Commit a8b0d41

Browse files
committed
AUFN CI unfinished draft
1 parent 9771e0d commit a8b0d41

File tree

3 files changed

+400
-2
lines changed

3 files changed

+400
-2
lines changed

.github/workflows/deploy-aufn.yml

Lines changed: 398 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,398 @@
1+
---
# Deploys AUFN lab VMs with Terraform, validates them over SSH and destroys
# the infrastructure again at the end of the run.

name: AUFN KIDDIN' ME?!

on:
  push:
    branches:
      - AUFN-CI
  workflow_dispatch:
    inputs:
      deployment_type:
        description: Type of deployment
        type: choice
        options:
          - Test
          - Deployment
        default: Test
      lab_vm_count:
        description: Total number of Lab VMs to deploy
        type: number
        required: true
        default: 2
      # When using in workflow use ::add-mask::$ to mask the password
      reg_pwd:
        description: Password for registry access
        type: string
        default: ""  # NOTE: This needs to be set at runtime via secrets
      os_image:
        description: Host OS image
        type: choice
        options:
          - Ubuntu
          - Rocky9
        default: 'Rocky9'
      aufn_branch:
        description: Which branch of AUFN to use
        type: string
        default: smslab/2023.1
      au_from_seed:
        description: Run 'A Universe From Seed'?
        type: boolean
        default: false
      debug_mode:
        description: Keep Test up to debug?
        type: boolean
        default: false
    # `workflow_dispatch` only accepts `inputs`; a `secrets:` mapping is valid
    # under `workflow_call` only and makes this trigger fail validation.
    # The job reads these repository/environment secrets directly:
    #   BASTION_TEST_PASSWORD
    #   CLOUDS_YAML
    #   OS_APPLICATION_CREDENTIAL_ID
    #   OS_APPLICATION_CREDENTIAL_SECRET
56+
57+
jobs:
  set-up-vars:
    name: Set up variables
    # The `inputs` context is empty on `push` runs, so fall back to the
    # default declared for the `deployment_type` dispatch input.
    environment: ${{ inputs.deployment_type || 'Test' }}
    runs-on: ubuntu-latest

    steps:
64+
# Installs the apt packages the later steps rely on (git, Python tooling,
# OpenSSH client/server, jq, ...).
- name: Install Package
  uses: ConorMacBride/install-package@main
  with:
    apt: >-
      git unzip nodejs python3-pip python3-venv
      openssh-server openssh-client jq
68+
69+
# sshpass feeds the per-VM passwords to ssh in the lab checks further down.
- name: Install sshpass
  run: |
    sudo apt-get update
    sudo apt-get install -y sshpass
71+
72+
# Starts the SSH daemon on the runner through its init script.
- name: Start the SSH service
  run: sudo /etc/init.d/ssh start
75+
76+
# - name: Check if 'Deployment' Lab is already deployed
77+
# uses: softwareforgood/check-artifact-v4-existence@v0
78+
# with:
79+
# name: ${{ inputs.deployment_type }}-terraform-artifacts
80+
#
81+
# or use a ping command to check if the bastion is up
82+
#
83+
84+
# Checks the repository out into the `repo-dir` sub-directory.
- name: Checkout
  uses: actions/checkout@v4
  with:
    path: 'repo-dir'
88+
89+
# Moves the checked-out files from `repo-dir` into the workspace root, which is
# the working directory of the following `run` steps (terraform init/plan/...).
# The previous target `~/` is the runner's home directory, not the workspace.
- name: Move contents to $GITHUB_WORKSPACE
  run: |
    mv repo-dir/* "$GITHUB_WORKSPACE"/
92+
93+
# Puts the terraform CLI on the PATH.
- name: Install terraform
  uses: hashicorp/setup-terraform@v2

# Initialises the Terraform working directory.
- name: Initialise terraform
  run: |
    terraform init
98+
99+
# Writes the CLOUDS_YAML secret to clouds.yaml.
# The secret is handed over through an environment variable rather than being
# interpolated into an unquoted heredoc, so `$`, backticks and backslashes in
# it are written verbatim instead of being expanded by the shell.
- name: Generate clouds.yaml
  env:
    CLOUDS_YAML: ${{ secrets.CLOUDS_YAML }}
  run: |
    printf '%s\n' "$CLOUDS_YAML" > clouds.yaml
104+
105+
# Renders terraform.tfvars from the dispatch inputs and repository variables.
# Every input has a fallback equal to its declared default because the
# `inputs` context is empty when the workflow is started by a push.
- name: Generate terraform.tfvars
  env:
    # Unquoted `vars.*` so the variable's value is used; quoting it would
    # yield the literal text 'vars.LAB_OS_IMAGE_...'.
    LAB_IMAGE_ID: ${{ inputs.os_image == 'Ubuntu' && vars.LAB_OS_IMAGE_UBUNTU || vars.LAB_OS_IMAGE_ROCKY }}
    LAB_IMAGE_NAME: ${{ inputs.os_image == 'Ubuntu' && 'Ubuntu-22.04' || inputs.os_image || 'Rocky9' }}
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || 'rocky' }}
  run: |
    cat << EOF > terraform.tfvars
    lab_count = ${{ inputs.lab_vm_count || 2 }}
    lab_net_ipv4 = "${{ vars.LAB_NETWORK }}"
    image_id = "${{ env.LAB_IMAGE_ID }}"
    image_name = "${{ env.LAB_IMAGE_NAME }}"
    lab_flavor = "aufn.v1.large"
    registry_flavor = "general.v1.medium"
    boot_labs_from_volume = true
    image_user = "${{ env.LAB_IMAGE_USER }}"
    allocate_floating_ips = false
    create_bastion = true
    EOF

    # Conditionally append bastion_floating_ip
    if [ "${{ inputs.deployment_type }}" = "Deployment" ]; then
      echo 'bastion_floating_ip = "185.45.78.149"' >> terraform.tfvars
    fi
128+
129+
# Shows the planned changes; OS_CLOUD is taken from the OS_CLOUD repository
# variable.
- name: Terraform Plan
  env:
    OS_CLOUD: ${{ vars.OS_CLOUD }}
  run: |
    terraform plan
133+
134+
# Applies the configuration, retrying up to five times. A failed attempt is
# followed by a full destroy and a pause before the next try.
- name: Terraform Apply
  id: tf_apply
  env:
    OS_CLOUD: ${{ vars.OS_CLOUD }}
  run: |
    for attempt in $(seq 5); do
      if ! terraform apply -auto-approve; then
        echo "Failed to create infrastructure on attempt $attempt"
        sleep 10

        # Need to add a check to see which part failed and then
        # taint and retry once more before declaring failure

        terraform destroy -auto-approve
        sleep 60
        continue
      fi

      echo "Created infrastructure on attempt $attempt"
      exit 0
    done

    echo "Failed to create infrastructure after $attempt attempts"
    exit 1
155+
156+
# Prints all Terraform outputs as JSON. The next step reads this step's
# `stdout` output (presumably provided by the setup-terraform wrapper — confirm).
- name: Get Terraform outputs
  id: tf_outputs
  run: |
    terraform output -json

# Saves the captured JSON to tf-outputs.yml.
# The text goes through an environment variable so that `$`, backticks or
# quotes inside the outputs are not expanded by the shell, which an unquoted
# heredoc would do.
- name: Write Terraform outputs
  env:
    TF_OUTPUTS_JSON: ${{ steps.tf_outputs.outputs.stdout }}
  run: |
    printf '%s\n' "$TF_OUTPUTS_JSON" > tf-outputs.yml

# Writes the raw `labs` output to ssh_list.txt; later steps read it line by
# line and take fields 2, 3 and 5 as IP, name and password.
- name: Write out Lab VMs info
  run: |
    terraform output -raw labs > ssh_list.txt
170+
171+
# Sets the `rocky` login password on the bastion and enables SSH password
# authentication there.
# The password is the `reg_pwd` input when given, otherwise the
# BASTION_TEST_PASSWORD secret (unquoted, so the secret's value is used
# rather than the literal text 'secrets.BASTION_TEST_PASSWORD').
- name: Update bastion password authentication and set login password
  shell: bash
  env:
    reg_pwd_var: ${{ inputs.reg_pwd || secrets.BASTION_TEST_PASSWORD }}
  run: |
    echo "::add-mask::${reg_pwd_var}"

    # NOTE(review): the ssh target in the reviewed copy was an obfuscated
    # "[email protected]" placeholder; it is restored here from the `rocky`
    # account changed below and the bastion IP used by the later steps —
    # confirm against the original workflow.
    # StrictHostKeyChecking=no: the bastion is freshly created, so its host
    # key cannot be known to the runner yet.
    # The heredoc is unquoted on purpose: ${reg_pwd_var} is expanded on the
    # runner before the script is sent to the bastion.
    ssh -o StrictHostKeyChecking=no -i default.pem rocky@185.45.78.149 <<EOF
    echo '${reg_pwd_var}' | sudo passwd --stdin rocky
    sudo sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config.d/50-cloud-init.conf
    sudo systemctl restart sshd
    EOF
183+
184+
# Opens an SSH session to every lab VM listed in ssh_list.txt (through the
# bastion) and prints the remote hostname.
- name: Check connection to Lab VMs
  shell: bash
  env:
    # Falls back to 'rocky' (the default os_image) when inputs are empty.
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || 'rocky' }}
  run: |
    bastion_ip=185.45.78.149
    bastion_key="default.pem"

    while IFS= read -r line; do
      ip=$(echo "$line" | awk '{print $2}')
      name=$(echo "$line" | awk '{print $3}')
      password=$(echo "$line" | awk '{print $5}')

      echo "::add-mask::$password"

      echo "Connecting to $name at $ip via bastion..."

      # -n keeps ssh from reading stdin; without it ssh swallows the rest of
      # ssh_list.txt and the loop stops after the first VM.
      sshpass -p "$password" ssh -n -o StrictHostKeyChecking=no \
        -o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
        -o IdentityFile="$bastion_key" \
        "${LAB_IMAGE_USER}@${ip}" \
        'echo "Connected to $(hostname)"'
    done < ssh_list.txt
207+
208+
# Runs a set of health checks on every lab VM and exports the zero-based
# indexes of the VMs that failed as FAILED_VM_INDEXES (space separated).
# The remote script exits non-zero when any check fails; that exit status is
# what marks the VM as failed, because variables set in the remote shell are
# not visible on the runner.
- name: Validate lab VMs setup
  shell: bash
  env:
    # Falls back to 'rocky' (the default os_image) when inputs are empty.
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || 'rocky' }}
  run: |
    bastion_ip=185.45.78.149
    bastion_key="default.pem"
    index=0
    failed_indexes=()

    while IFS= read -r line; do
      ip=$(echo "$line" | awk '{print $2}')
      name=$(echo "$line" | awk '{print $3}')
      password=$(echo "$line" | awk '{print $5}')

      echo "::add-mask::$password"
      echo "Connecting to $name at $ip..."

      # Run the compound remote commands. The heredoc is quoted, so nothing
      # in it is expanded on the runner. A failed connection also counts as
      # a failed VM.
      if ! sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
        -o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
        -o IdentityFile="$bastion_key" \
        "${LAB_IMAGE_USER}@${ip}" <<'EOF'
    taint="false"

    # NOTE(review): this address was obfuscated ("[email protected]") in the
    # reviewed copy of the workflow; restore the real user@host.
    seed_target='[email protected]'

    echo "Checking 'virsh list --all'..."
    output=$(sudo virsh list --all)
    echo "$output"

    if ! echo "$output" | grep -q 'seed.*running'; then echo "'seed' not running"; taint="true"; fi
    if ! echo "$output" | grep -q 'compute0.*shut off'; then echo "'compute0' not shut off"; taint="true"; fi
    if ! echo "$output" | grep -q 'controller0.*shut off'; then echo "'controller0' not shut off"; taint="true"; fi

    # ssh -n: this script arrives on stdin, so a nested ssh that reads stdin
    # would consume the remaining lines of the script.
    echo "Checking 'bifrost_deploy' container..."
    container_output=$(ssh -n "$seed_target" 'sudo docker ps')
    echo "$container_output"
    if ! echo "$container_output" | grep -q bifrost_deploy; then echo "Container bifrost_deploy not found running"; taint="true"; fi

    echo "Checking openssh package source..."
    pkg_output=$(ssh -n "$seed_target" 'sudo dnf info openssh')
    echo "$pkg_output"
    if ! echo "$pkg_output" | grep -q 'Repository *: *@System'; then echo "Package openssh not from @System"; taint="true"; fi

    echo "Checking a-seed-from-nothing.out log result..."
    if ! tail -n 10 a-seed-from-nothing.out | grep -q 'PLAY RECAP.*failed=0'; then
      echo "Ansible PLAY RECAP failed != 0"
      taint="true"
    fi

    if [ "$taint" = "true" ]; then exit 1; fi
    echo "All checks passed on $HOSTNAME"
    EOF
      then
        failed_indexes+=("$index")
      fi
      index=$((index + 1))

    done < ssh_list.txt
    echo "FAILED_VM_INDEXES=${failed_indexes[*]}" >> "$GITHUB_ENV"
264+
265+
# Taints the lab instances whose indexes are listed in FAILED_VM_INDEXES and
# re-applies so Terraform recreates them. Does nothing when the list is empty.
- name: Taint failed lab VMs (if any)
  shell: bash
  env:
    FAILED_VM_INDEXES: ${{ env.FAILED_VM_INDEXES }}
    # Same cloud selection as the other plan/apply/destroy steps; the
    # re-apply below talks to the cloud and needs it as well.
    OS_CLOUD: ${{ vars.OS_CLOUD }}
  run: |
    if [ -z "${FAILED_VM_INDEXES}" ]; then
      echo "No failed VMs detected"
      exit 0
    fi

    # Unquoted on purpose: the list is split on whitespace.
    for idx in $FAILED_VM_INDEXES; do
      echo "Tainting openstack_compute_instance_v2.lab[$idx]"
      terraform taint "openstack_compute_instance_v2.lab[$idx]"
    done

    echo "Re-running Terraform apply to fix failed VMs"
    terraform apply -auto-approve
282+
283+
# Prints the Terraform outputs again after the optional taint/re-apply. The
# next step reads this step's `stdout` output (presumably provided by the
# setup-terraform wrapper — confirm).
- name: Get Terraform outputs
  id: tf_outputs_after_taint
  run: |
    terraform output -json

# Overwrites tf-outputs.yml with the refreshed JSON.
# The text goes through an environment variable so that `$`, backticks or
# quotes inside the outputs are not expanded by the shell, which an unquoted
# heredoc would do.
- name: Write Terraform outputs
  env:
    TF_OUTPUTS_JSON: ${{ steps.tf_outputs_after_taint.outputs.stdout }}
  run: |
    printf '%s\n' "$TF_OUTPUTS_JSON" > tf-outputs.yml

# Regenerates ssh_list.txt from the raw `labs` output.
- name: Write out Lab VMs info
  run: |
    terraform output -raw labs > ssh_list.txt
297+
298+
# Repeats the health checks on the VMs listed in FAILED_VM_INDEXES. If a VM
# still fails, all infrastructure is destroyed and the step fails.
# The failure handler lives on the runner side, after the heredoc: placed
# inside the heredoc it would be sent to the lab VM as part of the remote
# script and leave an unclosed `{` in the local script.
- name: Re-test failed lab VMs after redeploy
  shell: bash
  env:
    # Falls back to 'rocky' (the default os_image) when inputs are empty.
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || 'rocky' }}
    FAILED_VM_INDEXES: ${{ env.FAILED_VM_INDEXES }}
    # Needed by the `terraform destroy` in the failure handler.
    OS_CLOUD: ${{ vars.OS_CLOUD }}
  run: |
    set -euo pipefail

    bastion_ip=185.45.78.149
    bastion_key="default.pem"
    mapfile -t ssh_lines < ssh_list.txt

    # Unquoted on purpose: the list is split on whitespace.
    for idx in $FAILED_VM_INDEXES; do
      line="${ssh_lines[$idx]}"
      ip=$(echo "$line" | awk '{print $2}')
      name=$(echo "$line" | awk '{print $3}')
      password=$(echo "$line" | awk '{print $5}')

      echo "::add-mask::$password"
      echo "Re-testing $name at $ip (index $idx)..."

      if ! sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
        -o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
        -o IdentityFile="$bastion_key" \
        "${LAB_IMAGE_USER}@${ip}" <<'EOF'
    # NOTE(review): this address was obfuscated ("[email protected]") in the
    # reviewed copy of the workflow; restore the real user@host.
    seed_target='[email protected]'

    echo "Re-checking virsh VMs..."
    output=$(sudo virsh list --all)
    echo "$output"
    if ! echo "$output" | grep -q 'seed.*running'; then echo "'seed' not running"; exit 1; fi
    if ! echo "$output" | grep -q 'compute0.*shut off'; then echo "'compute0' not shut off"; exit 1; fi
    if ! echo "$output" | grep -q 'controller0.*shut off'; then echo "'controller0' not shut off"; exit 1; fi

    # ssh -n: this script arrives on stdin, so a nested ssh that reads stdin
    # would consume the remaining lines of the script.
    echo "Checking bifrost container..."
    if ! ssh -n "$seed_target" 'sudo docker ps' | grep -q bifrost_deploy; then
      echo "bifrost_deploy container not running"; exit 1;
    fi

    echo "Checking openssh package source..."
    if ! ssh -n "$seed_target" 'sudo dnf info openssh' | grep -q 'Repository *: *@System'; then
      echo "openssh not from @System"; exit 1;
    fi

    echo "Checking a-seed-from-nothing.out for Ansible success..."
    if ! tail -n 20 a-seed-from-nothing.out | grep -q 'PLAY RECAP.*failed=0'; then
      echo "Ansible PLAY RECAP shows failures"; exit 1;
    fi

    echo "All post-redeploy checks passed on $HOSTNAME"
    EOF
      then
        echo "Post-deploy check failed on $name. Destroying all infrastructure..."
        terraform destroy -auto-approve
        exit 1
      fi

    done
354+
355+
# When the `au_from_seed` input is true, starts ./a-universe-from-seed.sh in a
# detached tmux session named `aus-run` on every VM listed in ssh_list.txt.
- name: Run a-universe-from-seed.sh if true
  if: inputs.au_from_seed == true
  shell: bash
  env:
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || inputs.os_image == 'Rocky9' && 'rocky' }}
  run: |
    bastion_ip=185.45.78.149
    bastion_key="default.pem"

    mapfile -t vm_entries < ssh_list.txt

    for entry in "${vm_entries[@]}"; do
      # Whitespace-separated fields: 2 = IP, 3 = name, 5 = password.
      read -r _ ip name _ password _ <<< "$entry"

      echo "::add-mask::$password"
      echo "Launching a-universe-from-seed.sh on $name at $ip in tmux..."

      sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
        -o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
        -o IdentityFile=$bastion_key \
        "${LAB_IMAGE_USER}@${ip}" \
        "tmux new-session -d -s aus-run './a-universe-from-seed.sh'"
    done
381+
382+
# - name: Run test workflow
383+
# if: inputs.deployment_type == 'Test'
384+
# uses: ./.github/workflows/AUFN-test.yml
385+
386+
# - name: Upload Terraform outputs
387+
# if: ${{ inputs.deployment_type == 'Deployment' || inputs.debug_mode == true }}
388+
# uses: actions/upload-artifact@v4
389+
# with:
390+
# name: ${{ inputs.deployment_type }}-terraform-artifacts
391+
392+
# Tears the infrastructure down. `always()` makes the step run even when an
# earlier step has failed.
- name: Destroy
  if: always()
  env:
    OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
    OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}
    OS_CLOUD: ${{ vars.OS_CLOUD }}
  run: |
    terraform destroy -auto-approve

0 commit comments

Comments
 (0)