Skip to content

Commit 0ce11d6

Browse files
committed
AUFN CI unfinished draft
1 parent 9771e0d commit 0ce11d6

File tree

3 files changed

+397
-2
lines changed

3 files changed

+397
-2
lines changed

.github/workflows/deploy-aufn.yml

Lines changed: 395 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,395 @@
1+
---
2+
#
3+
4+
# Manually dispatched workflow: the operator picks the deployment type, the
# number of lab VMs, the host OS image and the AUFN branch at run time.
name: "AUFN KIDDIN' ME?!"

on:
  workflow_dispatch:
    inputs:
      deployment_type:
        description: Type of deployment
        type: choice
        options:
          - Test
          - Deployment
        default: Test
      lab_vm_count:
        description: Total number of Lab VMs to deploy
        type: number
        required: true
        default: 2
      # When using in workflow use ::add-mask::$ to mask the password.
      # NOTE(review): dispatch inputs are shown in plain text in the run UI;
      # prefer leaving this empty and relying on a secret.
      reg_pwd:
        description: Password for registry access
        type: string
        default: ""  # NOTE: This needs to be set at runtime via secrets
      os_image:
        description: Host OS image
        type: choice
        options:
          - Ubuntu
          - Rocky9
        default: 'Rocky9'
      aufn_branch:
        description: Which branch of AUFN to use
        type: string
        default: smslab/2023.1
      au_from_seed:
        description: Run 'A Universe From Seed'?
        type: boolean
        default: false
      debug_mode:
        description: Keep Test up to debug?
        type: boolean
        default: false

# Secrets this workflow reads. They are resolved from the repository or the
# selected environment; a `secrets:` declaration is only accepted under
# `workflow_call`, so it must not appear under `workflow_dispatch`.
#   BASTION_TEST_PASSWORD
#   CLOUDS_YAML
#   OS_APPLICATION_CREDENTIAL_ID
#   OS_APPLICATION_CREDENTIAL_SECRET
53+
54+
# Job definition; the steps that follow provision, validate and tear down
# the lab infrastructure.
jobs:
  set-up-vars:
    name: Set up variables
    # The selected deployment type is also used as the environment name.
    environment: ${{ inputs.deployment_type }}
    runs-on: Ubuntu-latest

    steps:
61+
# Install the OS packages used by the later steps.
- name: Install Package
  # Pinned to a release tag instead of the moving `main` branch so upstream
  # changes cannot silently alter this workflow.
  uses: ConorMacBride/install-package@v1
  with:
    apt: git unzip nodejs python3-pip python3-venv openssh-server openssh-client jq

# sshpass feeds the per-VM passwords to ssh in the lab VM checks.
- name: Install sshpass
  run: sudo apt-get update && sudo apt-get install -y sshpass

- name: Start the SSH service
  run: |
    sudo /etc/init.d/ssh start
72+
73+
# - name: Check if 'Deployment' Lab is already deployed
74+
# uses: softwareforgood/check-artifact-v4-existence@v0
75+
# with:
76+
# name: ${{ inputs.deployment_type }}-terraform-artifacts
77+
#
78+
# or use a ping command to check if the bastion is up
79+
#
80+
81+
# Check the repository out into a sub-directory, then lift its contents into
# the workspace root, which is where every later `run` step executes.
- name: Checkout
  uses: actions/checkout@v4
  with:
    path: repo-dir

- name: Move contents to $GITHUB_WORKSPACE
  run: |
    # Match dotfiles too, so hidden files in the repository root are moved
    # along with the rest.
    shopt -s dotglob
    # Target the workspace itself (not the home directory), otherwise the
    # following terraform steps run in a directory with no configuration.
    mv repo-dir/* "$GITHUB_WORKSPACE"/
89+
90+
# Put the terraform CLI on PATH for the steps below.
- name: Install terraform
  uses: hashicorp/setup-terraform@v2

# Initialise the working directory before plan/apply.
- name: Initialise terraform
  run: |
    terraform init
95+
96+
# Write the clouds.yaml held in the CLOUDS_YAML secret to the working
# directory.
- name: Generate clouds.yaml
  run: |
    # The secret arrives through the environment instead of being pasted into
    # the script text, so `$`, backticks or backslashes inside it are written
    # verbatim rather than being expanded by the shell.
    printf '%s\n' "${CLOUDS_YAML}" > clouds.yaml
  env:
    CLOUDS_YAML: ${{ secrets.CLOUDS_YAML }}
101+
102+
# Render terraform.tfvars from the dispatch inputs and repository variables.
- name: Generate terraform.tfvars
  run: |
    cat << EOF > terraform.tfvars
    lab_count = ${{ inputs.lab_vm_count }}
    lab_net_ipv4 = "${{ vars.LAB_NETWORK }}"
    image_id = "${{ env.LAB_IMAGE_ID }}"
    image_name = "${{ env.LAB_IMAGE_NAME }}"
    lab_flavor = "aufn.v1.large"
    registry_flavor = "general.v1.medium"
    boot_labs_from_volume = true
    image_user = "${{ env.LAB_IMAGE_USER }}"
    allocate_floating_ips = false
    create_bastion = true
    EOF

    # Conditionally append bastion_floating_ip
    if [ "${{ inputs.deployment_type }}" = "Deployment" ]; then
      echo 'bastion_floating_ip = "185.45.78.149"' >> terraform.tfvars
    fi
  env:
    # Contexts are referenced directly inside an expression; `${{ }}` cannot
    # be nested within a string literal of another expression.
    # NOTE: `a && b || c` falls through to `c` when `b` is empty, so an unset
    # image variable yields `false` here rather than an empty string.
    LAB_IMAGE_ID: ${{ inputs.os_image == 'Rocky9' && vars.LAB_OS_IMAGE_ROCKY || inputs.os_image == 'Ubuntu' && vars.LAB_OS_IMAGE_UBUNTU }}
    LAB_IMAGE_NAME: ${{ inputs.os_image == 'Ubuntu' && 'Ubuntu-22.04' || inputs.os_image }}
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || inputs.os_image == 'Rocky9' && 'rocky' }}
125+
126+
# Show the planned changes; OS_CLOUD comes from the repository/environment
# variable of the same name.
- name: Terraform Plan
  env:
    OS_CLOUD: ${{ vars.OS_CLOUD }}
  run: |
    terraform plan
130+
131+
# Apply with up to five attempts; a failed attempt is followed by a full
# destroy and a pause before the next one.
- name: Terraform Apply
  id: tf_apply
  env:
    OS_CLOUD: ${{ vars.OS_CLOUD }}
  run: |
    for attempt in 1 2 3 4 5; do
      if ! terraform apply -auto-approve; then
        echo "Failed to create infrastructure on attempt $attempt"
        sleep 10

        # Need to add a check to see which part failed and then
        # taint and retry once more before declaring failure

        terraform destroy -auto-approve
        sleep 60
        continue
      fi

      echo "Created infrastructure on attempt $attempt"
      exit 0
    done

    # Only reached when every attempt failed.
    echo "Failed to create infrastructure after $attempt attempts"
    exit 1
152+
153+
# Print all outputs as JSON; the next step reads this step's `stdout` output.
- name: Get Terraform outputs
  id: tf_outputs
  run: terraform output -json

# Persist the captured JSON to tf-outputs.yml.
- name: Write Terraform outputs
  run: |
    cat > tf-outputs.yml << EOF
    ${{ steps.tf_outputs.outputs.stdout }}
    EOF

# ssh_list.txt holds the raw `labs` output; later steps read it line by line.
- name: Write out Lab VMs info
  run: terraform output -raw labs > ssh_list.txt
167+
168+
# Set the login password of the `rocky` user on the bastion and enable SSH
# password authentication there.
- name: Update bastion password authentication and set login password
  run: |
    echo "::add-mask::${reg_pwd_var}"

    # NOTE(review): the ssh target in the recovered source was replaced by an
    # e-mail-obfuscation placeholder; rocky@<bastion ip> is reconstructed from
    # the user and address used elsewhere in this workflow - confirm.
    #
    # The password is piped to `passwd --stdin` instead of being embedded in
    # the remote script, so quotes or `$` in it cannot break or alter the
    # command line.
    printf '%s\n' "${reg_pwd_var}" \
      | ssh -o StrictHostKeyChecking=no -i default.pem "rocky@${bastion_ip}" \
          'sudo passwd --stdin rocky'

    ssh -o StrictHostKeyChecking=no -i default.pem "rocky@${bastion_ip}" <<'EOF'
    sudo sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config.d/50-cloud-init.conf
    sudo systemctl restart sshd
    EOF
  shell: bash
  env:
    bastion_ip: 185.45.78.149
    # Use the dispatch input when given, otherwise the secret's value. The
    # secret must be referenced as a context, not as a quoted string literal.
    reg_pwd_var: ${{ inputs.reg_pwd || secrets.BASTION_TEST_PASSWORD }}
180+
181+
# Open an SSH session to every lab VM listed in ssh_list.txt (through the
# bastion) to confirm it is reachable. Fields 2, 3 and 5 of each line are
# used as the VM's IP, name and password.
- name: Check connection to Lab VMs
  run: |
    bastion_ip=185.45.78.149
    bastion_key="default.pem"

    # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
      ip=$(echo "$line" | awk '{print $2}')
      name=$(echo "$line" | awk '{print $3}')
      password=$(echo "$line" | awk '{print $5}')

      echo "::add-mask::$password"

      echo "Connecting to $name at $ip via bastion..."

      # -n detaches ssh from stdin; without it ssh swallows the remainder of
      # ssh_list.txt and the loop stops after the first VM.
      sshpass -p "$password" ssh -n -o StrictHostKeyChecking=no \
        -o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
        -o IdentityFile="$bastion_key" \
        "${LAB_IMAGE_USER}@${ip}" \
        'echo "Connected to $(hostname)"'
    done < ssh_list.txt
  shell: bash
  env:
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || inputs.os_image == 'Rocky9' && 'rocky' }}
204+
205+
# Run a set of health checks on every lab VM and export the zero-based line
# indexes of the VMs that failed as FAILED_VM_INDEXES (space separated) for
# the following steps.
- name: Validate lab VMs setup
  run: |
    bastion_ip=185.45.78.149
    bastion_key="default.pem"
    index=0
    failed_indexes=()

    # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
      ip=$(echo "$line" | awk '{print $2}')
      name=$(echo "$line" | awk '{print $3}')
      password=$(echo "$line" | awk '{print $5}')

      echo "::add-mask::$password"
      echo "Connecting to $name at $ip..."

      # The remote script reports failure through its exit status: a variable
      # assigned on the lab VM is never visible to this runner-side shell.
      #
      # NOTE(review): the seed VM ssh target in the recovered source was
      # replaced by an e-mail-obfuscation placeholder; stack@192.168.33.5 is
      # an assumption - confirm before use.
      # NOTE(review): the PLAY RECAP check requires "PLAY RECAP" and
      # "failed=0" on the same line - confirm that matches the log format.
      if ! sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
        -o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
        -o IdentityFile="$bastion_key" \
        "${LAB_IMAGE_USER}@${ip}" <<'EOF'
    failed=0
    seed_target='stack@192.168.33.5'

    echo "Checking 'virsh list --all'..."
    output=$(sudo virsh list --all)
    echo "$output"

    if ! echo "$output" | grep -q 'seed.*running'; then echo "'seed' not running"; failed=1; fi
    if ! echo "$output" | grep -q 'compute0.*shut off'; then echo "'compute0' not shut off"; failed=1; fi
    if ! echo "$output" | grep -q 'controller0.*shut off'; then echo "'controller0' not shut off"; failed=1; fi

    # ssh -n: this script is read from stdin, so a nested ssh that inherits
    # stdin would consume the remaining lines of the script.
    echo "Checking 'bifrost_deploy' container..."
    container_output=$(ssh -n "$seed_target" 'sudo docker ps')
    echo "$container_output"
    if ! echo "$container_output" | grep -q bifrost_deploy; then echo "Container bifrost_deploy not found running"; failed=1; fi

    echo "Checking openssh package source..."
    pkg_output=$(ssh -n "$seed_target" 'sudo dnf info openssh')
    echo "$pkg_output"
    if ! echo "$pkg_output" | grep -q 'Repository *: *@System'; then echo "Package openssh not from @System"; failed=1; fi

    echo "Checking a-seed-from-nothing.out log result..."
    if ! tail -n 10 a-seed-from-nothing.out | grep -q 'PLAY RECAP.*failed=0'; then
      echo "Ansible PLAY RECAP failed != 0"
      failed=1
    fi

    if [ "$failed" -ne 0 ]; then
      echo "One or more checks failed on $HOSTNAME"
      exit 1
    fi
    echo "All checks passed on $HOSTNAME"
    EOF
      then
        # Covers failed checks as well as a failed connection.
        failed_indexes+=("$index")
      fi
      index=$((index + 1))

    done < ssh_list.txt
    echo "FAILED_VM_INDEXES=${failed_indexes[*]}" >> "$GITHUB_ENV"
  shell: bash
  env:
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || inputs.os_image == 'Rocky9' && 'rocky' }}
261+
262+
# Taint every lab instance whose index is listed in FAILED_VM_INDEXES and
# re-apply so terraform recreates them. Does nothing when the list is empty.
- name: Taint failed lab VMs (if any)
  run: |
    if [ -z "${FAILED_VM_INDEXES}" ]; then
      echo "No failed VMs detected"
      exit 0
    fi

    # Intentionally unquoted: the value is a space-separated list of indexes.
    for idx in $FAILED_VM_INDEXES; do
      echo "Tainting openstack_compute_instance_v2.lab[$idx]"
      terraform taint "openstack_compute_instance_v2.lab[$idx]"
    done

    echo "Re-running Terraform apply to fix failed VMs"
    terraform apply -auto-approve
  env:
    FAILED_VM_INDEXES: ${{ env.FAILED_VM_INDEXES }}
    # Same cloud selection as the Plan, Apply and Destroy steps; this step
    # also runs `terraform apply`.
    OS_CLOUD: ${{ vars.OS_CLOUD }}
  shell: bash
279+
280+
# Re-read the outputs after the optional taint/re-apply; the next step reads
# this step's `stdout` output.
- name: Get Terraform outputs
  id: tf_outputs_after_taint
  run: terraform output -json

# Overwrite tf-outputs.yml with the refreshed JSON.
- name: Write Terraform outputs
  run: |
    cat > tf-outputs.yml << EOF
    ${{ steps.tf_outputs_after_taint.outputs.stdout }}
    EOF

# Refresh ssh_list.txt from the `labs` output.
- name: Write out Lab VMs info
  run: terraform output -raw labs > ssh_list.txt
294+
295+
# Repeat the health checks on the VMs listed in FAILED_VM_INDEXES. If any of
# them still fails, destroy the whole infrastructure and fail the step.
- name: Re-test failed lab VMs after redeploy
  run: |
    set -euo pipefail

    bastion_ip=185.45.78.149
    bastion_key="default.pem"
    mapfile -t ssh_lines < ssh_list.txt

    # Intentionally unquoted: the value is a space-separated list of indexes.
    for idx in $FAILED_VM_INDEXES; do
      line="${ssh_lines[$idx]}"
      ip=$(echo "$line" | awk '{print $2}')
      name=$(echo "$line" | awk '{print $3}')
      password=$(echo "$line" | awk '{print $5}')

      echo "::add-mask::$password"
      echo "Re-testing $name at $ip (index $idx)..."

      # The failure handler sits after the here-document: a here-document body
      # starts on the line right after the command that opens it, so handler
      # lines placed there would be sent to the lab VM instead of running on
      # the runner.
      #
      # NOTE(review): the seed VM ssh target in the recovered source was
      # replaced by an e-mail-obfuscation placeholder; stack@192.168.33.5 is
      # an assumption - confirm before use.
      if ! sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
        -o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
        -o IdentityFile="$bastion_key" \
        "${LAB_IMAGE_USER}@${ip}" <<'EOF'
    seed_target='stack@192.168.33.5'

    echo "Re-checking virsh VMs..."
    output=$(sudo virsh list --all)
    echo "$output"
    if ! echo "$output" | grep -q 'seed.*running'; then echo "'seed' not running"; exit 1; fi
    if ! echo "$output" | grep -q 'compute0.*shut off'; then echo "'compute0' not shut off"; exit 1; fi
    if ! echo "$output" | grep -q 'controller0.*shut off'; then echo "'controller0' not shut off"; exit 1; fi

    # ssh -n: this script is read from stdin, so a nested ssh that inherits
    # stdin would consume the remaining lines of the script.
    echo "Checking bifrost container..."
    if ! ssh -n "$seed_target" 'sudo docker ps' | grep -q bifrost_deploy; then
      echo "bifrost_deploy container not running"; exit 1;
    fi

    echo "Checking openssh package source..."
    if ! ssh -n "$seed_target" 'sudo dnf info openssh' | grep -q 'Repository *: *@System'; then
      echo "openssh not from @System"; exit 1;
    fi

    echo "Checking a-seed-from-nothing.out for Ansible success..."
    if ! tail -n 20 a-seed-from-nothing.out | grep -q 'PLAY RECAP.*failed=0'; then
      echo "Ansible PLAY RECAP shows failures"; exit 1;
    fi

    echo "All post-redeploy checks passed on $HOSTNAME"
    EOF
      then
        echo "Post-deploy check failed on $name. Destroying all infrastructure..."
        terraform destroy -auto-approve
        exit 1
      fi

    done
  shell: bash
  env:
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || inputs.os_image == 'Rocky9' && 'rocky' }}
    FAILED_VM_INDEXES: ${{ env.FAILED_VM_INDEXES }}
    # `terraform destroy` above needs the same cloud selection as the Plan,
    # Apply and Destroy steps.
    OS_CLOUD: ${{ vars.OS_CLOUD }}
351+
352+
# Optional: start ./a-universe-from-seed.sh in a detached tmux session named
# `aus-run` on every lab VM listed in ssh_list.txt.
- name: Run a-universe-from-seed.sh if true
  if: inputs.au_from_seed == true
  shell: bash
  env:
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || inputs.os_image == 'Rocky9' && 'rocky' }}
  run: |
    bastion_ip=185.45.78.149
    bastion_key="default.pem"

    mapfile -t vm_entries < ssh_list.txt

    for entry in "${vm_entries[@]}"; do
      # Whitespace-separated fields 2, 3 and 5: IP, name, password.
      read -r _ ip name _ password _ <<< "$entry"

      echo "::add-mask::$password"
      echo "Launching a-universe-from-seed.sh on $name at $ip in tmux..."

      sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
        -o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
        -o IdentityFile=$bastion_key \
        "${LAB_IMAGE_USER}@${ip}" \
        "tmux new-session -d -s aus-run './a-universe-from-seed.sh'"
    done
378+
379+
# - name: Run test workflow
380+
# if: inputs.deployment_type == 'Test'
381+
# uses: ./.github/workflows/AUFN-test.yml
382+
383+
# - name: Upload Terraform outputs
384+
# if: ${{ inputs.deployment_type == 'Deployment' || inputs.debug_mode == true }}
385+
# uses: actions/upload-artifact@v4
386+
# with:
387+
# name: ${{ inputs.deployment_type }}-terraform-artifacts
388+
389+
# Tear everything down, whether or not earlier steps succeeded.
# NOTE(review): this also runs for 'Deployment' runs and when debug_mode is
# set - confirm that is intended.
- name: Destroy
  if: ${{ always() }}
  env:
    OS_CLOUD: ${{ vars.OS_CLOUD }}
    OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
    OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}
  run: |
    terraform destroy -auto-approve

0 commit comments

Comments
 (0)