Skip to content

Commit 72b81fb

Browse files
committed
AUFN CI unfinished draft
1 parent 9771e0d commit 72b81fb

File tree

3 files changed

+378
-2
lines changed

3 files changed

+378
-2
lines changed

.github/workflows/deploy-aufn.yml

Lines changed: 376 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,376 @@
1+
---
# AUFN lab deployment workflow.
#
# Triggers: pushes to the AUFN-CI branch, and manual dispatch with the inputs
# below. On push events the `inputs` context is empty, so anything reading
# `inputs.*` must cope with an empty value.

name: AUFN KIDDIN' ME?!

on:
  push:
    branches:
      - AUFN-CI
  workflow_dispatch:
    inputs:
      deployment_type:
        description: Type of deployment
        type: choice
        options:
          - Test
          - Deployment
        default: Test
      lab_vm_count:
        description: Total number of Lab VMs to deploy
        type: number
        required: true
        default: 2
      # When using this value in a step, mask it first with `::add-mask::`.
      # There is deliberately no `default:` here: expression contexts such as
      # `secrets` are not evaluated in workflow_dispatch input defaults, so a
      # `${{ secrets.BASTION_TEST_PASSWORD }}` default never resolves to the
      # secret. A consuming step can fall back to the secret itself with
      # `inputs.reg_pwd || secrets.BASTION_TEST_PASSWORD`.
      reg_pwd:
        description: Password for registry access
        type: string
      os_image:
        description: Host OS image
        type: choice
        options:
          - Ubuntu
          - Rocky9
        default: 'Rocky9'
      aufn_branch:
        description: Which branch of AUFN to use
        type: string
        default: smslab/2023.1
      au_from_seed:
        description: "Run 'A Universe From Seed'?"
        type: boolean
        default: false
      debug_mode:
        description: Keep Test up to debug?
        type: boolean
        default: false
47+
48+
49+
jobs:
  # Single job: provisions the lab infrastructure with Terraform and then
  # validates the lab VMs over SSH.
  set-up-vars:
    name: Set up variables
    # `inputs` is empty on push-triggered runs; fall back to the dispatch
    # default ('Test') so the job still resolves an environment.
    environment: ${{ inputs.deployment_type || 'Test' }}
    runs-on: ubuntu-latest

    steps:
56+
# Install the client-side tooling that later steps shell out to.
- name: Install Package
  # Pinned to a release tag instead of the mutable `main` branch so that an
  # upstream push cannot silently change what runs in this job.
  uses: ConorMacBride/install-package@v1
  with:
    apt: git unzip nodejs python3-pip python3-venv openssh-server openssh-client jq

# sshpass supplies the lab VM passwords to ssh non-interactively.
- name: Install sshpass
  run: sudo apt-get update && sudo apt-get install -y sshpass

- name: Start the SSH service
  run: |
    sudo /etc/init.d/ssh start
67+
68+
# - name: Check if 'Deployment' Lab is already deployed
69+
# uses: softwareforgood/check-artifact-v4-existence@v0
70+
# with:
71+
# name: ${{ inputs.deployment_type }}-terraform-artifacts
72+
#
73+
# or use a ping command to check if the bastion is up
74+
#
75+
76+
- name: Checkout
  uses: actions/checkout@v4
  with:
    path: repo-dir

# Every later `run` step executes in $GITHUB_WORKSPACE (the default working
# directory), so the Terraform configuration has to land there. Moving it to
# the runner's home directory would leave those steps with nothing to run.
# Note: the `*` glob does not match dotfiles, so hidden files stay in repo-dir.
- name: Move contents to $GITHUB_WORKSPACE
  run: |
    mv repo-dir/* "$GITHUB_WORKSPACE"/
84+
85+
# Write the OpenStack client configuration from the CLOUDS_YAML secret.
- name: Generate clouds.yaml
  run: |
    # The secret arrives through the environment and is written verbatim.
    # Interpolating it into an unquoted heredoc would let the shell expand
    # any `$` or backtick it contains (common in passwords) and corrupt it.
    printf '%s\n' "$CLOUDS_YAML" > clouds.yaml
    chmod 600 clouds.yaml
  env:
    CLOUDS_YAML: ${{ secrets.CLOUDS_YAML }}
90+
91+
# Render terraform.tfvars from the dispatch inputs and repository variables.
- name: Generate terraform.tfvars
  run: |
    # Map the chosen OS image to its image ID, image name and login user.
    # An empty OS_IMAGE (push-triggered run) uses the dispatch default, Rocky9.
    case "${OS_IMAGE:-Rocky9}" in
      Ubuntu)
        image_id="$IMAGE_ID_UBUNTU"
        image_name="Ubuntu-22.04"
        image_user="ubuntu"
        ;;
      Rocky9)
        image_id="$IMAGE_ID_ROCKY"
        image_name="Rocky9"
        image_user="rocky"
        ;;
      *)
        echo "Unsupported os_image: ${OS_IMAGE}" >&2
        exit 1
        ;;
    esac

    # Unquoted heredoc: the shell variables below are expanded on write.
    # LAB_COUNT falls back to the dispatch default (2) when unset.
    cat << EOF > terraform.tfvars
    lab_count = ${LAB_COUNT:-2}
    lab_net_ipv4 = "stackhpc-ipv4-aufn"
    image_id = "${image_id}"
    image_name = "${image_name}"
    lab_flavor = "aufn.v1.large"
    registry_flavor = "general.v1.medium"
    boot_labs_from_volume = true
    image_user = "${image_user}"
    allocate_floating_ips = false
    create_bastion = true
    EOF

    # Conditionally append bastion_floating_ip
    if [ "${DEPLOYMENT_TYPE:-Test}" = "Deployment" ]; then
      echo 'bastion_floating_ip = "185.45.78.149"' >> terraform.tfvars
    fi
  env:
    # Values are passed through the environment rather than spliced into the
    # script text. The `vars.*` references must be unquoted: in quotes they
    # are plain string literals, not variable lookups.
    DEPLOYMENT_TYPE: ${{ inputs.deployment_type }}
    LAB_COUNT: ${{ inputs.lab_vm_count }}
    OS_IMAGE: ${{ inputs.os_image }}
    IMAGE_ID_ROCKY: ${{ vars.LAB_OS_IMAGE_ROCKY }}
    IMAGE_ID_UBUNTU: ${{ vars.LAB_OS_IMAGE_UBUNTU }}
114+
115+
# First Terraform command of the job: initialise the working directory, then
# show the plan.
# NOTE(review): no step in this job installs Terraform; confirm that the
# runner image provides a `terraform` binary on PATH.
- name: Terraform Plan
  run: |
    # Providers and modules must be installed before plan/apply can run.
    terraform init -input=false
    terraform plan
  env:
    OS_CLOUD: ${{ vars.OS_CLOUD }}
119+
120+
# Create the infrastructure, retrying up to five times. Each failed attempt
# tears everything down again before the next try.
- name: Terraform Apply
  id: tf_apply
  env:
    OS_CLOUD: ${{ vars.OS_CLOUD }}
  run: |
    attempt=0
    while [ "$attempt" -lt 5 ]; do
      attempt=$((attempt + 1))

      if ! terraform apply -auto-approve; then
        echo "Failed to create infrastructure on attempt $attempt"
        sleep 10

        # Need to add a check to see which part failed and then
        # taint and retry once more before declaring failure

        terraform destroy -auto-approve
        sleep 60
        continue
      fi

      echo "Created infrastructure on attempt $attempt"
      exit 0
    done

    echo "Failed to create infrastructure after $attempt attempts"
    exit 1
141+
142+
# Print the Terraform outputs to the job log.
- name: Get Terraform outputs
  id: tf_outputs
  run: |
    terraform output -json

# Persist the outputs by redirecting the command itself. A plain `run` step
# does not publish a `stdout` step output, so reading
# `steps.tf_outputs.outputs.stdout` would write an empty file.
- name: Write Terraform outputs
  run: |
    terraform output -json > tf-outputs.yml

# One line per lab VM: `ssh <ip> <name> # <id>`.
- name: Write out Lab VMs info
  run: |
    # `-raw` prints no trailing newline. Add one so that line-oriented
    # readers (`while read`) do not drop the last VM.
    { terraform output -raw labs; echo; } > ssh_list.txt
156+
157+
# Set the `rocky` login password on the bastion and enable SSH password
# authentication there.
- name: Update bastion password authentication and set login password
  run: |
    if [ -z "$REG_PWD" ]; then
      echo "No bastion password supplied (reg_pwd input or BASTION_TEST_PASSWORD secret)" >&2
      exit 1
    fi
    echo "::add-mask::$REG_PWD"

    # NOTE(review): the ssh target in the source was mangled by e-mail
    # obfuscation. It is reconstructed here from the `rocky` account and the
    # bastion address used by the other steps; confirm before relying on it.
    bastion="rocky@185.45.78.149"

    # Send the password over stdin instead of splicing it into the remote
    # script, so quotes or `$` in the password cannot break or inject commands.
    # accept-new: a fresh runner has no known_hosts entry for the bastion, and
    # ssh cannot prompt for confirmation in a non-interactive job.
    printf '%s\n' "$REG_PWD" \
      | ssh -o StrictHostKeyChecking=accept-new -i default.pem "$bastion" \
          'sudo passwd --stdin rocky'

    ssh -o StrictHostKeyChecking=accept-new -i default.pem "$bastion" <<'EOF'
    sudo sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config.d/50-cloud-init.conf
    sudo systemctl restart sshd
    EOF
  shell: bash
  env:
    # Use the dispatch input when given, otherwise the repository secret.
    REG_PWD: ${{ inputs.reg_pwd || secrets.BASTION_TEST_PASSWORD }}
167+
168+
# Smoke test: open an SSH session to every lab VM through the bastion.
# Each ssh_list.txt line has the form `ssh <ip> <name> # <id>`; field 5 (the
# instance ID) is used as the login password.
- name: Check connection to Lab VMs
  run: |
    bastion_ip=185.45.78.149
    bastion_key="default.pem"

    # `|| [ -n "$line" ]` still processes a final line with no trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
      ip=$(echo "$line" | awk '{print $2}')
      name=$(echo "$line" | awk '{print $3}')
      password=$(echo "$line" | awk '{print $5}')

      echo "::add-mask::$password"

      echo "Connecting to $name at $ip via bastion..."

      # -n detaches ssh from stdin. Without it ssh consumes the rest of
      # ssh_list.txt and the loop stops after the first VM.
      sshpass -p "$password" ssh -n -o StrictHostKeyChecking=no \
        -o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
        -o IdentityFile="$bastion_key" \
        "${LAB_IMAGE_USER}@${ip}" \
        'echo "Connected to $(hostname)"'
    done < ssh_list.txt
  shell: bash
  env:
    # Anything other than Ubuntu (including an empty input on push events)
    # resolves to the Rocky login user, matching the default os_image.
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || 'rocky' }}
191+
192+
# Run the health checks on every lab VM and record the zero-based indexes of
# the VMs that fail in FAILED_VM_INDEXES (space separated) for later steps.
- name: Validate lab VMs setup
  run: |
    bastion_ip=185.45.78.149
    bastion_key="default.pem"
    index=0
    failed_indexes=()

    # `|| [ -n "$line" ]` still processes a final line with no trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
      ip=$(echo "$line" | awk '{print $2}')
      name=$(echo "$line" | awk '{print $3}')
      password=$(echo "$line" | awk '{print $5}')

      echo "::add-mask::$password"
      echo "Connecting to $name at $ip..."

      # The checks run in the remote shell, so their verdict can only come
      # back through the ssh exit status: the remote script exits non-zero
      # when any check fails. A variable set remotely is never visible here.
      # A failed connection also counts as a failed VM.
      if sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
        -o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
        -o IdentityFile="$bastion_key" \
        "${LAB_IMAGE_USER}@${ip}" <<'EOF'
    taint="false"

    # NOTE(review): the seed VM ssh target was mangled by e-mail obfuscation
    # in the source. stack@192.168.33.5 is a reconstruction; confirm it.
    seed_target="stack@192.168.33.5"

    echo "Checking 'virsh list --all'..."
    output=$(sudo virsh list --all)
    echo "$output"

    if ! echo "$output" | grep -q 'seed.*running'; then echo "'seed' not running"; taint="true"; fi
    if ! echo "$output" | grep -q 'compute0.*shut off'; then echo "'compute0' not shut off"; taint="true"; fi
    if ! echo "$output" | grep -q 'controller0.*shut off'; then echo "'controller0' not shut off"; taint="true"; fi

    # The nested ssh calls use -n: this script is itself read from stdin, and
    # an ssh that inherits stdin would swallow the remaining lines.
    echo "Checking 'bifrost_deploy' container..."
    container_output=$(ssh -n "$seed_target" 'sudo docker ps')
    echo "$container_output"
    if ! echo "$container_output" | grep -q bifrost_deploy; then echo "Container bifrost_deploy not found running"; taint="true"; fi

    echo "Checking openssh package source..."
    pkg_output=$(ssh -n "$seed_target" 'sudo dnf info openssh')
    echo "$pkg_output"
    if ! echo "$pkg_output" | grep -q 'Repository *: *@System'; then echo "Package openssh not from @System"; taint="true"; fi

    echo "Checking a-seed-from-nothing.out log result..."
    if ! tail -n 10 a-seed-from-nothing.out | grep -q 'PLAY RECAP.*failed=0'; then
      echo "Ansible PLAY RECAP failed != 0"
      taint="true"
    fi

    if [ "$taint" = "true" ]; then
      echo "One or more checks failed on $HOSTNAME"
      exit 1
    fi
    echo "All checks passed on $HOSTNAME"
    EOF
      then
        echo "$name passed validation"
      else
        echo "$name failed validation"
        failed_indexes+=("$index")
      fi
      index=$((index + 1))

    done < ssh_list.txt
    echo "FAILED_VM_INDEXES=${failed_indexes[*]}" >> "$GITHUB_ENV"
  shell: bash
  env:
    # Anything other than Ubuntu (including an empty input on push events)
    # resolves to the Rocky login user, matching the default os_image.
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || 'rocky' }}
248+
249+
# Mark every VM listed in FAILED_VM_INDEXES for recreation, then re-apply.
- name: Taint failed lab VMs (if any)
  run: |
    if [ -z "${FAILED_VM_INDEXES}" ]; then
      echo "No failed VMs detected"
      exit 0
    fi

    # FAILED_VM_INDEXES is a space-separated list, hence the unquoted expansion.
    for idx in $FAILED_VM_INDEXES; do
      echo "Tainting openstack_compute_instance_v2.lab[$idx]"
      terraform taint "openstack_compute_instance_v2.lab[$idx]"
    done

    echo "Re-running Terraform apply to fix failed VMs"
    terraform apply -auto-approve
  env:
    FAILED_VM_INDEXES: ${{ env.FAILED_VM_INDEXES }}
    # The re-apply talks to the cloud, so it needs the same cloud selection
    # as the other plan/apply steps in this job.
    OS_CLOUD: ${{ vars.OS_CLOUD }}
  shell: bash
266+
267+
# Refresh the recorded outputs after the taint/re-apply pass.
- name: Get Terraform outputs
  # Step ids must be unique within a job; `tf_outputs` is already used by
  # the earlier output step.
  id: tf_outputs_redeploy
  run: |
    terraform output -json

# Persist the outputs by redirecting the command itself. A plain `run` step
# does not publish a `stdout` step output to read back.
- name: Write Terraform outputs
  run: |
    terraform output -json > tf-outputs.yml

# One line per lab VM: `ssh <ip> <name> # <id>`.
- name: Write out Lab VMs info
  run: |
    # `-raw` prints no trailing newline. Add one so that line-oriented
    # readers do not drop the last VM.
    { terraform output -raw labs; echo; } > ssh_list.txt
281+
282+
# Re-run the health checks on the VMs that were recreated. If any of them
# still fails, destroy all infrastructure and fail the job.
- name: Re-test failed lab VMs after redeploy
  run: |
    set -euo pipefail

    bastion_ip=185.45.78.149
    bastion_key="default.pem"
    mapfile -t ssh_lines < ssh_list.txt

    # Space-separated indexes; an empty value means there is nothing to re-test.
    for idx in ${FAILED_VM_INDEXES:-}; do
      line="${ssh_lines[$idx]}"
      ip=$(echo "$line" | awk '{print $2}')
      name=$(echo "$line" | awk '{print $3}')
      password=$(echo "$line" | awk '{print $5}')

      echo "::add-mask::$password"
      echo "Re-testing $name at $ip (index $idx)..."

      # The failure handler has to come after the heredoc terminator. Written
      # as `<<'EOF' || {` it would be swallowed into the heredoc body and sent
      # to the remote host, leaving the local brace group unclosed.
      if ! sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
        -o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
        -o IdentityFile="$bastion_key" \
        "${LAB_IMAGE_USER}@${ip}" <<'EOF'
    # NOTE(review): the seed VM ssh target was mangled by e-mail obfuscation
    # in the source. stack@192.168.33.5 is a reconstruction; confirm it.
    seed_target="stack@192.168.33.5"

    echo "Re-checking virsh VMs..."
    output=$(sudo virsh list --all)
    echo "$output"
    if ! echo "$output" | grep -q 'seed.*running'; then echo "'seed' not running"; exit 1; fi
    if ! echo "$output" | grep -q 'compute0.*shut off'; then echo "'compute0' not shut off"; exit 1; fi
    if ! echo "$output" | grep -q 'controller0.*shut off'; then echo "'controller0' not shut off"; exit 1; fi

    # The nested ssh calls use -n: this script is itself read from stdin, and
    # an ssh that inherits stdin would swallow the remaining lines.
    echo "Checking bifrost container..."
    if ! ssh -n "$seed_target" 'sudo docker ps' | grep -q bifrost_deploy; then
      echo "bifrost_deploy container not running"; exit 1;
    fi

    echo "Checking openssh package source..."
    if ! ssh -n "$seed_target" 'sudo dnf info openssh' | grep -q 'Repository *: *@System'; then
      echo "openssh not from @System"; exit 1;
    fi

    echo "Checking a-seed-from-nothing.out for Ansible success..."
    if ! tail -n 20 a-seed-from-nothing.out | grep -q 'PLAY RECAP.*failed=0'; then
      echo "Ansible PLAY RECAP shows failures"; exit 1;
    fi

    echo "All post-redeploy checks passed on $HOSTNAME"
    EOF
      then
        echo "Post-deploy check failed on $name. Destroying all infrastructure..."
        terraform destroy -auto-approve
        exit 1
      fi

    done
  shell: bash
  env:
    # Anything other than Ubuntu (including an empty input on push events)
    # resolves to the Rocky login user, matching the default os_image.
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || 'rocky' }}
    FAILED_VM_INDEXES: ${{ env.FAILED_VM_INDEXES }}
    # `terraform destroy` talks to the cloud, so it needs the same cloud
    # selection as the plan/apply steps.
    OS_CLOUD: ${{ vars.OS_CLOUD }}
338+
339+
# Optional: start a-universe-from-seed.sh on every lab VM inside a detached
# tmux session named `aus-run`.
- name: Run a-universe-from-seed.sh if true
  if: inputs.au_from_seed == true
  shell: bash
  env:
    LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || inputs.os_image == 'Rocky9' && 'rocky' }}
  run: |
    bastion_ip=185.45.78.149
    bastion_key="default.pem"

    mapfile -t ssh_lines < ssh_list.txt

    for entry in "${ssh_lines[@]}"; do
      # Whitespace-separated fields: 2 = ip, 3 = name, 5 = password.
      # Fields 1 and 4 and anything after field 5 are discarded.
      read -r _ ip name _ password _ <<< "$entry"

      echo "::add-mask::$password"
      echo "Launching a-universe-from-seed.sh on $name at $ip in tmux..."

      sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
        -o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
        -o IdentityFile="$bastion_key" \
        "${LAB_IMAGE_USER}@${ip}" \
        "tmux new-session -d -s aus-run './a-universe-from-seed.sh'"
    done
365+
366+
# - name: Run test workflow
367+
# if: inputs.deployment_type == 'Test'
368+
# uses: ./.github/workflows/AUFN-test.yml
369+
370+
# - name: Upload Terraform outputs
371+
# if: ${{ inputs.deployment_type == 'Deployment' || inputs.debug_mode == true }}
372+
# uses: actions/upload-artifact@v4
373+
# with:
374+
# name: ${{ inputs.deployment_type }}-terraform-artifacts
375+
376+

a-seed-from-nothing.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ cd $HOME
101101
git clone https://github.com/stackhpc/beokay.git -b master
102102

103103
# Use Beokay to bootstrap your control host.
104-
[[ -d deployment ]] || beokay/beokay.py create --base-path ~/deployment --kayobe-repo https://opendev.org/openstack/kayobe.git --kayobe-branch stable/2023.1 --kayobe-config-repo https://github.com/stackhpc/a-universe-from-nothing.git --kayobe-config-branch stable/2023.1
104+
[[ -d deployment ]] || beokay/beokay.py create --base-path ~/deployment --kayobe-repo https://opendev.org/openstack/kayobe.git --kayobe-branch unmaintained/2023.1 --kayobe-config-repo https://github.com/stackhpc/a-universe-from-nothing.git --kayobe-config-branch stable/2023.1
105105

106106
# Bump the provisioning time - it can be lengthy on virtualised storage
107107
sed -i.bak 's%^[# ]*wait_active_timeout:.*% wait_active_timeout: 5000%' ~/deployment/src/kayobe/ansible/overcloud-provision.yml

output.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
output "labs" {
2-
value = join("\n", formatlist("ssh %s # %s", openstack_compute_instance_v2.lab.*.name, openstack_compute_instance_v2.lab.*.id))
2+
value = join("\n", formatlist("ssh %s %s # %s", openstack_compute_instance_v2.lab.*.access_ip_v4, openstack_compute_instance_v2.lab.*.name, openstack_compute_instance_v2.lab.*.id))
33
}
44

55
output "registry" {

0 commit comments

Comments
 (0)