Skip to content

Commit 46681bf

Browse files
committed
AUFN CI unfinished draft
1 parent 9771e0d commit 46681bf

File tree

4 files changed

+375
-2
lines changed

4 files changed

+375
-2
lines changed

.github/workflows/AUFN-test.yml

Whitespace-only changes.

.github/workflows/deploy-aufn.yml

Lines changed: 373 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,373 @@
1+
---
2+
#
3+
4+
name: AUFN KIDDIN' ME?!
5+
6+
on:
7+
workflow_dispatch:
8+
inputs:
9+
deployment_type:
10+
description: Type of deployment
11+
type: choice
12+
options:
13+
- Test
14+
- Deployment
15+
default: Test
16+
lab_vm_count:
17+
description: Total number of Lab VMs to deploy
18+
type: number
19+
required: true
20+
default: 2
21+
reg_pwd: # When using in workflow use ::add-mask::$ to mask the password
22+
description: Password for registry access
23+
type: string
24+
default: ${{ secrets.BASTION_TEST_PASSWORD }} # NOTE: This needs to be set
25+
os_image:
26+
description: Host OS image
27+
type: choice
28+
options:
29+
- Ubuntu
30+
- Rocky9
31+
default: 'Rocky9'
32+
aufn_branch:
33+
description: Which branch of AUFN to use
34+
type: string
35+
default: smslab/2023.1
36+
au_from_seed:
37+
description: Run 'A Universe From Seed'?
38+
type: boolean
39+
default: false
40+
debug_mode:
41+
description: Keep Test up to debug?
42+
type: boolean
43+
default: false
44+
45+
46+
jobs:
47+
set-up-vars:
48+
name: Set up variables
49+
environment: ${{ inputs.deployment_type }}
50+
runs-on: Ubuntu-latest
51+
52+
steps:
53+
- name: Install Package
54+
uses: ConorMacBride/install-package@main
55+
with:
56+
apt: git unzip nodejs python3-pip python3-venv openssh-server openssh-client jq
57+
58+
- name: Install sshpass
59+
run: sudo apt-get update && sudo apt-get install -y sshpass
60+
61+
- name: Start the SSH service
62+
run: |
63+
sudo /etc/init.d/ssh start
64+
65+
# - name: Check if 'Deployment' Lab is already deployed
66+
# uses: softwareforgood/check-artifact-v4-existence@v0
67+
# with:
68+
# name: ${{ inputs.deployment_type }}-terraform-artifacts
69+
#
70+
# or use a ping command to check if the bastion is up
71+
#
72+
73+
- name: Checkout
74+
uses: actions/checkout@v4
75+
with:
76+
path: repo-dir
77+
78+
- name: Move contents to $GITHUB_WORKSPACE
79+
run: |
80+
# Later steps run terraform from the default working directory, so the checkout must land in the workspace, not in ~/
mv repo-dir/* "$GITHUB_WORKSPACE"/
81+
82+
- name: Generate clouds.yaml
83+
run: |
84+
cat << EOF > clouds.yaml
85+
${{ secrets.CLOUDS_YAML }}
86+
EOF
87+
88+
- name: Generate terraform.tfvars
89+
run: |
90+
cat << EOF > terraform.tfvars
91+
lab_count = ${{ inputs.lab_vm_count }}
92+
lab_net_ipv4 = "stackhpc-ipv4-aufn"
93+
image_id = "${{ env.LAB_IMAGE_ID }}"
94+
image_name = "${{ env.LAB_IMAGE_NAME }}"
95+
lab_flavor = "aufn.v1.large"
96+
registry_flavor = "general.v1.medium"
97+
boot_labs_from_volume = true
98+
image_user = "${{ env.LAB_IMAGE_USER }}"
99+
allocate_floating_ips = false
100+
create_bastion = true
101+
EOF
102+
103+
# Conditionally append bastion_floating_ip
104+
if [ "${{ inputs.deployment_type }}" = "Deployment" ]; then
105+
echo 'bastion_floating_ip = "185.45.78.149"' >> terraform.tfvars
106+
fi
107+
env:
108+
# Resolve the image ID from the repository/environment variable that matches the selected OS image.
LAB_IMAGE_ID: ${{ inputs.os_image == 'Rocky9' && vars.LAB_OS_IMAGE_ROCKY || inputs.os_image == 'Ubuntu' && vars.LAB_OS_IMAGE_UBUNTU }}
109+
LAB_IMAGE_NAME: ${{ inputs.os_image == 'Ubuntu' && 'Ubuntu-22.04' || inputs.os_image }}
110+
LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || inputs.os_image == 'Rocky9' && 'rocky' }}
111+
112+
- name: Terraform Plan
113+
run: terraform plan
114+
env:
115+
OS_CLOUD: ${{ vars.OS_CLOUD }}
116+
117+
- name: Terraform Apply
118+
id: tf_apply
119+
run: |
120+
for attempt in $(seq 5); do
121+
if terraform apply -auto-approve; then
122+
echo "Created infrastructure on attempt $attempt"
123+
exit 0
124+
fi
125+
echo "Failed to create infrastructure on attempt $attempt"
126+
sleep 10
127+
128+
# Need to add a check to see which part failed and then
129+
# taint and retry once more before declaring failure
130+
131+
terraform destroy -auto-approve
132+
sleep 60
133+
done
134+
echo "Failed to create infrastructure after $attempt attempts"
135+
exit 1
136+
env:
137+
OS_CLOUD: ${{ vars.OS_CLOUD }}
138+
139+
- name: Get Terraform outputs
140+
id: tf_outputs
141+
run: |
142+
terraform output -json
143+
144+
- name: Write Terraform outputs
145+
run: |
146+
cat << EOF > tf-outputs.yml
147+
${{ steps.tf_outputs.outputs.stdout }}
148+
EOF
149+
150+
- name: Write out Lab VMs info
151+
run: |
152+
terraform output -raw labs > ssh_list.txt
153+
154+
- name: Update bastion password authentication and set login password
155+
run: |
156+
echo "::add-mask::${{ inputs.reg_pwd }}"
157+
158+
ssh [email protected] -i default.pem <<EOF
159+
echo '${{ inputs.reg_pwd }}' | sudo passwd --stdin rocky
160+
sudo sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config.d/50-cloud-init.conf
161+
sudo systemctl restart sshd
162+
EOF
163+
shell: bash
164+
165+
- name: Check connection to Lab VMs
166+
run: |
167+
bastion_ip=185.45.78.149
168+
bastion_key="default.pem"
169+
170+
while IFS= read -r line; do
171+
ip=$(echo "$line" | awk '{print $2}')
172+
name=$(echo "$line" | awk '{print $3}')
173+
password=$(echo "$line" | awk '{print $5}')
174+
175+
echo "::add-mask::$password"
176+
177+
echo "Connecting to $name at $ip via bastion..."
178+
179+
# -n keeps ssh from reading the loop's stdin (ssh_list.txt), which would swallow the remaining entries
sshpass -p "$password" ssh -n -o StrictHostKeyChecking=no \
180+
-o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
181+
-o IdentityFile=$bastion_key \
182+
"${LAB_IMAGE_USER}@${ip}" \
183+
'echo "Connected to $(hostname)"'
184+
done < ssh_list.txt
185+
shell: bash
186+
env:
187+
LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || inputs.os_image == 'Rocky9' && 'rocky' }}
188+
189+
- name: Validate lab VMs setup
190+
run: |
191+
bastion_ip=185.45.78.149
192+
bastion_key="default.pem"
193+
index=0
194+
failed_indexes=()
195+
196+
while IFS= read -r line; do
197+
ip=$(echo "$line" | awk '{print $2}')
198+
name=$(echo "$line" | awk '{print $3}')
199+
password=$(echo "$line" | awk '{print $5}')
200+
taint="false"
201+
202+
echo "::add-mask::$password"
203+
echo "Connecting to $name at $ip..."
204+
205+
# Run the compound remote commands
206+
sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
207+
-o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
208+
-o IdentityFile=$bastion_key \
209+
"${LAB_IMAGE_USER}@${ip}" <<'EOF' || taint="true"
210+
211+
echo "Checking 'virsh list --all'..."
212+
output=$(sudo virsh list --all)
213+
echo "$output"
214+
215+
if ! echo "$output" | grep -q 'seed.*running'; then echo "'seed' not running"; taint="true"; fi
216+
if ! echo "$output" | grep -q 'compute0.*shut off'; then echo "'compute0' not shut off"; taint="true"; fi
217+
if ! echo "$output" | grep -q 'controller0.*shut off'; then echo "'controller0' not shut off"; taint="true"; fi
218+
219+
echo "Checking 'bifrost_deploy' container..."
220+
# -n: this script arrives on stdin, so a nested ssh must not read from it or it swallows the remaining checks
container_output=$(ssh -n [email protected] 'sudo docker ps')
221+
echo "$container_output"
222+
if ! echo "$container_output" | grep -q bifrost_deploy; then echo "Container bifrost_deploy not found running"; taint="true"; fi
223+
224+
echo "Checking openssh package source..."
225+
pkg_output=$(ssh -n [email protected] 'sudo dnf info openssh')
226+
echo "$pkg_output"
227+
if ! echo "$pkg_output" | grep -q 'Repository *: *@System'; then echo "Package openssh not from @System"; taint="true"; fi
228+
229+
echo "Checking a-seed-from-nothing.out log result..."
230+
if ! tail -n 10 a-seed-from-nothing.out | grep -q 'PLAY RECAP.*failed=0'; then
231+
echo "Ansible PLAY RECAP failed != 0"
232+
taint="true"
233+
fi
234+
# The local shell cannot see this script's variables, so report failure through the exit status;
# the `|| taint="true"` on the ssh command line turns a non-zero status into the local taint flag.
if [ "$taint" = "true" ]; then exit 1; fi
235+
echo "All checks passed on $HOSTNAME"
236+
EOF
237+
if [ "$taint" == "true" ]; then failed_indexes+=($index); fi
238+
index=$((index + 1))
239+
240+
done < ssh_list.txt
241+
echo "FAILED_VM_INDEXES=${failed_indexes[*]}" >> $GITHUB_ENV
242+
shell: bash
243+
env:
244+
LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || inputs.os_image == 'Rocky9' && 'rocky' }}
245+
246+
- name: Taint failed lab VMs (if any)
247+
run: |
248+
if [ -z "${FAILED_VM_INDEXES}" ]; then
249+
echo "No failed VMs detected"
250+
exit 0
251+
fi
252+
253+
for idx in $FAILED_VM_INDEXES; do
254+
echo "Tainting openstack_compute_instance_v2.lab[$idx]"
255+
terraform taint "openstack_compute_instance_v2.lab[$idx]"
256+
done
257+
258+
echo "Re-running Terraform apply to fix failed VMs"
259+
terraform apply -auto-approve
260+
env:
261+
FAILED_VM_INDEXES: ${{ env.FAILED_VM_INDEXES }}
262+
shell: bash
263+
264+
- name: Get Terraform outputs
265+
# Step ids must be unique within a job; `tf_outputs` is already used by the first "Get Terraform outputs" step.
id: tf_outputs_redeploy
266+
run: |
267+
terraform output -json
268+
269+
- name: Write Terraform outputs
270+
run: |
271+
cat << EOF > tf-outputs.yml
272+
${{ steps.tf_outputs_redeploy.outputs.stdout }}
273+
EOF
274+
275+
- name: Write out Lab VMs info
276+
run: |
277+
terraform output -raw labs > ssh_list.txt
278+
279+
- name: Re-test failed lab VMs after redeploy
280+
run: |
281+
set -euo pipefail
282+
283+
bastion_ip=185.45.78.149
284+
bastion_key="default.pem"
285+
mapfile -t ssh_lines < ssh_list.txt
286+
287+
for idx in $FAILED_VM_INDEXES; do
288+
line="${ssh_lines[$idx]}"
289+
ip=$(echo "$line" | awk '{print $2}')
290+
name=$(echo "$line" | awk '{print $3}')
291+
password=$(echo "$line" | awk '{print $5}')
292+
293+
echo "::add-mask::$password"
294+
echo "Re-testing $name at $ip (index $idx)..."
295+
296+
sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
297+
-o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
298+
-o IdentityFile=$bastion_key \
299+
"${LAB_IMAGE_USER}@${ip}" <<'EOF' || check_failed="true"
300+
301+
302+
303+
304+
305+
echo "Re-checking virsh VMs..."
306+
output=$(sudo virsh list --all)
307+
echo "$output"
308+
if ! echo "$output" | grep -q 'seed.*running'; then echo "'seed' not running"; exit 1; fi
309+
if ! echo "$output" | grep -q 'compute0.*shut off'; then echo "'compute0' not shut off"; exit 1; fi
310+
if ! echo "$output" | grep -q 'controller0.*shut off'; then echo "'controller0' not shut off"; exit 1; fi
311+
312+
echo "Checking bifrost container..."
313+
if ! ssh -n [email protected] 'sudo docker ps' | grep -q bifrost_deploy; then
314+
echo "bifrost_deploy container not running"; exit 1;
315+
fi
316+
317+
echo "Checking openssh package source..."
318+
if ! ssh -n [email protected] 'sudo dnf info openssh' | grep -q 'Repository *: *@System'; then
319+
echo "openssh not from @System"; exit 1;
320+
fi
321+
322+
echo "Checking a-seed-from-nothing.out for Ansible success..."
323+
if ! tail -n 20 a-seed-from-nothing.out | grep -q 'PLAY RECAP.*failed=0'; then
324+
echo "Ansible PLAY RECAP shows failures"; exit 1;
325+
fi
326+
327+
echo "All post-redeploy checks passed on $HOSTNAME"
328+
EOF
# The failure handler has to follow the heredoc terminator: a heredoc body starts on the line
# right after the command line, so a handler placed there is sent to the lab VM as script text
# (including the terraform destroy) and its unclosed brace breaks the loop syntax.
if [ "${check_failed:-false}" = "true" ]; then
echo "Post-deploy check failed on $name. Destroying all infrastructure..."
terraform destroy -auto-approve
exit 1
fi
329+
330+
done
331+
shell: bash
332+
env:
333+
LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || inputs.os_image == 'Rocky9' && 'rocky' }}
334+
FAILED_VM_INDEXES: ${{ env.FAILED_VM_INDEXES }}
335+
336+
- name: Run a-universe-from-seed.sh if true
337+
if: inputs.au_from_seed == true
338+
run: |
339+
bastion_ip=185.45.78.149
340+
bastion_key="default.pem"
341+
342+
mapfile -t ssh_lines < ssh_list.txt
343+
344+
for i in "${!ssh_lines[@]}"; do
345+
line="${ssh_lines[$i]}"
346+
ip=$(echo "$line" | awk '{print $2}')
347+
name=$(echo "$line" | awk '{print $3}')
348+
password=$(echo "$line" | awk '{print $5}')
349+
350+
echo "::add-mask::$password"
351+
echo "Launching a-universe-from-seed.sh on $name at $ip in tmux..."
352+
353+
sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
354+
-o ProxyJump="${LAB_IMAGE_USER}@${bastion_ip}" \
355+
-o IdentityFile=$bastion_key \
356+
"${LAB_IMAGE_USER}@${ip}" \
357+
"tmux new-session -d -s aus-run './a-universe-from-seed.sh'"
358+
done
359+
shell: bash
360+
env:
361+
LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || inputs.os_image == 'Rocky9' && 'rocky' }}
362+
363+
# - name: Run test workflow
364+
# if: inputs.deployment_type == 'Test'
365+
# uses: ./.github/workflows/AUFN-test.yml
366+
367+
# - name: Upload Terraform outputs
368+
# if: ${{ inputs.deployment_type == 'Deployment' || inputs.debug_mode == true }}
369+
# uses: actions/upload-artifact@v4
370+
# with:
371+
# name: ${{ inputs.deployment_type }}-terraform-artifacts
372+
373+

a-seed-from-nothing.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ cd $HOME
101101
git clone https://github.com/stackhpc/beokay.git -b master
102102

103103
# Use Beokay to bootstrap your control host.
104-
[[ -d deployment ]] || beokay/beokay.py create --base-path ~/deployment --kayobe-repo https://opendev.org/openstack/kayobe.git --kayobe-branch stable/2023.1 --kayobe-config-repo https://github.com/stackhpc/a-universe-from-nothing.git --kayobe-config-branch stable/2023.1
104+
[[ -d deployment ]] || beokay/beokay.py create --base-path ~/deployment --kayobe-repo https://opendev.org/openstack/kayobe.git --kayobe-branch unmaintained/2023.1 --kayobe-config-repo https://github.com/stackhpc/a-universe-from-nothing.git --kayobe-config-branch stable/2023.1
105105

106106
# Bump the provisioning time - it can be lengthy on virtualised storage
107107
sed -i.bak 's%^[# ]*wait_active_timeout:.*% wait_active_timeout: 5000%' ~/deployment/src/kayobe/ansible/overcloud-provision.yml

output.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
output "labs" {
2-
value = join("\n", formatlist("ssh %s # %s", openstack_compute_instance_v2.lab.*.name, openstack_compute_instance_v2.lab.*.id))
2+
value = join("\n", formatlist("ssh %s %s # %s", openstack_compute_instance_v2.lab.*.access_ip_v4, openstack_compute_instance_v2.lab.*.name, openstack_compute_instance_v2.lab.*.id))
33
}
44

55
output "registry" {

0 commit comments

Comments
 (0)