|
---
# Workflow header: display name, triggers and the manual-dispatch inputs.
#
# Runs on pushes to the AUFN-CI branch and on manual dispatch. On push-triggered
# runs the `inputs` context is empty, so none of the defaults declared below are
# applied to that kind of run.

name: AUFN KIDDIN' ME?!

on:
  push:
    branches:
      - AUFN-CI
  workflow_dispatch:
    inputs:
      deployment_type:
        description: Type of deployment
        type: choice
        options:
          - Test
          - Deployment
        default: Test
      lab_vm_count:
        description: Total number of Lab VMs to deploy
        type: number
        required: true
        default: 2
      reg_pwd:  # When using in workflow use ::add-mask::$ to mask the password
        description: Password for registry access
        type: string
        # No `default:` here on purpose. Input defaults are static text: the
        # `secrets` context is not available in this part of the file, so
        # `${{ secrets.BASTION_TEST_PASSWORD }}` would never be replaced by the
        # secret's value. The password therefore has to be supplied explicitly
        # when the workflow is dispatched.
        required: true
      os_image:
        description: Host OS image
        type: choice
        options:
          - Ubuntu
          - Rocky9
        default: 'Rocky9'
      aufn_branch:
        description: Which branch of AUFN to use
        type: string
        default: smslab/2023.1
      au_from_seed:
        description: Run 'A Universe From Seed'?
        type: boolean
        default: false
      debug_mode:
        description: Keep Test up to debug?
        type: boolean
        default: false

jobs:
  # Single job: provisions the lab with Terraform, validates every lab VM over
  # SSH (through the bastion), re-creates VMs that fail validation and
  # optionally launches a-universe-from-seed.sh on each VM.
  set-up-vars:
    name: Set up variables
    # `inputs` is empty on push-triggered runs, so fall back to the default
    # declared for the dispatch input.
    environment: ${{ inputs.deployment_type || 'Test' }}
    runs-on: ubuntu-latest

    # Values shared by several steps. They were previously repeated per step,
    # and OS_CLOUD was missing from the later `terraform apply` / `destroy`.
    env:
      DEPLOYMENT_TYPE: ${{ inputs.deployment_type || 'Test' }}
      OS_CLOUD: ${{ vars.OS_CLOUD }}
      # Login user of the lab image; anything other than Ubuntu is treated as
      # Rocky9, which is the default of the `os_image` input.
      LAB_IMAGE_USER: ${{ inputs.os_image == 'Ubuntu' && 'ubuntu' || 'rocky' }}
      BASTION_IP: '185.45.78.149'
      BASTION_KEY: default.pem
      # NOTE(review): the bastion login was garbled in the source this file was
      # recovered from; `rocky` is inferred from the `passwd --stdin rocky`
      # command run on the bastion - confirm.
      BASTION_USER: rocky

    steps:
      - name: Install Package
        # Pinned to a release tag instead of the moving `main` branch.
        uses: ConorMacBride/install-package@v1
        with:
          apt: git unzip nodejs python3-pip python3-venv openssh-server openssh-client jq

      - name: Install sshpass
        run: sudo apt-get update && sudo apt-get install -y sshpass

      - name: Install Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          # No step reads `steps.<id>.outputs.stdout`; Terraform output is
          # redirected straight into files, so the wrapper script is not needed.
          terraform_wrapper: false

      - name: Start the SSH service
        run: |
          sudo /etc/init.d/ssh start

      # - name: Check if 'Deployment' Lab is already deployed
      #   uses: softwareforgood/check-artifact-v4-existence@v0
      #   with:
      #     name: ${{ inputs.deployment_type }}-terraform-artifacts
      #
      # or use a ping command to check if the bastion is up
      #

      # Check out directly into $GITHUB_WORKSPACE, the working directory of
      # every `run` step below. The previous `mv repo-dir/* ~/` put the files
      # in the home directory instead (and skipped dotfiles).
      # NOTE(review): assumes the Terraform configuration and default.pem are
      # expected at the workspace root - confirm.
      - name: Checkout
        uses: actions/checkout@v4

      - name: Generate clouds.yaml
        # Passed through the environment so the shell never re-interprets `$`
        # or backticks contained in the secret.
        run: printf '%s\n' "$CLOUDS_YAML" > clouds.yaml
        env:
          CLOUDS_YAML: ${{ secrets.CLOUDS_YAML }}

      - name: Generate terraform.tfvars
        run: |
          cat << EOF > terraform.tfvars
          lab_count = ${LAB_VM_COUNT}
          lab_net_ipv4 = "stackhpc-ipv4-aufn"
          image_id = "${LAB_IMAGE_ID}"
          image_name = "${LAB_IMAGE_NAME}"
          lab_flavor = "aufn.v1.large"
          registry_flavor = "general.v1.medium"
          boot_labs_from_volume = true
          image_user = "${LAB_IMAGE_USER}"
          allocate_floating_ips = false
          create_bastion = true
          EOF

          # Conditionally append bastion_floating_ip
          if [ "${DEPLOYMENT_TYPE}" = "Deployment" ]; then
            echo "bastion_floating_ip = \"${BASTION_IP}\"" >> terraform.tfvars
          fi
        shell: bash
        env:
          # Falls back to the input's declared default on push-triggered runs.
          LAB_VM_COUNT: ${{ inputs.lab_vm_count || 2 }}
          # `vars.*` must be referenced unquoted: quoting them yields the
          # literal text "vars.LAB_OS_IMAGE_ROCKY" instead of the variable.
          LAB_IMAGE_ID: ${{ inputs.os_image == 'Ubuntu' && vars.LAB_OS_IMAGE_UBUNTU || vars.LAB_OS_IMAGE_ROCKY }}
          LAB_IMAGE_NAME: ${{ inputs.os_image == 'Ubuntu' && 'Ubuntu-22.04' || inputs.os_image || 'Rocky9' }}

      - name: Terraform Init
        run: terraform init -input=false

      - name: Terraform Plan
        run: terraform plan

      - name: Terraform Apply
        id: tf_apply
        run: |
          for attempt in $(seq 5); do
            if terraform apply -auto-approve; then
              echo "Created infrastructure on attempt $attempt"
              exit 0
            fi
            echo "Failed to create infrastructure on attempt $attempt"
            sleep 10

            # Need to add a check to see which part failed and then
            # taint and retry once more before declaring failure

            terraform destroy -auto-approve
            sleep 60
          done
          echo "Failed to create infrastructure after $attempt attempts"
          exit 1
        shell: bash

      # Replaces the former "Get Terraform outputs" / "Write Terraform outputs"
      # pair: a plain `run` step exposes no `outputs.stdout`, so the JSON is
      # redirected into the file directly instead of being printed to the log.
      - name: Write Terraform outputs
        run: |
          terraform output -json > tf-outputs.yml

      - name: Write out Lab VMs info
        run: |
          terraform output -raw labs > ssh_list.txt

      - name: Update bastion password authentication and set login password
        run: |
          if [ -z "${REG_PWD}" ]; then
            echo "No bastion password supplied" >&2
            exit 1
          fi
          echo "::add-mask::${REG_PWD}"

          # The password is sent over ssh's stdin, so it is never spliced into
          # a command line or a heredoc. StrictHostKeyChecking=no avoids the
          # interactive host-key prompt on the first connection.
          printf '%s\n' "${REG_PWD}" | ssh -o StrictHostKeyChecking=no -i "${BASTION_KEY}" \
            "${BASTION_USER}@${BASTION_IP}" "sudo passwd --stdin ${BASTION_USER}"

          ssh -n -o StrictHostKeyChecking=no -i "${BASTION_KEY}" \
            "${BASTION_USER}@${BASTION_IP}" \
            "sudo sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config.d/50-cloud-init.conf && sudo systemctl restart sshd"
        shell: bash
        env:
          # Push-triggered runs have no inputs, so fall back to the secret.
          REG_PWD: ${{ inputs.reg_pwd || secrets.BASTION_TEST_PASSWORD }}

      # The same checks are used for the first validation and for the re-test,
      # so they live in one script that is streamed to each lab VM.
      - name: Write lab validation script
        run: |
          cat > validate-lab.sh <<'EOF'
          # Runs on a lab VM via `bash -s`. Performs every check, reports each
          # failure, and exits non-zero if at least one check failed.

          # NOTE(review): the seed login was garbled in the source this file was
          # recovered from; this value is an assumption - confirm.
          seed_host="stack@192.168.33.5"
          failed=0

          echo "Checking 'virsh list --all'..."
          output=$(sudo virsh list --all)
          echo "$output"

          if ! echo "$output" | grep -q 'seed.*running'; then echo "'seed' not running"; failed=1; fi
          if ! echo "$output" | grep -q 'compute0.*shut off'; then echo "'compute0' not shut off"; failed=1; fi
          if ! echo "$output" | grep -q 'controller0.*shut off'; then echo "'controller0' not shut off"; failed=1; fi

          # `ssh -n` is required below: this script is read from stdin, and a
          # nested ssh without -n would consume the remainder of it.
          echo "Checking 'bifrost_deploy' container..."
          container_output=$(ssh -n "$seed_host" 'sudo docker ps')
          echo "$container_output"
          if ! echo "$container_output" | grep -q bifrost_deploy; then echo "Container bifrost_deploy not found running"; failed=1; fi

          echo "Checking openssh package source..."
          pkg_output=$(ssh -n "$seed_host" 'sudo dnf info openssh')
          echo "$pkg_output"
          if ! echo "$pkg_output" | grep -q 'Repository *: *@System'; then echo "Package openssh not from @System"; failed=1; fi

          echo "Checking a-seed-from-nothing.out log result..."
          if ! tail -n 20 a-seed-from-nothing.out | grep -q 'PLAY RECAP.*failed=0'; then
            echo "Ansible PLAY RECAP failed != 0"
            failed=1
          fi

          if [ "$failed" -ne 0 ]; then
            echo "One or more checks failed on $HOSTNAME"
            exit 1
          fi
          echo "All checks passed on $HOSTNAME"
          EOF
        shell: bash

      - name: Check connection to Lab VMs
        run: |
          # The list is loaded into an array first so that ssh can never eat
          # the remaining lines from the loop's stdin.
          mapfile -t ssh_lines < ssh_list.txt

          for idx in "${!ssh_lines[@]}"; do
            # Fields 2, 3 and 5 of each line: IP, name and password.
            read -r _ ip name _ password _ <<< "${ssh_lines[$idx]}"
            if [ -z "$ip" ]; then continue; fi

            echo "::add-mask::$password"

            echo "Connecting to $name at $ip via bastion..."

            sshpass -p "$password" ssh -n -o StrictHostKeyChecking=no \
              -o ProxyJump="${LAB_IMAGE_USER}@${BASTION_IP}" \
              -o IdentityFile="${BASTION_KEY}" \
              "${LAB_IMAGE_USER}@${ip}" \
              'echo "Connected to $(hostname)"'
          done
        shell: bash

      - name: Validate lab VMs setup
        run: |
          mapfile -t ssh_lines < ssh_list.txt
          failed_indexes=()

          for idx in "${!ssh_lines[@]}"; do
            read -r _ ip name _ password _ <<< "${ssh_lines[$idx]}"
            if [ -z "$ip" ]; then continue; fi

            echo "::add-mask::$password"
            echo "Connecting to $name at $ip..."

            # The remote exit status is the only thing that travels back to
            # the runner; a variable set in the remote shell does not.
            if ! sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
              -o ProxyJump="${LAB_IMAGE_USER}@${BASTION_IP}" \
              -o IdentityFile="${BASTION_KEY}" \
              "${LAB_IMAGE_USER}@${ip}" 'bash -s' < validate-lab.sh; then
              failed_indexes+=("$idx")
            fi
          done

          # Space-separated list, consumed by the taint and re-test steps.
          echo "FAILED_VM_INDEXES=${failed_indexes[*]}" >> "$GITHUB_ENV"
        shell: bash

      - name: Taint failed lab VMs (if any)
        run: |
          if [ -z "${FAILED_VM_INDEXES}" ]; then
            echo "No failed VMs detected"
            exit 0
          fi

          for idx in $FAILED_VM_INDEXES; do
            echo "Tainting openstack_compute_instance_v2.lab[$idx]"
            terraform taint "openstack_compute_instance_v2.lab[$idx]"
          done

          echo "Re-running Terraform apply to fix failed VMs"
          terraform apply -auto-approve
        shell: bash

      # Distinct step names and no `id`: the original reused `id: tf_outputs`,
      # which is not allowed twice within one job.
      - name: Refresh Terraform outputs
        run: |
          terraform output -json > tf-outputs.yml

      - name: Refresh Lab VMs info
        run: |
          terraform output -raw labs > ssh_list.txt

      - name: Re-test failed lab VMs after redeploy
        run: |
          set -euo pipefail

          if [ -z "${FAILED_VM_INDEXES:-}" ]; then
            echo "No VMs to re-test"
            exit 0
          fi

          mapfile -t ssh_lines < ssh_list.txt

          for idx in $FAILED_VM_INDEXES; do
            read -r _ ip name _ password _ <<< "${ssh_lines[$idx]}"

            echo "::add-mask::$password"
            echo "Re-testing $name at $ip (index $idx)..."

            # Written as if/then rather than `<<'EOF' || { ... }`: with the
            # latter, the handler lines follow the heredoc operator and are
            # swallowed into the heredoc body.
            if ! sshpass -p "$password" ssh -o StrictHostKeyChecking=no \
              -o ProxyJump="${LAB_IMAGE_USER}@${BASTION_IP}" \
              -o IdentityFile="${BASTION_KEY}" \
              "${LAB_IMAGE_USER}@${ip}" 'bash -s' < validate-lab.sh; then
              echo "Post-deploy check failed on $name. Destroying all infrastructure..."
              terraform destroy -auto-approve
              exit 1
            fi
          done
        shell: bash

      - name: Run a-universe-from-seed.sh if true
        if: inputs.au_from_seed == true
        run: |
          mapfile -t ssh_lines < ssh_list.txt

          for idx in "${!ssh_lines[@]}"; do
            read -r _ ip name _ password _ <<< "${ssh_lines[$idx]}"
            if [ -z "$ip" ]; then continue; fi

            echo "::add-mask::$password"
            echo "Launching a-universe-from-seed.sh on $name at $ip in tmux..."

            # Detached tmux session so the script keeps running after ssh exits.
            sshpass -p "$password" ssh -n -o StrictHostKeyChecking=no \
              -o ProxyJump="${LAB_IMAGE_USER}@${BASTION_IP}" \
              -o IdentityFile="${BASTION_KEY}" \
              "${LAB_IMAGE_USER}@${ip}" \
              "tmux new-session -d -s aus-run './a-universe-from-seed.sh'"
          done
        shell: bash

      # - name: Run test workflow
      #   if: inputs.deployment_type == 'Test'
      #   uses: ./.github/workflows/AUFN-test.yml

      # - name: Upload Terraform outputs
      #   if: ${{ inputs.deployment_type == 'Deployment' || inputs.debug_mode == true }}
      #   uses: actions/upload-artifact@v4
      #   with:
      #     name: ${{ inputs.deployment_type }}-terraform-artifacts
0 commit comments