Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .github/extra_vars/arcus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Extra vars used by the build-image-deploy workflow, which passes this file
# to ansible-playbook as `-e @${EXTRA_VARS_FILE}`.

# Cluster instance vars
cluster_id: "{{ cluster_name }}"
openhpc_slurm_partitions:
  - name: "small"
    count: 2
    flavor_name: "vm.ska.cpu.general.small"
    # Quoted so the value stays the literal string "YES", not a YAML boolean
    default: "YES"
cluster_run_validation: true
cluster_user_ssh_public_key: ""
home_volume_size: 20
cluster_use_root_volumes: true

# Cloud vars
cluster_external_network: "CUDN-Internet"
login_flavor_name: "vm.ska.cpu.general.small"
control_flavor_name: "vm.ska.cpu.general.small"
metrics_db_maximum_size: 5

# Image build
image_build_manage_infra: false
image_build_use_blockstorage_volume: true
image_build_image_disk_format: "raw"
image_build_volume_size: 10
# Image properties; the "yes" values are quoted so they remain strings
image_build_metadata:
  hw_vif_multiqueue_enabled: "yes"
  hw_scsi_model: "virtio-scsi"
  hw_disk_bus: "scsi"
  hw_qemu_guest_agent: "yes"
  os_require_quiesce: "yes"
image_build_flavor_name: "vm.ska.cpu.general.small"
image_build_network_id: "4b6b2722-ee5b-40ec-8e52-a6610e14cc51"
image_build_attach_floating_ip: true
image_build_floating_ip_network: "CUDN-Internet"
image_build_source_image_id: "2a77064b-be40-4065-b0f4-4d5417a4460a"
image_build_security_group_id: "486dfc85-099b-4bbb-9375-60f320a7de18"
131 changes: 131 additions & 0 deletions .github/workflows/build-image-deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@

# CI pipeline: build an OHPC image, deploy a test cluster from it, tear the
# test infrastructure down again, then either delete the image (pull requests)
# or promote it to community visibility (pushes / opted-in manual runs).
name: Build, deploy and promote a new OHPC image
on:
  workflow_dispatch:
    inputs:
      promote_community:
        description: 'Set the community property on a successfully tested image'
        required: true
        default: false
        type: boolean
  pull_request:
  push:
    branches:
      - main
    tags:
      - '*'
jobs:
  build-deploy-promote:
    name: Build, deploy and promote a new OHPC image
    # Only run in the upstream repository, not in forks
    if: github.repository == 'stackhpc/caas-slurm-appliance'
    # One run at a time per ref
    concurrency: ${{ github.ref }}
    runs-on: ubuntu-20.04
    env:
      PACKER_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      ANSIBLE_FORCE_COLOR: True
      OS_CLOUD: openstack
      OS_CLIENT_CONFIG_FILE: ${{ github.workspace }}/clouds.yaml
      EXTRA_VARS_FILE: .github/extra_vars/arcus.yml
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: true

      - name: Install ansible etc
        run: dev/setup-env.sh

      - name: Write clouds.yaml
        run: |
          echo "${CLOUDS_YAML}" > "${OS_CLIENT_CONFIG_FILE}"
        shell: bash
        env:
          CLOUDS_YAML: ${{ secrets.CLOUDS_YAML }}

      # The playbook writes the new image UUID to cluster_image_uuid.txt, which
      # is exported as the CLUSTER_IMAGE step output for the later steps.
      - name: Build OHPC image
        id: build
        run: |
          source venv/bin/activate
          ansible-playbook \
            -i image-build/hosts \
            -e "@${EXTRA_VARS_FILE}" \
            -e '{"write_cluster_image_uuid_file": true}' \
            -e image_build_cluster_name="image-build-${GITHUB_SHA::7}" \
            image-build.yml
          echo "CLUSTER_IMAGE=$(cat cluster_image_uuid.txt)" >> "$GITHUB_OUTPUT"
        env:
          PACKER_LOG_PATH: ${{ github.workspace }}/packer-build.log

      # Always attempted, so build infrastructure is not left behind on failure
      - name: Remove image build infra
        run: |
          source venv/bin/activate
          ansible-playbook \
            -i image-build/hosts \
            -e "@${EXTRA_VARS_FILE}" \
            -e cluster_state=absent \
            -e image_build_cluster_name="image-build-${GITHUB_SHA::7}" \
            image-build.yml
        if: always()

      - name: Deploy a cluster based on the new OHPC image
        id: deploy
        run: |
          source venv/bin/activate
          ansible-playbook \
            -i image-build/hosts \
            -e "@${EXTRA_VARS_FILE}" \
            -e cluster_image=${{ steps.build.outputs.CLUSTER_IMAGE }} \
            -e cluster_name="caas-ci-${GITHUB_SHA::7}" \
            slurm-infra.yml
        env:
          SLURM_INFRA_HIDE_DEBUG_OUTPUT: True
        if: success()

      # Runs whenever the image build succeeded, even if the deploy failed or
      # the run was cancelled, so a partially created cluster is cleaned up.
      - name: Remove cluster based on the new OHPC image
        run: |
          source venv/bin/activate
          ansible-playbook \
            -i image-build/hosts \
            -e "@${EXTRA_VARS_FILE}" \
            -e cluster_image=${{ steps.build.outputs.CLUSTER_IMAGE }} \
            -e cluster_state=absent \
            -e cluster_name="caas-ci-${GITHUB_SHA::7}" \
            slurm-infra.yml
        if: |
          ( success() || failure() || cancelled() ) &&
          steps.build.outcome == 'success'

      # Images built for pull requests are deleted once testing is over
      - name: Delete built image after testing
        run: |
          source venv/bin/activate
          ansible-playbook \
            -i image-build/hosts \
            -e cluster_image=${{ steps.build.outputs.CLUSTER_IMAGE }} \
            -e '{"cluster_image_delete": true}' \
            image-build/image-delete-or-promote.yml
        if: |
          ( success() || failure() || cancelled() ) &&
          steps.build.outcome == 'success' &&
          github.event_name == 'pull_request'

      # Promote only when every earlier step succeeded and the run is either a
      # push or a manual dispatch with promote_community enabled.
      - name: Promote built image from Private to Community after testing
        run: |
          source venv/bin/activate
          ansible-playbook \
            -i image-build/hosts \
            -e cluster_image=${{ steps.build.outputs.CLUSTER_IMAGE }} \
            -e '{"cluster_image_promote_community": true}' \
            image-build/image-delete-or-promote.yml
        if: |
          success() &&
          steps.build.outcome == 'success' &&
          steps.deploy.outcome == 'success' &&
          (( github.event_name == 'workflow_dispatch' && inputs.promote_community == true )
          || github.event_name == 'push' )

      - name: Upload packer build log artifact
        uses: actions/upload-artifact@v3
        with:
          name: packer-build-log
          path: ${{ github.workspace }}/packer-build.log
        if: failure() || success() || cancelled()
11 changes: 11 additions & 0 deletions dev/setup-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Prepare the tooling environment: create a Python virtualenv in ./venv,
# install the pip requirements into it, then fetch the Ansible Galaxy roles
# and collections listed in requirements.yml.
# All paths are relative to the current working directory.
set -o errexit -o nounset -o pipefail

/usr/bin/python3.8 -m venv venv
. venv/bin/activate

pip install --upgrade pip
pip install --requirement requirements.txt

# Show which ansible version ended up installed
ansible --version

# Install ansible dependencies; --force overwrites any existing copies
ansible-galaxy role install --role-file requirements.yml --force
ansible-galaxy collection install --requirements-file requirements.yml --force
8 changes: 8 additions & 0 deletions image-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,12 @@
- name: Print cluster_image UUID
  debug:
    msg: "{{ cluster_image }}"

# Optionally write the UUID next to the playbook so a caller can read it back.
- name: Write cluster_image UUID to file
  copy:
    dest: "{{ playbook_dir }}/cluster_image_uuid.txt"
    content: "{{ cluster_image }}"
  # `default(false)` covers the undefined case; `bool` ensures a string extra
  # var such as `-e write_cluster_image_uuid_file=false` is not treated as true.
  when: write_cluster_image_uuid_file | default(false) | bool
when: cluster_state is not defined or (cluster_state is defined and cluster_state != "absent")
23 changes: 23 additions & 0 deletions image-build/image-delete-or-promote.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Housekeeping for a built image, selected through extra vars:
#   cluster_image_promote_community: set the image's community property
#   cluster_image_delete:            delete the image
# Nothing runs unless cluster_image is defined.
- hosts: openstack
  tasks:
    - block:
        # Use command module because openstack.image doesn't
        # support setting the community property
        - name: Set image community property
          command:
            # argv passes each argument as-is, with no string splitting
            argv:
              - openstack
              - image
              - set
              - --community
              - "{{ cluster_image }}"
          changed_when: true
          # `bool` ensures a string extra var such as
          # `-e cluster_image_promote_community=false` is not treated as true
          when: cluster_image_promote_community | default(false) | bool

        - name: Delete image
          openstack.cloud.image:
            name: "{{ cluster_image }}"
            state: absent
          when: cluster_image_delete | default(false) | bool

      when: cluster_image is defined
9 changes: 9 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
ansible==6.0.0
openstacksdk<0.99.0
python-openstackclient
jmespath
passlib[bcrypt]==1.7.4
cookiecutter
selinux # shim that avoids needing --system-site-packages; you still need to run: sudo yum install libselinux-python3
netaddr
matplotlib
3 changes: 3 additions & 0 deletions roles/image_build/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ image_build_metadata: {}
# The directory that contains the openstack.pkr.hcl to build the Slurm image
image_build_packer_root_path: "{{ playbook_dir }}/vendor/stackhpc/ansible-slurm-appliance/packer"

# Extra args to pass to the packer build command. The string is inserted
# directly after `packer build`, ahead of the `-only` and `-var-file` options;
# the empty default adds nothing.
image_build_packer_extra_args: ""

# The appliances_environment_root directory. This may contain a hooks directory
# optionally containing pre.yml, post-bootstrap.yml and post.yml playbooks, to
# run during the image-build process
Expand Down
2 changes: 1 addition & 1 deletion roles/image_build/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
- name: Build image with packer
command:
cmd: |
packer build -only openstack.openhpc -var-file={{ pkrvars_hcl_file.path }} openstack.pkr.hcl
packer build {{ image_build_packer_extra_args }} -only openstack.openhpc -var-file={{ pkrvars_hcl_file.path }} openstack.pkr.hcl
chdir: "{{ image_build_packer_root_path }}"
environment:
APPLIANCES_ENVIRONMENT_ROOT: "{{ image_build_appliances_environment_root }}"
Expand Down
2 changes: 2 additions & 0 deletions roles/image_build_infra/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

# Path of the image-build Terraform project, relative to the playbook directory
image_build_terraform_project_path: "{{ playbook_dir }}/terraform-caas-image-build"
# Name used for the image-build cluster; can be overridden with extra vars
image_build_cluster_name: "caas-image-build"
# cluster_name and cluster_id both default to the image-build cluster name
cluster_name: "{{ image_build_cluster_name }}"
cluster_id: "{{ image_build_cluster_name }}"

# Regex to capture existing cloud image names to use as the
# OpenHPC Slurm base-image
Expand Down
4 changes: 4 additions & 0 deletions slurm-infra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,12 @@

# Write the outputs as the final task
- hosts: localhost
vars:
hide_debug_outputs: "{{ lookup('ansible.builtin.env', 'SLURM_INFRA_HIDE_DEBUG_OUTPUT', default=false) | bool }}"
tasks:
- debug: var=outputs
when:
- not hide_debug_outputs
vars:
# Ansible has a fit when there are two 'hostvars' evaluations in a resolution chain,
# so we have to repeat logic here unfortunately
Expand Down