diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 9bf05f62a..5425eb4e3 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -1,16 +1,16 @@ - name: Build fat image on: workflow_dispatch: - inputs: - ci_cloud: - description: 'Select the CI_CLOUD' - required: true - type: choice - options: - - LEAFCLOUD - - SMS - - ARCUS + inputs: + ci_cloud: + description: 'Select the CI_CLOUD' + required: true + type: choice + options: + - LEAFCLOUD + - SMS + - ARCUS + jobs: openstack: name: openstack-imagebuild @@ -25,7 +25,7 @@ jobs: - RL8 - RL9 build: - - openstack.openhpc-ofed + - openstack.openhpc - openstack.openhpc-cuda exclude: - os_version: RL8 @@ -34,6 +34,18 @@ jobs: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack CI_CLOUD: ${{ github.event.inputs.ci_cloud }} + SOURCE_IMAGES_MAP: | + { + "RL8": { + "openstack.openhpc": "rocky-latest-RL8", + "openstack.openhpc-cuda": "rocky-latest-cuda-RL8" + }, + "RL9": { + "openstack.openhpc": "rocky-latest-RL9", + "openstack.openhpc-cuda": "rocky-latest-cuda-RL9" + } + } + steps: - uses: actions/checkout@v2 @@ -52,10 +64,10 @@ jobs: - name: Add bastion's ssh key to known_hosts run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts shell: bash - + - name: Install ansible etc run: dev/setup-env.sh - + - name: Write clouds.yaml run: | mkdir -p ~/.config/openstack/ @@ -66,17 +78,25 @@ jobs: run: | . venv/bin/activate . environments/.stackhpc/activate - + - name: Build fat image with packer id: packer_build run: | + set -x . venv/bin/activate . environments/.stackhpc/activate cd packer/ packer init . 
- PACKER_LOG=1 packer build -on-error=${{ vars.PACKER_ON_ERROR }} -only=${{ matrix.build }} -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl openstack.pkr.hcl + + PACKER_LOG=1 packer build \ + -on-error=${{ vars.PACKER_ON_ERROR }} \ + -only=${{ matrix.build }} \ + -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ + -var "source_image_name=${{ env.SOURCE_IMAGE }}" \ + openstack.pkr.hcl env: PKR_VAR_os_version: ${{ matrix.os_version }} + SOURCE_IMAGE: ${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version][matrix.build] }} - name: Get created image names from manifest id: manifest @@ -87,53 +107,14 @@ jobs: sleep 5 done IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) - echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" - echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" - - - name: Download image - run: | - . venv/bin/activate - sudo mkdir /mnt/images - sudo chmod 777 /mnt/images - openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: install libguestfs - run: | - sudo apt -y update - sudo apt -y install libguestfs-tools - - - name: mkdir for mount - run: sudo mkdir -p './${{ steps.manifest.outputs.image-name }}' - - - name: mount qcow2 file - run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' - - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@0.17.0 - with: - scan-type: fs - scan-ref: "${{ steps.manifest.outputs.image-name }}" - scanners: "vuln" - format: sarif - output: "${{ steps.manifest.outputs.image-name }}.sarif" - # turn off secret scanning to speed things up - - - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" - 
category: "${{ matrix.os_version }}-${{ matrix.build }}" + echo $IMAGE_ID > image-id.txt + echo $IMAGE_NAME > image-name.txt - - name: Fail if scan has CRITICAL vulnerabilities - uses: aquasecurity/trivy-action@0.16.1 + - name: Upload manifest artifact + uses: actions/upload-artifact@v4 with: - scan-type: fs - scan-ref: "${{ steps.manifest.outputs.image-name }}" - scanners: "vuln" - format: table - exit-code: '1' - severity: 'CRITICAL' - ignore-unfixed: true + name: image-details-${{ matrix.build }}-${{ matrix.os_version }} + path: | + ./image-id.txt + ./image-name.txt + overwrite: true \ No newline at end of file diff --git a/.github/workflows/nightlybuild.yml b/.github/workflows/nightlybuild.yml new file mode 100644 index 000000000..4df3f9955 --- /dev/null +++ b/.github/workflows/nightlybuild.yml @@ -0,0 +1,265 @@ +name: Build nightly image +on: + workflow_dispatch: + inputs: + ci_cloud: + description: 'Select the CI_CLOUD' + required: true + type: choice + options: + - LEAFCLOUD + - SMS + - ARCUS + schedule: + - cron: '0 0 * * *' # Run at midnight + +jobs: + openstack: + name: openstack-imagebuild + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build + cancel-in-progress: true + runs-on: ubuntu-22.04 + strategy: + fail-fast: false # allow other matrix jobs to continue even if one fails + matrix: # build RL8, RL9, RL9+CUDA versions + os_version: + - RL8 + - RL9 + build: + - openstack.rocky-latest + - openstack.rocky-latest-cuda + exclude: + - os_version: RL8 + build: openstack.rocky-latest-cuda + + env: + ANSIBLE_FORCE_COLOR: True + OS_CLOUD: openstack + CI_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} + SOURCE_IMAGES_MAP: | + { + "RL8": "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2", + "RL9": "Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2" + } + + steps: + - uses: actions/checkout@v2 + + - name: Record settings for CI cloud + run: | + echo CI_CLOUD: 
${{ env.CI_CLOUD }} + + - name: Setup ssh + run: | + set -x + mkdir ~/.ssh + echo "${{ secrets[format('{0}_SSH_KEY', env.CI_CLOUD)] }}" > ~/.ssh/id_rsa + chmod 0600 ~/.ssh/id_rsa + shell: bash + + - name: Add bastion's ssh key to known_hosts + run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts + shell: bash + + - name: Install ansible etc + run: dev/setup-env.sh + + - name: Write clouds.yaml + run: | + mkdir -p ~/.config/openstack/ + echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml + shell: bash + + - name: Setup environment + run: | + . venv/bin/activate + . environments/.stackhpc/activate + + - name: Build fat image with packer + id: packer_build + run: | + set -x + . venv/bin/activate + . environments/.stackhpc/activate + cd packer/ + packer init . + + PACKER_LOG=1 packer build \ + -on-error=${{ vars.PACKER_ON_ERROR }} \ + -only=${{ matrix.build }} \ + -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ + -var "source_image_name=${{ env.SOURCE_IMAGE }}" \ + openstack.pkr.hcl + + env: + PKR_VAR_os_version: ${{ matrix.os_version }} + SOURCE_IMAGE: ${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version] }} + + - name: Get created image names from manifest + id: manifest + run: | + . venv/bin/activate + IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json) + while ! openstack image show -f value -c name $IMAGE_ID; do + sleep 5 + done + IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) + echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" + echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" + + - name: Download image + run: | + . 
venv/bin/activate + sudo mkdir /mnt/images + sudo chmod 777 /mnt/images + openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-id }}" + openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-id }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: install libguestfs + run: | + sudo apt -y update + sudo apt -y install libguestfs-tools + + - name: mkdir for mount + run: sudo mkdir -p './${{ steps.manifest.outputs.image-name }}' + + - name: mount qcow2 file + run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@0.17.0 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: sarif + output: "${{ steps.manifest.outputs.image-name }}.sarif" + # turn off secret scanning to speed things up + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" + category: "${{ matrix.os_version }}-${{ matrix.build }}" + + - name: Fail if scan has CRITICAL vulnerabilities + uses: aquasecurity/trivy-action@0.16.1 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: table + exit-code: '1' + severity: 'CRITICAL' + ignore-unfixed: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Delete new image if Trivy scan fails + if: failure() && steps.packer_build.outcome == 'success' # Runs if the Trivy scan found crit vulnerabilities or failed + run: | + . venv/bin/activate + echo "Deleting new image due to critical vulnerabilities or scan failure ..." 
+ openstack image delete "${{ steps.manifest.outputs.image-id }}" + + - name: Delete old latest image + if: success() # Runs only if Trivy scan passed + run: | + . venv/bin/activate + IMAGE_COUNT=$(openstack image list --name ${{ steps.manifest.outputs.image-name }} -f value -c ID | wc -l) + if [ "$IMAGE_COUNT" -gt 1 ]; then + OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ steps.manifest.outputs.image-name }}" -f value -c ID | head -n 1) + echo "Deleting old image ID: $OLD_IMAGE_ID" + openstack image delete "$OLD_IMAGE_ID" + else + echo "Only one image exists, skipping deletion." + fi + + upload: + name: upload-nightly-targets + needs: openstack + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.image }}-${{ matrix.target_cloud }} + cancel-in-progress: true + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + target_cloud: + - LEAFCLOUD + - SMS + - ARCUS + os_version: + - RL8 + - RL9 + image: + - rocky-latest + - rocky-latest-cuda + exclude: + - os_version: RL8 + image: rocky-latest-cuda + - target_cloud: LEAFCLOUD + env: + OS_CLOUD: openstack + SOURCE_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} + TARGET_CLOUD: ${{ matrix.target_cloud }} + IMAGE_NAME: "${{ matrix.image }}-${{ matrix.os_version }}" + steps: + - uses: actions/checkout@v2 + + - name: Record settings for CI cloud + run: | + echo SOURCE_CLOUD: ${{ env.SOURCE_CLOUD }} + echo TARGET_CLOUD: ${{ env.TARGET_CLOUD }} + + - name: Install openstackclient + run: | + python3 -m venv venv + . 
venv/bin/activate + pip install -U pip + pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt) + shell: bash + + - name: Write clouds.yaml + run: | + mkdir -p ~/.config/openstack/ + echo "${{ secrets[format('{0}_CLOUDS_YAML', env.SOURCE_CLOUD)] }}" > ~/.config/openstack/source_clouds.yaml + echo "${{ secrets[format('{0}_CLOUDS_YAML', env.TARGET_CLOUD)] }}" > ~/.config/openstack/target_clouds.yaml + shell: bash + + - name: Download source image + run: | + . venv/bin/activate + export OS_CLIENT_CONFIG_FILE=~/.config/openstack/source_clouds.yaml + openstack image save --file ${{ env.IMAGE_NAME }} ${{ env.IMAGE_NAME }} + shell: bash + + - name: Upload to target cloud + run: | + . venv/bin/activate + export OS_CLIENT_CONFIG_FILE=~/.config/openstack/target_clouds.yaml + + openstack image create "${{ env.IMAGE_NAME }}" \ + --file "${{ env.IMAGE_NAME }}" \ + --disk-format qcow2 + shell: bash + + - name: Delete old latest image from target cloud + run: | + . venv/bin/activate + export OS_CLIENT_CONFIG_FILE=~/.config/openstack/target_clouds.yaml + + IMAGE_COUNT=$(openstack image list --name "${{ env.IMAGE_NAME }}" -f value -c ID | wc -l) + if [ "$IMAGE_COUNT" -gt 1 ]; then + OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ env.IMAGE_NAME }}" -f value -c ID | head -n 1) + openstack image delete "$OLD_IMAGE_ID" + else + echo "Only one image exists, skipping deletion." 
+ fi + shell: bash diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml new file mode 100644 index 000000000..2957b22ee --- /dev/null +++ b/.github/workflows/trivyscan.yml @@ -0,0 +1,116 @@ +name: Trivy scan image for vulnerabilities +on: + workflow_dispatch: + pull_request: + branches: + - main + paths: + - 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json' + +jobs: + scan: + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build }} # to branch/PR + OS + build + cancel-in-progress: true + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + build: ["RL8", "RL9", "RL9-cuda"] + env: + JSON_PATH: environments/.stackhpc/terraform/cluster_image.auto.tfvars.json + OS_CLOUD: openstack + CI_CLOUD: ${{ vars.CI_CLOUD }} + + steps: + - uses: actions/checkout@v2 + + - name: Record settings for CI cloud + run: | + echo CI_CLOUD: ${{ env.CI_CLOUD }} + + - name: Setup ssh + run: | + set -x + mkdir ~/.ssh + echo "${{ secrets[format('{0}_SSH_KEY', env.CI_CLOUD)] }}" > ~/.ssh/id_rsa + chmod 0600 ~/.ssh/id_rsa + shell: bash + + - name: Add bastion's ssh key to known_hosts + run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts + shell: bash + + - name: setup environment + run: | + python3 -m venv venv + . venv/bin/activate + pip install -U pip + pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt) + shell: bash + + - name: Write clouds.yaml + run: | + mkdir -p ~/.config/openstack/ + echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml + shell: bash + + - name: Parse image name json + id: manifest + run: | + IMAGE_NAME=$(jq --arg version "${{ matrix.build }}" -r '.cluster_image[$version]' "${{ env.JSON_PATH }}") + echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" + + - name: Download image + run: | + . 
venv/bin/activate + sudo mkdir /mnt/images + sudo chmod 777 /mnt/images + openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: install libguestfs + run: | + sudo apt -y update + sudo apt -y install libguestfs-tools + + - name: mkdir for mount + run: sudo mkdir -p './${{ steps.manifest.outputs.image-name }}' + + - name: mount qcow2 file + run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@0.24.0 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: sarif + output: "${{ steps.manifest.outputs.image-name }}.sarif" + # turn off secret scanning to speed things up + timeout: 15m + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" + category: "${{ matrix.build }}" + + - name: Fail if scan has CRITICAL vulnerabilities + uses: aquasecurity/trivy-action@0.24.0 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: table + exit-code: '1' + severity: 'CRITICAL' + ignore-unfixed: true + timeout: 15m + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index c43d614db..18d159996 100644 --- a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -148,9 +148,9 @@ tags: cockpit tasks: - name: Remove RHEL cockpit - dnf: - name: cockpit-ws - state: "{{ appliances_cockpit_state }}" + command: dnf -y remove cockpit-ws # N.B. 
using ansible dnf module is very slow + register: dnf_remove_output + ignore_errors: true # Avoid failing if a lock or other error happens - hosts: firewalld gather_facts: false diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index 81c4a2043..e623c2794 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -56,10 +56,12 @@ include_role: name: mysql tasks_from: install.yml + when: "'mysql' in group_names" - name: OpenHPC import_role: name: stackhpc.openhpc tasks_from: install.yml + when: "'openhpc' in group_names" # - import_playbook: portal.yml - name: Open Ondemand server (packages) @@ -67,6 +69,7 @@ name: osc.ood tasks_from: install-package.yml vars_from: "Rocky/{{ ansible_distribution_major_version }}.yml" + when: "'openondemand' in group_names" # # FUTURE: install-apps.yml - this is git clones - name: Open Ondemand server (apps) @@ -74,34 +77,40 @@ name: osc.ood tasks_from: install-apps.yml vars_from: "Rocky/{{ ansible_distribution_major_version }}.yml" + when: "'openondemand' in group_names" - name: Open Ondemand remote desktop import_role: name: openondemand tasks_from: vnc_compute.yml + when: "'openondemand_desktop' in group_names" - name: Open Ondemand jupyter node import_role: name: openondemand tasks_from: jupyter_compute.yml + when: "'openondemand' in group_names" # - import_playbook: monitoring.yml: - import_role: name: opensearch tasks_from: install.yml - become: true + when: "'opensearch' in group_names" # slurm_stats - nothing to do - import_role: name: filebeat tasks_from: install.yml + when: "'filebeat' in group_names" - import_role: # can't only run cloudalchemy.node_exporter/tasks/install.yml as needs vars from preflight.yml and triggers service start # however starting node exporter is ok name: cloudalchemy.node_exporter + when: "'node_exporter' in group_names" - name: openondemand exporter dnf: - name: ondemand_exporter + name: ondemand_exporter + when: "'openondemand' in group_names" - name: slurm exporter import_role: @@ 
-109,7 +118,12 @@ tasks_from: install vars: slurm_exporter_state: stopped + when: "'slurm_exporter' in group_names" +- hosts: prometheus + become: yes + gather_facts: yes + tasks: - import_role: name: cloudalchemy.prometheus tasks_from: preflight.yml @@ -162,6 +176,10 @@ - prometheus - promtool +- hosts: grafana + become: yes + gather_facts: yes + tasks: - name: Include distribution variables for cloudalchemy.grafana include_vars: "{{ appliances_repository_root }}/ansible/roles/cloudalchemy.grafana/vars/redhat.yml" - import_role: diff --git a/dev/extract_logs.py b/dev/extract_logs.py index 91923f1a0..65df0140e 100644 --- a/dev/extract_logs.py +++ b/dev/extract_logs.py @@ -76,7 +76,7 @@ def extract_log_info_and_generate_csv(log_file_path, output_csv_path, target_dir print("Path to workflow log plain text file should be provided as the only arg to this script") sys.exit(1) log_file_path = sys.argv[1] # Input workflow log name -output_csv_path = log_file_path.replace('.txt.', '.csv') # Output CSV name +output_csv_path = log_file_path.replace('.txt', '.csv') # Output CSV name target_directory = '/ansible/' # Shared directory for task path extract_log_info_and_generate_csv(log_file_path, output_csv_path, target_directory) diff --git a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json new file mode 100644 index 000000000..f62c8886e --- /dev/null +++ b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json @@ -0,0 +1,7 @@ +{ + "cluster_image": { + "RL8": "openhpc-RL8-241009-1523-354b048a", + "RL9": "openhpc-RL9-241009-1523-354b048a", + "RL9-cuda": "openhpc-cuda-RL9-241009-1523-354b048a" + } +} \ No newline at end of file diff --git a/environments/.stackhpc/terraform/main.tf b/environments/.stackhpc/terraform/main.tf index c192eb222..ad13ae45d 100644 --- a/environments/.stackhpc/terraform/main.tf +++ b/environments/.stackhpc/terraform/main.tf @@ -28,12 +28,6 @@ variable "os_version" { 
variable "cluster_image" { description = "single image for all cluster nodes, keyed by os_version - a convenience for CI" type = map(string) - default = { - # https://github.com/stackhpc/ansible-slurm-appliance/pull/444 - RL8: "openhpc-ofed-RL8-241002-1612-1ce702b1" - RL9: "openhpc-ofed-RL9-241003-1052-1ce702b1" - RL9-cuda: "openhpc-cuda-RL9-241002-1612-1ce702b1" - } } variable "cluster_net" {} diff --git a/environments/common/layouts/everything b/environments/common/layouts/everything index 85af46c06..205f1d334 100644 --- a/environments/common/layouts/everything +++ b/environments/common/layouts/everything @@ -28,7 +28,6 @@ slurm_stats # NB: [rebuild] not defined here as this template is used in CI [update:children] -cluster [fail2ban:children] # Hosts to install fail2ban on to protect SSH diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 5f66c0320..ae5744ff3 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -47,21 +47,14 @@ variable "os_version" { # Must supply either source_image_name or source_image_id variable "source_image_name" { - type = map(string) - description = "name of source image, keyed from var.os_version" - default = { - RL8: "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2" - RL9: "Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2" - } + type = string + description = "name of source image" } variable "source_image" { - type = map(string) - default = { - RL8: null - RL9: null - } - description = "UUID of source image, keyed from var.os_version" + type = string + default = null + description = "UUID of source image" } variable "flavor" { @@ -132,8 +125,9 @@ variable "volume_size" { type = map(number) default = { # fat image builds, GB: + rocky-latest = 15 + rocky-latest-cuda = 30 openhpc = 15 - openhpc-ofed = 15 openhpc-cuda = 30 } } @@ -153,9 +147,10 @@ variable "groups" { description = "Additional inventory groups (other than 'builder') to add build VM to, keyed by source name" default = { # fat 
image builds: + rocky-latest = ["update", "ofed"] + rocky-latest-cuda = ["update", "ofed", "cuda"] openhpc = ["control", "compute", "login"] - openhpc-ofed = ["control", "compute", "login", "ofed"] - openhpc-cuda = ["control", "compute", "login", "ofed", "cuda"] + openhpc-cuda = ["control", "compute", "login"] } } @@ -171,8 +166,8 @@ source "openstack" "openhpc" { security_groups = var.security_groups # Input image: - source_image = "${var.source_image[var.os_version]}" - source_image_name = "${var.source_image_name[var.os_version]}" # NB: must already exist in OpenStack + source_image = "${var.source_image}" + source_image_name = "${var.source_image_name}" # NB: must already exist in OpenStack # SSH: ssh_username = var.ssh_username @@ -186,29 +181,39 @@ source "openstack" "openhpc" { # Output image: image_disk_format = "qcow2" image_visibility = var.image_visibility - image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" + } build { - # non-OFED fat image: + # latest nightly image: source "source.openstack.openhpc" { - name = "openhpc" + name = "rocky-latest" + image_name = "${source.name}-${var.os_version}" + } + + # latest nightly cuda image: + source "source.openstack.openhpc" { + name = "rocky-latest-cuda" + image_name = "${source.name}-${var.os_version}" } # OFED fat image: source "source.openstack.openhpc" { - name = "openhpc-ofed" + name = "openhpc" + image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" } # CUDA fat image: source "source.openstack.openhpc" { name = "openhpc-cuda" + image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" } # Extended site-specific image, built on fat image: source "source.openstack.openhpc" { name = "openhpc-extra" + image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" } provisioner "ansible" {