Skip to content

Commit abf45be

Browse files
authored
Merge branch 'main' into feat/compute-vols
2 parents 79f519c + 6f1554c commit abf45be

File tree

12 files changed

+110
-36
lines changed

12 files changed

+110
-36
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Scheduled / manually-triggered housekeeping workflow.
# For each CI cloud in the matrix it lists the servers on that cloud, derives
# the CI cluster name prefixes (slurmci-RL<x>-<run id>) and deletes every
# cluster whose control node is not tagged "keep".
name: Cleanup CI clusters
on:
  workflow_dispatch:
    inputs:
      # NOTE(review): this input is not referenced anywhere below - the matrix
      # always runs against all three clouds. Confirm whether a manual run is
      # meant to be restricted to the selected cloud.
      ci_cloud:
        description: 'Select the CI_CLOUD'
        required: true
        type: choice
        options:
          - LEAFCLOUD
          - SMS
          - ARCUS
  schedule:
    - cron: '0 20 * * *'  # Run at 8PM - image sync runs at midnight

jobs:
  ci_cleanup:
    name: ci-cleanup
    # One cleanup at a time per workflow/ref/cloud combination.
    concurrency: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.cloud }}
    strategy:
      fail-fast: false  # a failure on one cloud must not cancel the others
      matrix:
        cloud:
          - LEAFCLOUD
          - SMS
          - ARCUS
    runs-on: ubuntu-22.04
    env:
      OS_CLOUD: openstack
      CI_CLOUD: ${{ matrix.cloud }}
    steps:
      - uses: actions/checkout@v2

      - name: Record which cloud CI is running on
        run: |
          echo CI_CLOUD: ${{ env.CI_CLOUD }}

      - name: Setup environment
        run: |
          python3 -m venv venv
          . venv/bin/activate
          pip install -U pip
          # Install only the openstack client, at the version pinned in requirements.txt
          pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt)
        shell: bash

      - name: Write clouds.yaml
        run: |
          mkdir -p ~/.config/openstack/
          # Secret name is derived from the cloud, e.g. LEAFCLOUD_CLOUDS_YAML
          echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml
        shell: bash

      - name: Find CI clusters
        run: |
          . venv/bin/activate
          # Listed separately so a failing openstack call still fails the step.
          SERVER_LIST=$(openstack server list)
          # grep exits non-zero when nothing matches; tolerate that (an empty
          # cloud is not an error) without masking failures elsewhere in the
          # pipeline. Newlines are flattened to spaces because a plain
          # NAME=value entry in the environment file must be a single line.
          CI_CLUSTERS=$(echo "${SERVER_LIST}" | { grep --only-matching 'slurmci-RL.-[0-9]\+' || true; } | sort --unique | tr '\n' ' ')
          echo "Found CI clusters: ${CI_CLUSTERS}"
          # Must append to the file named by $GITHUB_ENV, and use the same
          # variable name that the next step reads.
          echo "CI_CLUSTERS=${CI_CLUSTERS}" >> "$GITHUB_ENV"
        shell: bash

      - name: Delete clusters if control node not tagged with keep
        run: |
          . venv/bin/activate
          # CI_CLUSTERS is a space-separated list exported by the previous
          # step; left unquoted on purpose so it word-splits.
          for cluster_prefix in ${CI_CLUSTERS}
          do
            TAGS=$(openstack server show "${cluster_prefix}-control" --column tags --format value)
            if [[ $TAGS =~ "keep" ]]; then
              echo "Skipping ${cluster_prefix} - control instance is tagged as keep"
            else
              # `yes` terminates with a broken-pipe error once the script
              # exits; swallow that so pipefail does not fail the step after a
              # successful deletion. The script's own exit status still counts.
              { yes || true; } | ./dev/delete-cluster.py "${cluster_prefix}"
            fi
          done
        shell: bash

.github/workflows/nightlybuild.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ jobs:
8989
-on-error=${{ vars.PACKER_ON_ERROR }} \
9090
-only=${{ matrix.build }} \
9191
-var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \
92-
-var "source_image_name=${{ env.SOURCE_IMAGE }}"
92+
-var "source_image_name=${{ env.SOURCE_IMAGE }}" \
9393
openstack.pkr.hcl
9494
9595
env:

.github/workflows/stackhpc.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ on:
1212
- '!docs/**'
1313
- '!README.md'
1414
- '!.gitignore'
15+
- '!.github/workflows/'
16+
- '.github/workflows/stackhpc'
1517
pull_request:
1618
paths:
1719
- '**'
@@ -20,6 +22,8 @@ on:
2022
- '!docs/**'
2123
- '!README.md'
2224
- '!.gitignore'
25+
- '!.github/workflows/'
26+
- '.github/workflows/stackhpc'
2327
jobs:
2428
openstack:
2529
name: openstack-ci

ansible/roles/cluster_infra/tasks/main.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,20 @@
1010
# outputs.cluster_gateway_ip.
1111
- block:
1212
- name: Look up floating IP
13-
include_role:
14-
name: stackhpc.terraform.infra
15-
tasks_from: lookup_floating_ip
16-
vars:
17-
os_floating_ip_id: "{{ cluster_floating_ip }}"
13+
azimuth_cloud.terraform.os_floating_ip_info:
14+
floating_ip: "{{ cluster_floating_ip }}"
15+
register: cluster_floating_ip_info
1816

1917
- name: Set floating IP address fact
2018
set_fact:
21-
cluster_floating_ip_address: "{{ os_floating_ip_info.floating_ip_address }}"
22-
when: cluster_floating_ip is defined
19+
cluster_floating_ip_address: "{{ cluster_floating_ip_info.floating_ip.floating_ip_address }}"
20+
when:
21+
- cluster_floating_ip is defined
22+
- cluster_floating_ip
2323

2424
- name: Install Terraform binary
2525
include_role:
26-
name: stackhpc.terraform.install
26+
name: azimuth_cloud.terraform.install
2727

2828
- name: Make Terraform project directory
2929
file:
@@ -59,4 +59,4 @@
5959

6060
- name: Provision infrastructure
6161
include_role:
62-
name: stackhpc.terraform.infra
62+
name: azimuth_cloud.terraform.infra
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
podman_users:
22
- name: "{{ ansible_user }}"
3-
podman_tmp_dir_root: /run # MUST be on a tmpfs

ansible/roles/podman/tasks/config.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,19 @@
4444
with_items: "{{ podman_users }}"
4545
register: podman_user_info
4646
become: yes
47+
48+
# Installs a tmpfiles.d rule so podman's ephemeral per-user directories under
# /tmp are removed at boot. The rule text itself explains the "R" and "!" flags.
- name: Clear up podman temporary files on startup
  copy:
    content: |
      # Created by ansible
      # Delete ephemeral podman files to avoid issues where /tmp is not of type tmpfs and persists across reboots.
      # See tmpfiles.d(5) man page, note "R" specifies recursive removal and "!" marks as happening only on boot.
      #
      # Type Path Mode User Group Age Argument
      R! /tmp/containers-user-*
      R! /tmp/podman-run-*
    dest: /etc/tmpfiles.d/podman-local.conf
    owner: root
    group: root
    # Quoted so the mode is passed as an octal string rather than relying on
    # the YAML parser's implicit integer typing.
    mode: "0660"
  become: true

ansible/roles/podman/tasks/validate.yml

Lines changed: 0 additions & 9 deletions
This file was deleted.

ansible/validate.yml

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,6 @@
2929
'enable_configless' not found in openhpc_config.SlurmctldParameters - is variable openhpc_config overridden?
3030
Additional slurm.conf parameters should be provided using variable openhpc_config_extra.
3131
success_msg: Checked Slurm will be configured for configless operation
32-
33-
- name: Validate podman configuration
34-
hosts: podman
35-
gather_facts: false
36-
tags: podman
37-
tasks:
38-
- import_role:
39-
name: podman
40-
tasks_from: validate.yml
41-
tags: validate
4232

4333
- name: Validate filebeat configuration
4434
hosts: filebeat

environments/.stackhpc/hooks/post.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,7 @@
1010
state: absent
1111
with_items:
1212
- /opt/ood/ondemand/root/usr/share/gems/3.1/ondemand/3.1.7-1/gems/bootstrap_form-2.7.0/test/dummy/Gemfile.lock
13+
- /opt/ood/ondemand/root/usr/share/gems/3.1/ondemand/3.1.9-1/gems/bootstrap_form-2.7.0/test/dummy/Gemfile.lock
1314
- /opt/ood/ondemand/root/usr/share/gems/3.1/ondemand/3.1.7-1/gems/bootstrap_form-4.5.0/demo/yarn.lock
15+
- /opt/ood/ondemand/root/usr/share/gems/3.1/ondemand/3.1.9-1/gems/bootstrap_form-4.5.0/demo/yarn.lock
1416
- /var/www/ood/apps/sys/dashboard/node_modules/data-confirm-modal/Gemfile.lock
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"cluster_image": {
3-
"RL8": "openhpc-RL8-241009-1523-354b048a",
4-
"RL9": "openhpc-RL9-241009-1523-354b048a",
5-
"RL9-cuda": "openhpc-cuda-RL9-241009-1523-354b048a"
3+
"RL8": "openhpc-RL8-241022-0441-a5affa58",
4+
"RL9": "openhpc-RL9-241022-0038-a5affa58",
5+
"RL9-cuda": "openhpc-cuda-RL9-241022-0441-a5affa58"
66
}
77
}

0 commit comments

Comments
 (0)