Skip to content

Commit 723cbb6

Browse files
committed
Merge branch 'main' into feat/cloudinit-gateways
2 parents 1930281 + d317920 commit 723cbb6

35 files changed

+188
-54
lines changed

.github/workflows/extra.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@ on:
55
branches:
66
- main
77
paths:
8-
- 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json'
8+
- 'environments/.stackhpc/tofu/cluster_image.auto.tfvars.json'
99
- 'ansible/roles/doca/**'
1010
- 'ansible/roles/cuda/**'
1111
- 'ansible/roles/lustre/**'
1212
- '.github/workflows/extra.yml'
1313
pull_request:
1414
paths:
15-
- 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json'
15+
- 'environments/.stackhpc/tofu/cluster_image.auto.tfvars.json'
1616
- 'ansible/roles/doca/**'
1717
- 'ansible/roles/cuda/**'
1818
- 'ansible/roles/lustre/**'
@@ -30,7 +30,7 @@ jobs:
3030
matrix: # build RL8, RL9
3131
build:
3232
- image_name: openhpc-extra-RL8
33-
source_image_name_key: RL8 # key into environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
33+
source_image_name_key: RL8 # key into environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
3434
inventory_groups: doca,cuda,lustre
3535
volume_size: 30 # needed for cuda
3636
- image_name: openhpc-extra-RL9
@@ -51,7 +51,7 @@ jobs:
5151
run: |
5252
{
5353
echo 'FAT_IMAGES<<EOF'
54-
cat environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
54+
cat environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
5555
echo EOF
5656
} >> "$GITHUB_ENV"
5757

.github/workflows/s3-image-sync.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ on:
55
branches:
66
- main
77
paths:
8-
- 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json'
8+
- 'environments/.stackhpc/tofu/cluster_image.auto.tfvars.json'
99
env:
1010
S3_BUCKET: openhpc-images-prerelease
11-
IMAGE_PATH: environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
11+
IMAGE_PATH: environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
1212

1313
jobs:
1414
s3_cleanup:

.github/workflows/stackhpc.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -84,17 +84,17 @@ jobs:
8484
with:
8585
tofu_version: 1.6.2
8686

87-
- name: Initialise terraform
88-
run: terraform init
89-
working-directory: ${{ github.workspace }}/environments/.stackhpc/terraform
87+
- name: Initialise tofu
88+
run: tofu init
89+
working-directory: ${{ github.workspace }}/environments/.stackhpc/tofu
9090

9191
- name: Write clouds.yaml
9292
run: |
9393
mkdir -p ~/.config/openstack/
9494
echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml
9595
shell: bash
9696

97-
- name: Setup environment-specific inventory/terraform inputs
97+
- name: Setup environment-specific inventory/tofu inputs
9898
run: |
9999
. venv/bin/activate
100100
. environments/.stackhpc/activate
@@ -108,15 +108,15 @@ jobs:
108108
run: |
109109
. venv/bin/activate
110110
. environments/.stackhpc/activate
111-
cd $APPLIANCES_ENVIRONMENT_ROOT/terraform
112-
terraform apply -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
111+
cd $APPLIANCES_ENVIRONMENT_ROOT/tofu
112+
tofu apply -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
113113
114114
- name: Delete infrastructure if provisioning failed
115115
run: |
116116
. venv/bin/activate
117117
. environments/.stackhpc/activate
118-
cd $APPLIANCES_ENVIRONMENT_ROOT/terraform
119-
terraform destroy -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
118+
cd $APPLIANCES_ENVIRONMENT_ROOT/tofu
119+
tofu destroy -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
120120
if: failure() && steps.provision_servers.outcome == 'failure'
121121

122122
- name: Configure cluster
@@ -201,8 +201,8 @@ jobs:
201201
run: |
202202
. venv/bin/activate
203203
. environments/.stackhpc/activate
204-
cd $APPLIANCES_ENVIRONMENT_ROOT/terraform
205-
terraform destroy -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
204+
cd $APPLIANCES_ENVIRONMENT_ROOT/tofu
205+
tofu destroy -auto-approve -var-file="${{ env.CI_CLOUD }}.tfvars"
206206
if: ${{ success() || cancelled() }}
207207

208208
# - name: Delete images

.github/workflows/trivyscan.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ on:
55
branches:
66
- main
77
paths:
8-
- 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json'
8+
- 'environments/.stackhpc/tofu/cluster_image.auto.tfvars.json'
99

1010
jobs:
1111
scan:
@@ -18,7 +18,7 @@ jobs:
1818
matrix:
1919
build: ["RL8", "RL9"]
2020
env:
21-
JSON_PATH: environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
21+
JSON_PATH: environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
2222
OS_CLOUD: openstack
2323
CI_CLOUD: ${{ vars.CI_CLOUD }}
2424

ansible/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,7 @@ roles/*
7676
!roles/pulp_site/**
7777
!roles/doca/
7878
!roles/doca/**
79+
!roles/slurm_stats/
80+
!roles/slurm_stats/**
81+
!roles/pytools/
82+
!roles/pytools/**

ansible/monitoring.yml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,9 @@
1717
- name: Setup slurm stats
1818
hosts: slurm_stats
1919
tags: slurm_stats
20-
collections:
21-
- stackhpc.slurm_openstack_tools
2220
tasks:
2321
- include_role:
24-
name: slurm-stats
25-
apply:
26-
# Collection currently requires root for all tasks.
27-
become: true
22+
name: slurm_stats
2823

2924
- name: Deploy filebeat
3025
hosts: filebeat
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
stackhpc.slurm_openstack_tools.slurm-stats
2+
==========================================
3+
4+
Configures slurm-stats from https://github.com/stackhpc/slurm-openstack-tools.git which
5+
transforms `sacct` output into a form that is more amenable to importing into Elasticsearch/Loki.
6+
7+
Requirements
8+
------------
9+
10+
Role Variables
11+
--------------
12+
13+
See `defaults/main.yml`.
14+
15+
Dependencies
16+
------------
17+
18+
Example Playbook
19+
----------------
20+
21+
- hosts: compute
22+
tasks:
23+
- import_role:
24+
name: slurm_stats
25+
26+
27+
License
28+
-------
29+
30+
Apache-2.0
31+
32+
Author Information
33+
------------------
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
####################
3+
# log rotate options
4+
####################
5+
6+
# These options affect the contents of the log-rotate file.
7+
# See: man logrotate
8+
9+
# Number of times the log files are rotated before the oldest is removed
10+
slurm_stats_log_rotate_content_rotate: 7
11+
12+
# How frequently the log files are rotated. One of: hourly, daily, weekly, monthly, yearly
13+
slurm_stats_log_rotate_content_frequency: daily
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
---
2+
3+
- name: Setup slurm tools
4+
include_role:
5+
name: slurm_tools
6+
7+
- name: Create a directory to house the log files
8+
file:
9+
state: directory
10+
path: /var/log/slurm-stats
11+
become: true
12+
13+
- name: Create cron job
14+
cron:
15+
name: Generate slurm stats
16+
minute: "*/5"
17+
user: root
18+
# NOTE: lasttimestamp is stored at /root/lasttimestamp
19+
job: "TZ=UTC /opt/slurm-tools/bin/slurm-stats >> /var/log/slurm-stats/finished_jobs.json"
20+
cron_file: slurm-stats
21+
become: true
22+
23+
- name: Setup log rotate
24+
copy:
25+
content: |
26+
# WARNING: This file is managed by ansible, do not modify.
27+
/var/log/slurm-stats/finished_jobs.json {
28+
{{ slurm_stats_log_rotate_content_frequency }}
29+
rotate {{ slurm_stats_log_rotate_content_rotate }}
30+
compress
31+
delaycompress
32+
}
33+
dest: /etc/logrotate.d/slurm-stats
34+
become: true
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
---
2+
language: python
3+
python: "2.7"
4+
5+
# Use the new container infrastructure
6+
sudo: false
7+
8+
# Install pip (used below to install ansible)
9+
addons:
10+
apt:
11+
packages:
12+
- python-pip
13+
14+
install:
15+
# Install ansible
16+
- pip install ansible
17+
18+
# Check ansible version
19+
- ansible --version
20+
21+
# Create ansible.cfg with correct roles_path
22+
- printf '[defaults]\nroles_path=../' >ansible.cfg
23+
24+
script:
25+
# Basic role syntax check
26+
- ansible-playbook tests/test.yml -i tests/inventory --syntax-check
27+
28+
notifications:
29+
webhooks: https://galaxy.ansible.com/api/v1/notifications/

0 commit comments

Comments
 (0)