Skip to content
Merged
Show file tree
Hide file tree
Changes from 58 commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
dfd98e1
DEMO: configure squid on leafcloud deploy host
sjpb May 23, 2025
594ddd8
make filebeat not require internet during site.yml
sjpb May 23, 2025
cc1e3ae
configure stackhpc env for no outbound internet from cluster
sjpb May 23, 2025
2838930
wip: disable downloaded dashboards
sjpb May 23, 2025
a91b2c4
nearly get bootstrap working in configure mode without internet access
sjpb May 23, 2025
b8d890f
fixup enabling squid in stackhpc
sjpb May 23, 2025
788cdc1
enable proxy for plays only for stackhpc env
sjpb May 23, 2025
b176102
add proxy_plays_only
sjpb May 23, 2025
a831706
fix ansible-init install when proxied
sjpb May 23, 2025
fd7503e
workaround manila install when proxied
sjpb May 23, 2025
2bb5a66
workaround freeipa client install when proxied
sjpb May 23, 2025
2364c0c
add default for appliances_environment_vars
sjpb May 23, 2025
c59f1a4
workaround eessi install when proxied
sjpb May 23, 2025
ddcd0f0
workaround slurm_tools install when proxied
sjpb May 23, 2025
89461e4
automatically disable proxying dnf and systemd when proxy_plays_only
sjpb May 23, 2025
9fb8d05
get ondemand and monitoring working
sjpb May 23, 2025
7d6467d
support basic auth in squid role
sjpb May 23, 2025
49ea0f6
add indirection to allow providing basic auth to proxy
sjpb May 23, 2025
baf29bb
configure stackhpc env for basic auth squid
sjpb May 23, 2025
e892bb4
add isolated clusters docs
sjpb May 23, 2025
8a1615e
skip ansible-init install when not building images
sjpb Jun 12, 2025
125669f
fix slurm_exporter for offline site playbook
sjpb Jun 13, 2025
c409e52
fix networking being required during site.yml ansible-init,openondema…
sjpb Jun 20, 2025
192607f
install grafana plugins and dashboard during fatimage and skip by def…
sjpb Jun 24, 2025
999584f
Merge branch 'main' into feat/isolated-env-2
sjpb Jun 24, 2025
04e622f
install slurm_stats during fatimage and skip during site
sjpb Jun 24, 2025
2b9068d
fix rebuild to do installs during fatimage
sjpb Jun 24, 2025
fe49d43
remove unneeded envvar injection
sjpb Jun 24, 2025
c775137
fix role import typo
sjpb Jun 24, 2025
417b63c
backup do-not-merge changes
sjpb Jun 24, 2025
4cc447b
revert changes to squid - now broken out as #718
sjpb Jun 24, 2025
b0640a2
fix slurm_exporter not happening during build
sjpb Jun 25, 2025
e75fae6
skip dashboard installation in configure mode
sjpb Jun 26, 2025
e30ce75
fix ansible jinja template warnings
sjpb Jun 26, 2025
963f0ff
update CI image
sjpb Jun 26, 2025
4a48c2f
Merge branch 'main' into feat/isolated-env-2
sjpb Jun 26, 2025
97910e2
revert proxy_tasks_only but make configuring proxy easier
sjpb Jun 26, 2025
b160d04
Merge branch 'main' into feat/isolated-env-2
sjpb Jun 26, 2025
cebc719
disable all dnf repos at end of build
sjpb Jun 26, 2025
f17e331
add proxy_remove
sjpb Jun 26, 2025
3af4552
add missing common proxy vars
sjpb Jun 26, 2025
fa857e3
swap appliances_environment_vars to appliances_remote_environment_vars
sjpb Jul 1, 2025
8399793
get proxy_remove working properly
sjpb Jul 1, 2025
de48073
wip isolated docs
sjpb Jul 1, 2025
f8e7578
Merge branch 'main' @ v2.2 into feat/isolated-env-2
sjpb Jul 1, 2025
95585ca
bump CI image
sjpb Jul 1, 2025
70161e7
revise description for jupyter noticing there is osc.ood:apps as well…
sjpb Jul 1, 2025
1c8eed3
skip ipa-client dnf package installation in configure mode
sjpb Jul 1, 2025
0e77b2e
revert changes to sms tf config for testing
sjpb Jul 1, 2025
ef5eecf
fix grafana datasources
sjpb Jul 2, 2025
3e9c1d9
fix jinja warnings
sjpb Jul 2, 2025
2f73621
fix jupyter app when isolated
sjpb Jul 2, 2025
3641f48
make extra ondemand apps config easier
sjpb Jul 2, 2025
0cd904f
fix manila for isolated network
sjpb Jul 2, 2025
966033e
fix eessi on isolated networks
sjpb Jul 2, 2025
8986d3d
update isolated docs
sjpb Jul 2, 2025
6355aeb
tidy PR
sjpb Jul 2, 2025
ba71337
improve isolated docs
sjpb Jul 3, 2025
ff88ca4
Merge branch 'main' into feat/isolated-env-2
sjpb Jul 4, 2025
045291a
Merge branch 'main' into feat/isolated-env-2
bertiethorpe Jul 7, 2025
a7059e8
Bump CI image
sjpb Jul 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ansible/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,5 @@ roles/*
!roles/slurm_recompile/**
!roles/nhc/
!roles/nhc/**
!roles/eessi/
!roles/eessi/**
23 changes: 11 additions & 12 deletions ansible/bootstrap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,9 @@
become: yes
tasks:
- name: Install and configure tuneD
import_role:
include_role:
name: tuned
tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}"

- hosts: freeipa_server
# Done here as it might be providing DNS
Expand Down Expand Up @@ -217,31 +218,27 @@
become: yes
tags: firewalld
tasks:
- import_role:
- include_role:
name: firewalld
tasks_from: "{{ 'runtime.yml' if appliances_mode == 'configure' else 'main.yml' }}"

- hosts: fail2ban
gather_facts: false
become: yes
tags: fail2ban
tasks:
- import_role:
- include_role:
name: fail2ban
tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}"

- name: Setup podman
gather_facts: false
hosts: podman
tags: podman
tasks:
- import_role:
name: podman
tasks_from: prereqs.yml
tags: prereqs

- import_role:
- include_role:
name: podman
tasks_from: config.yml
tags: config
tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}"

- hosts: update
gather_facts: false
Expand Down Expand Up @@ -317,8 +314,10 @@
become: yes
tags: linux_ansible_init
tasks:
- include_role:
- name: Install ansible-init
include_role:
name: azimuth_cloud.image_utils.linux_ansible_init
when: "appliances_mode == 'build'"

- hosts: k3s:&builder
become: yes
Expand Down
5 changes: 3 additions & 2 deletions ansible/extras.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@
become: true
gather_facts: false
tasks:
- name: Install and configure EESSI
import_role:
- name: Install / configure EESSI
include_role:
name: eessi
tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}"

- name: Setup CUDA
hosts: cuda
Expand Down
25 changes: 18 additions & 7 deletions ansible/fatimage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,12 @@
tasks_from: install.yml
when: "'mysql' in group_names"

- name: OpenHPC
- name: Install rebuild
include_role:
name: rebuild
tasks_from: install.yml

- name: Install OpenHPC
import_role:
name: stackhpc.openhpc
tasks_from: install.yml
Expand All @@ -134,7 +139,6 @@
import_role:
name: openondemand
tasks_from: vnc_compute.yml

when: "'openondemand_desktop' in group_names"

- name: Open Ondemand jupyter node
Expand All @@ -153,7 +157,11 @@
tasks_from: install.yml
when: "'opensearch' in group_names"

# slurm_stats - nothing to do
- import_role:
name: slurm_stats
tasks_from: install.yml
when: "'slurm_stats' in group_names"

- import_role:
name: filebeat
tasks_from: install.yml
Expand All @@ -171,11 +179,9 @@
when: "'openondemand' in group_names"

- name: slurm exporter
import_role:
include_role:
name: slurm_exporter
tasks_from: install
vars:
slurm_exporter_state: stopped
tasks_from: install.yml
when: "'slurm_exporter' in group_names"

- name: Install alertmanager
Expand Down Expand Up @@ -249,6 +255,11 @@
- import_role:
name: cloudalchemy.grafana
tasks_from: install.yml
- import_role:
name: cloudalchemy.grafana
tasks_from: plugins.yml
- include_role: # done in same play so it can use handlers from cloudalchemy.grafana
name: grafana-dashboards

- name: Add support for NVIDIA GPU auto detection to Slurm
hosts: cuda
Expand Down
2 changes: 2 additions & 0 deletions ansible/filesystems.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
tasks:
- include_role:
name: stackhpc.os-manila-mount
tasks_from: "{{ item }}"
loop: "{{ ['lookup.yml', 'mount.yml'] if appliances_mode == 'configure' else ['main.yml'] }}"

- name: Setup Lustre clients
hosts: lustre
Expand Down
11 changes: 11 additions & 0 deletions ansible/final.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,14 @@
- include_role:
name: compute_init
tasks_from: export.yml

# Runs the `proxy` role with proxy_state=absent on hosts in the `proxy` group.
# Opt-in: the role is only included when `proxy_remove` is set to a truthy
# value; an undefined `proxy_remove` is treated as false.
- hosts: proxy
  gather_facts: false
  tags: proxy
  become: true
  tasks:
    - name: Remove proxy configuration
      include_role:
        name: proxy
      vars:
        proxy_state: absent
      # `| bool` already yields a boolean, so no comparison against `true`
      when: proxy_remove | default(false) | bool
3 changes: 2 additions & 1 deletion ansible/iam.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@
become: yes
tasks:
- name: Install FreeIPA client
import_role:
include_role:
name: freeipa
tasks_from: client-install.yml
when: "appliances_mode != 'configure'"
- name: Enrol FreeIPA client
import_role:
name: freeipa
Expand Down
36 changes: 26 additions & 10 deletions ansible/monitoring.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,22 @@
tasks:
- include_role:
name: slurm_stats
tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}"

- name: Deploy filebeat
hosts: filebeat
tags: filebeat
tasks:
- import_role:
- include_role:
name: filebeat
tasks_from: "{{ 'runtime.yml' if appliances_mode == 'configure' else 'main.yml' }}"

- name: Deploy node_exporter
hosts: node_exporter
tags: node_exporter
tasks:
- import_role: name=cloudalchemy.node_exporter
- import_role:
name: cloudalchemy.node_exporter

- name: Deploy OpenOndemand exporter
hosts: openondemand
Expand All @@ -46,12 +49,13 @@
tasks_from: exporter.yml

- name: Deploy Slurm exporter
hosts: control
hosts: slurm_exporter
become: true
tags: slurm_exporter
tasks:
- import_role:
- include_role:
name: slurm_exporter
tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'main.yml' }}"

- name: Setup core monitoring software
hosts: prometheus
Expand All @@ -68,24 +72,36 @@
# i.e. if prometheus_version isn't defined we don't care, so use what's already there
set_fact:
prometheus_skip_install: "{{ false if prometheus_version is defined else true }}"
when: "{{ (prometheus_binaries.results | map(attribute='stat') | map(attribute='exists')) + [prometheus_skip_install is not defined] }}"
when: "(prometheus_binaries.results | map(attribute='stat') | map(attribute='exists')) + [prometheus_skip_install is not defined]"
- import_role:
name: cloudalchemy.prometheus

- name: Deploy grafana
hosts: grafana
tags: grafana
tasks:
- assert:
that: vault_grafana_admin_password is defined
fail_msg: "Must define vault_grafana_admin_password - use `ansible-playbook generate-passwords.yml` to generate a set of passwords"
- name: Skip plugin installation in configure mode
# done during fatimage - can't do this in vars block as that is recursive
ansible.builtin.set_fact:
grafana_plugins: "{{ [] if appliances_mode == 'configure' else grafana_plugins }}"
- name: Copy Grafana plugins installed in image into persistent grafana state
ansible.builtin.copy:
remote_src: true
src: /var/lib/grafana/plugins/ # trailing / means copy contents
dest: "{{ grafana_data_dir }}/plugins/"
# below matches what already exists:
owner: root
group: root
mode: '0755'
become: true
- include_role:
name: cloudalchemy.grafana
vars:
# We use internal roles to register the dashboards as the role does not support all options that we require.
# Internal role used to install dashboards as cloudalchemy role does not support all required options:
grafana_dashboards: []
- import_role: # done in same play so it can use handlers from cloudalchemy.grafana
- include_role: # done in same play so it can use handlers from cloudalchemy.grafana
name: grafana-dashboards
when: "appliances_mode != 'configure'"

- name: Deploy alertmanager
hosts: alertmanager
Expand Down
6 changes: 6 additions & 0 deletions ansible/portal.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
become: yes
gather_facts: yes # TODO
tasks:
- name: Skip openondemand apps installation in configure mode
set_fact:
ood_install_apps: {}
when: appliances_mode == 'configure'
- import_role:
name: openondemand
tasks_from: main.yml
Expand All @@ -19,6 +23,7 @@
- import_role:
name: openondemand
tasks_from: vnc_compute.yml
when: appliances_mode != 'configure' # is run during build

- hosts: openondemand_jupyter
tags:
Expand All @@ -30,3 +35,4 @@
- import_role:
name: openondemand
tasks_from: jupyter_compute.yml
when: appliances_mode != 'configure' # is run during build
18 changes: 16 additions & 2 deletions ansible/roles/dnf_repos/tasks/disable_repos.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
- name: Disable Pulp repos
- name: Remove password and disable Pulp repos
ansible.builtin.yum_repository:
file: "{{ item.file }}"
name: "{{ item.name }}"
Expand All @@ -8,11 +8,25 @@
enabled: false
loop: "{{ dnf_repos_repolist }}"

- name: Disable EPEL repo
- name: Remove password and disable EPEL repo
ansible.builtin.yum_repository:
name: epel
file: epel
description: "{{ dnf_repos_epel_description }}"
baseurl: "{{ dnf_repos_epel_baseurl }}"
gpgcheck: false
enabled: false

# Collect every *.repo file directly under /etc/yum.repos.d ...
- name: Get all repo files
  ansible.builtin.find:
    patterns: '*.repo'
    paths: /etc/yum.repos.d
  register: _dnf_repo_files

# ... and rewrite lines starting with "enabled=1" (with or without a single
# space either side of "=") to "enabled=0". A backup copy of each modified
# file is kept.
- name: Disable every repo
  ansible.builtin.replace:
    path: "{{ item.path }}"
    backup: true
    regexp: '^enabled\ ?=\ ?1'
    replace: 'enabled=0'
  loop: "{{ _dnf_repo_files.files }}"
16 changes: 16 additions & 0 deletions ansible/roles/eessi/tasks/configure.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
# Configuration-only tasks for the CVMFS client used by EESSI: write the
# client settings, then run the CVMFS setup command. Nothing in this file
# installs packages.

- name: Add base CVMFS config
  # One ini_file call per key/value pair in the `cvmfs_config` mapping.
  community.general.ini_file:
    dest: /etc/cvmfs/default.local
    # null section: options are written without a [section] header
    section: null
    option: "{{ item.key }}"
    value: "{{ item.value }}"
    # write `key=value` rather than `key = value`
    no_extra_spaces: true
  loop: "{{ cvmfs_config | dict2items }}"


# NOTE: Not clear how to make this idempotent
- name: Ensure CVMFS config is setup
  ansible.builtin.command:
    cmd: "cvmfs_config setup"
  # The command is executed on every run; state that explicitly instead of
  # relying on the command module's implicit "always changed" result.
  changed_when: true
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
---

- name: Download Cern GPG key
ansible.builtin.get_url:
url: http://cvmrepo.web.cern.ch/cvmrepo/yum/RPM-GPG-KEY-CernVM
Expand Down Expand Up @@ -31,18 +32,3 @@
# - name: Install EESSI CVMFS config
# dnf:
# name: cvmfs-config-eessi

- name: Add base CVMFS config
community.general.ini_file:
dest: /etc/cvmfs/default.local
section: null
option: "{{ item.key }}"
value: "{{ item.value }}"
no_extra_spaces: true
loop: "{{ cvmfs_config | dict2items }}"


# NOTE: Not clear how to make this idempotent
- name: Ensure CVMFS config is setup
command:
cmd: "cvmfs_config setup"
4 changes: 4 additions & 0 deletions ansible/roles/eessi/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
# Default entry point for the eessi role: run the install tasks, then the
# configure tasks.

- ansible.builtin.include_tasks: install.yml
- ansible.builtin.include_tasks: configure.yml
15 changes: 15 additions & 0 deletions ansible/roles/fail2ban/tasks/configure.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
# Configuration-only tasks for fail2ban: template the jail config, apply any
# resulting handler notification, and make sure the service is enabled and
# running.

- name: Create config
  ansible.builtin.template:
    src: jail.local.j2
    dest: /etc/fail2ban/jail.local
  notify: Restart fail2ban

# Run any notified handler now instead of waiting for the end of the play.
- name: flush handlers
  ansible.builtin.meta: flush_handlers

# Covers the case where the template did not change and so no handler fired.
- name: Ensure fail2ban running even if no config change
  ansible.builtin.service:
    name: fail2ban
    enabled: true
    state: started
11 changes: 11 additions & 0 deletions ansible/roles/fail2ban/tasks/install.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# Package installation tasks for fail2ban.

# NOTE(review): presumably EPEL is needed as the source of the fail2ban
# packages below - confirm.
- name: Install EPEL repo
  ansible.builtin.package:
    name: epel-release

- name: Install fail2ban packages
  ansible.builtin.package:
    state: present
    name:
      - fail2ban-server
      - fail2ban-firewalld
Loading
Loading