Commit c8131bf

Authored by Matt Pryor
Merge pull request #3 from stackhpc/feature/ood
Add Open OnDemand to CaaS clusters
2 parents 23ab9db + 7db2936 commit c8131bf

23 files changed: +467 additions, -318 deletions

assets/ood-icon.png

11 KB

group_vars/basic_users.yml

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+basic_users_users:
+  - name: azimuth
+    # Hash the password with a salt that is different for each host
+    password: "{{ vault_azimuth_user_password | password_hash('sha512', 65534 | random(seed=inventory_hostname) | string) }}"
+    uid: 1005
+    public_key: "{{ cluster_user_ssh_public_key }}"
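
The password expression seeds Ansible's random filter with inventory_hostname, so each host gets a different but stable salt and the hash does not change on every run. A minimal sketch of the same pattern as an ad-hoc playbook task (the play name and the 'example-password' literal are made up for illustration):

- hosts: cluster
  gather_facts: false
  tasks:
    - name: Show the per-host SHA-512 hash the basic_users role would set
      debug:
        msg: "{{ 'example-password' | password_hash('sha512', 65534 | random(seed=inventory_hostname) | string) }}"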

group_vars/cluster.yml

Lines changed: 5 additions & 0 deletions
@@ -2,6 +2,7 @@
 update_enable: "{{ cluster_upgrade_system_packages | default('false') | bool }}"
 
 # Read the secrets from the Ansible local facts on the control host
+vault_azimuth_user_password: "{{ hostvars[groups['control'][0]].ansible_local.openhpc_secrets.vault_azimuth_user_password }}"
 vault_grafana_admin_password: "{{ hostvars[groups['control'][0]].ansible_local.openhpc_secrets.vault_grafana_admin_password }}"
 vault_elasticsearch_admin_password: "{{ hostvars[groups['control'][0]].ansible_local.openhpc_secrets.vault_elasticsearch_admin_password }}"
 vault_elasticsearch_kibana_password: "{{ hostvars[groups['control'][0]].ansible_local.openhpc_secrets.vault_elasticsearch_kibana_password }}"
@@ -11,3 +12,7 @@ vault_openhpc_mungekey: "{{ hostvars[groups['control'][0]].ansible_local.openhpc
 
 # Override this to cope with the case where the podman group just doesn't exist
 appliances_local_users_podman_enable: "{{ groups.get('podman', []) | length > 0 }}"
+
+# The server name for Open OnDemand depends on whether Zenith is enabled or not
+openondemand_servername_default: "{{ hostvars[groups['openstack'][0]].cluster_floating_ip_address | replace('.', '-') ~ '.sslip.io' }}"
+openondemand_servername: "{{ zenith_fqdn_ood | default(openondemand_servername_default) }}"
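
To make the server-name logic concrete: sslip.io resolves a hostname containing a dash-separated IP back to that IP, so the default gives Open OnDemand a resolvable name without any DNS setup. A sketch of the rendered values with a made-up floating IP of 192.0.2.10 and Zenith disabled (zenith_fqdn_ood undefined):

openondemand_servername_default: 192-0-2-10.sslip.io
openondemand_servername: 192-0-2-10.sslip.io   # falls back to the default; a Zenith FQDN would win if defined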

group_vars/grafana.yml

Lines changed: 4 additions & 3 deletions
@@ -1,6 +1,7 @@
 ---
 
-zenith_proxy_podman_user: podman
+# Override when there's no OOD
+grafana_serve_from_sub_path: "{{ 'openondemand' in groups }}"
+grafana_auth_anonymous: "{{ 'openondemand' in groups }}"
 
-grafana_address: "{{ ansible_default_ipv4.address }}"
-grafana_port: 3000
+grafana_url: "{{ grafana_url_openondemand_proxy if 'openondemand' in groups else grafana_url_direct }}"
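
grafana_url therefore points either at the Open OnDemand reverse proxy or straight at Grafana. grafana_url_openondemand_proxy is defined in group_vars/openondemand.yml below; grafana_url_direct is assumed to be supplied elsewhere in the appliance, roughly along these lines (illustrative only, not part of this commit):

grafana_url_direct: "http://{{ grafana_address }}:{{ grafana_port }}"   # no OOD: browse Grafana directly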

group_vars/openhpc.yml

Lines changed: 4 additions & 0 deletions
@@ -1 +1,5 @@
 openhpc_cluster_name: "{{ cluster_name }}"
+
+openhpc_config:
+  SlurmctldDebug: debug
+  SlurmdDebug: debug
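
Assuming the openhpc role templates openhpc_config keys into slurm.conf as key=value pairs, the expected effect of the two entries above is:

# slurm.conf (expected rendered effect, not part of this commit)
# SlurmctldDebug=debug
# SlurmdDebug=debug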

group_vars/openondemand.yml

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+---
+openondemand_auth: basic_pam
+openondemand_jupyter_partition: "compute"
+openondemand_desktop_partition: "compute"
+
+httpd_listen_addr_port:
+  - 80
+  - 443
+
+# Allow proxying to compute nodes for apps; also allow the grafana host for monitoring when the grafana group is available
+openondemand_host_regex: '({{ openhpc_cluster_name ~ "-compute-\d+)" ~ ( "|(" ~ groups["grafana"][0] ~ ")" if "grafana" in groups else "" ) }}'
+
+# Add Grafana to the OOD dashboard links only if the grafana group is available
+openondemand_dashboard_links_grafana:
+  - name: Grafana
+    app_name: grafana
+    category: Monitoring
+    description: Dashboards
+    url: "{{ grafana_url_openondemand_proxy }}"
+openondemand_dashboard_links: "{{ openondemand_dashboard_links_grafana if 'grafana' in groups else [] }}"
+
+# Add the Grafana panel to the jobs page only if the grafana group is available
+openondemand_clusters:
+  slurm:
+    v2:
+      metadata:
+        title: "{{ openhpc_cluster_name }}" # interpolation here works as openondemand is lexically after openhpc
+      login:
+        host: "{{ hostvars[groups['login'].0].api_address }}"
+        default: true
+      job:
+        adapter: slurm
+        cluster: "{{ openhpc_cluster_name }}"
+      batch_connect:
+        basic:
+          script_wrapper: |-
+            module purge
+            export PATH=/opt/jupyter/bin/:$PATH
+            %s
+          set_host: host=$(hostname -s)
+        vnc:
+          script_wrapper: |-
+            module purge
+            export PATH=/opt/TurboVNC/bin:$PATH
+            # Workaround to avoid "Unable to contact settings server" when
+            # launching xfce4-session
+            xfce4-session() { /bin/dbus-launch /bin/xfce4-session $@ ; }
+            export -f xfce4-session
+            %s
+          set_host: host=$(hostname -s)
+      custom: "{{ openondemand_clusters_grafana if 'grafana' in groups else {} }}"
+
+grafana_address: "{{ hostvars[groups['grafana'][0]]['api_address'] if 'grafana' in groups else '' }}"
+grafana_url_openondemand_proxy: "https://{{ openondemand_servername }}/node/{{ groups['grafana'][0] if 'grafana' in groups else '' }}/{{ grafana_port }}"
+
+openondemand_clusters_grafana:
+  # embed grafana panels in Jobs app: https://osc.github.io/ood-documentation/latest/customization.html#grafana-support
+  grafana:
+    host: "{{ grafana_url_openondemand_proxy if 'openondemand' in groups else grafana_url_direct }}"
+    orgId: 1
+    dashboard:
+      name: "node-exporter-slurm"
+      uid: "node-exporter-slurm"
+      panels:
+        cpu: 77
+        memory: 78
+    labels:
+      cluster: "cluster"
+      host: "host"
+      jobid: "jobid"
+
+_opeonondemand_unset_auth: ' RequestHeader unset Authorization'
+
+# Fix grafana proxying for basic auth if anonymous grafana access is enabled:
+openondemand_node_proxy_directives: "{{ _opeonondemand_unset_auth if (openondemand_auth == 'basic_pam' and 'openondemand_host_regex' and 'grafana' in groups and hostvars[groups['grafana'][0]]._grafana_auth_is_anonymous) else '' }}"
+
+
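
The host regex is easier to read once rendered. With a made-up cluster name of "demo" and "demo-control-0" as the first grafana host, the Jinja expression evaluates to:

openondemand_host_regex: '(demo-compute-\d+)|(demo-control-0)'

and to just '(demo-compute-\d+)' when no grafana group exists, so in that case OOD can only proxy to compute nodes.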

group_vars/openstack.yml

Lines changed: 8 additions & 39 deletions
@@ -1,49 +1,18 @@
 # The default Terraform state key for backends that support it
 terraform_state_key: "cluster/{{ cluster_id }}/tfstate"
 
+# Set up the terraform backend
+terraform_backend_type: "{{ 'consul' if 'CONSUL_HTTP_ADDR' in ansible_env else 'local' }}"
 terraform_backend_config_defaults:
   consul:
     path: "{{ terraform_state_key }}"
     gzip: "true"
   local: {}
+terraform_backend_config: "{{ terraform_backend_config_defaults[terraform_backend_type] }}"
 
-#####
-## WARNING
-##
-## The groups specified here should replicate the groups in the StackHPC Slurm appliance environments
-##
-## https://github.com/stackhpc/ansible-slurm-appliance/blob/main/environments/common/inventory/groups
-## https://github.com/stackhpc/ansible-slurm-appliance/blob/main/environments/common/layouts/everything
-## https://github.com/stackhpc/ansible-slurm-appliance/blob/main/environments/common/layouts/minimal
-#####
-# These groups should represent the minimal layout
-cluster_groups_required:
-  login: ["{{ cluster_name }}_login"]
-  control: ["{{ cluster_name }}_control"]
-  compute: ["{{ cluster_name }}_compute"]
-  openhpc: [login, control, compute]
-  cluster: [openhpc]
-  selinux: [cluster]
-  nfs: [cluster]
-  mysql: [control]
-  update: [cluster]
+terraform_binary_directory: "{{ playbook_dir }}/bin"
+terraform_binary_path: "{{ terraform_binary_directory }}/terraform"
+terraform_project_path: "{{ playbook_dir }}/terraform"
 
-# These are the additional groups required for monitoring (see everything layout)
-cluster_groups_monitoring:
-  podman: [opendistro, filebeat]
-  prometheus: [control]
-  grafana: [control]
-  alertmanager: [control]
-  node_exporter: [cluster]
-  opendistro: [control]
-  slurm_stats: [control]
-  filebeat: [slurm_stats]
-
-# Additional groups for running the cluster validation
-cluster_groups_validation:
-  hpctests: [login]
-
-# Additional groups for Zenith support
-cluster_groups_zenith:
-  # Any hosts in the grafana group should go in the zenith group
-  zenith: [grafana]
+terraform_state: "{{ cluster_state | default('present') }}"
+cluster_ssh_user: rocky
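
The backend wiring picks Consul only when the controller environment already points at one. Rendered values under that assumption (illustrative; the cluster_id "demo" is made up):

# CONSUL_HTTP_ADDR present in ansible_env:
terraform_backend_type: consul
terraform_backend_config:
  path: "cluster/demo/tfstate"
  gzip: "true"

# otherwise:
terraform_backend_type: local
terraform_backend_config: {}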

group_vars/prometheus.yml

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+---
+# Override openondemand_address because it is needed in openondemand_scrape_configs,
+# which is used in prometheus_scrape_configs
+openondemand_address: "{{ hostvars[groups['openondemand'].0].api_address if 'openondemand' in groups else '' }}"
+
+# Override the group_var set in the ansible-slurm-appliance all group - unless
+# OOD is being deployed, there won't be an OOD group
+prometheus_scrape_configs: "{{ prometheus_scrape_configs_default + (openondemand_scrape_configs if ( 'openondemand' in groups ) else [] ) }}"
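
The override simply appends the OOD scrape jobs to the appliance defaults when the group exists. A minimal sketch of the shape of the result (job names, targets and ports are placeholders, not the appliance's real defaults):

prometheus_scrape_configs:
  - job_name: node_exporter                       # from prometheus_scrape_configs_default
    static_configs:
      - targets: ["demo-compute-0:9100"]
  - job_name: openondemand                        # appended from openondemand_scrape_configs
    static_configs:
      - targets: ["{{ openondemand_address }}:9100"]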

group_vars/zenith.yml

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+zenith_proxy_podman_user: podman

requirements.yml

Lines changed: 6 additions & 3 deletions
@@ -18,10 +18,10 @@ roles:
 - src: cloudalchemy.grafana
 - src: geerlingguy.mysql
 - src: jriguera.configdrive
-- name: stackhpc.terraform-infra
-  src: https://github.com/stackhpc/ansible-role-terraform-infra
+- src: https://github.com/OSC/ood-ansible.git
+  name: osc.ood
   type: git
-  version: 4d9d67b5a1866edf6988a1f7e9e64868df8f65ae
+  version: v2.0.5
 
 collections:
 - name: ansible.posix
@@ -33,6 +33,9 @@ collections:
 - name: community.mysql
 - name: containers.podman
 - name: openstack.cloud
+- name: https://github.com/stackhpc/ansible-collection-terraform
+  type: git
+  version: ae1dc46a9d266bcdc6e79a6e290edbb080596f7f
 - name: https://github.com/stackhpc/ansible_collection_slurm_openstack_tools
   type: git
   version: v0.1.0
