File tree Expand file tree Collapse file tree 5 files changed +51
-23
lines changed
persist_openhpc_secrets/tasks Expand file tree Collapse file tree 5 files changed +51
-23
lines changed Original file line number Diff line number Diff line change @@ -17,4 +17,8 @@ terraform_project_path: "{{ playbook_dir }}/terraform"
1717terraform_state : " {{ cluster_state | default('present') }}"
1818cluster_ssh_user : rocky
1919
20- state_volume_size : 150 # GB
20+ # Set the size of the state volume to metrics_db_maximum_size + 10 (cast so a string-typed value still adds numerically)
21+ state_volume_size : " {{ (metrics_db_maximum_size | int) + 10 }}"
22+
23+ state_volume_device_path : " {{ cluster_state_volume_device_path | default('/dev/vdb') }}"
24+ home_volume_device_path : " {{ cluster_home_volume_device_path | default('/dev/vdc') }}"
Original file line number Diff line number Diff line change @@ -6,3 +6,6 @@ openondemand_address: "{{ hostvars[groups['openondemand'].0].api_address if 'ope
66# Override group_var set in ansible-slurm-appliance all group - unless
77# OOD is being deployed then there won't be an OOD group
88prometheus_scrape_configs : " {{ prometheus_scrape_configs_default + (openondemand_scrape_configs if ( 'openondemand' in groups ) else [] ) }}"
9+
10+ # Set Prometheus storage retention size
11+ prometheus_storage_retention_size : " {{ metrics_db_maximum_size }}GB"
Original file line number Diff line number Diff line change @@ -81,7 +81,7 @@ resource "openstack_blockstorage_volume_v3" "state" {
8181resource "openstack_blockstorage_volume_v3" "home" {
8282 name = "{{ cluster_name }}-home"
8383 description = "Home for control node"
84- size = "{{ home_volme_size }}"
84+ size = "{{ home_volume_size }}"
8585}
8686
8787
@@ -162,11 +162,11 @@ resource "openstack_compute_instance_v2" "control" {
162162 fs_setup:
163163 - label: state
164164 filesystem: ext4
165- device: / dev / vdb
165+ device: {{ state_volume_device_path }}
166166 partition: auto
167167 - label: home
168168 filesystem: ext4
169- device: / dev / vdc
169+ device: {{ home_volume_device_path }}
170170 partition: auto
171171 mounts:
172172 - [LABEL =state, /var /lib /state]
Original file line number Diff line number Diff line change 22
33- name : Check if OpenHPC secrets exist in persistent storage
44 stat :
5- path : " {{ appliances_state_dir }}/openhpc_secrets.fact"
5+ path : " {{ appliances_state_dir }}/ansible.facts.d/openhpc_secrets.fact" # same file the "Write OpenHPC secrets" task creates
66 register : openhpc_secrets_stat
77
8- - name : Ensure Ansible facts directory exists
8+ - name : Ensure Ansible facts directories exist
99 file :
10- path : /etc/ansible/facts.d
10+ path : " {{ item }} "
1111 state : directory
12- recurse : yes
13-
14- - name : Write OpenHPC secrets to persistent storage
12+ owner : root
13+ mode : "0700" # directories need the search (x) bit; quoted so the octal is not retyped by the YAML parser
14+ loop :
15+ - " {{ appliances_state_dir }}/ansible.facts.d"
16+ - " /etc/ansible/facts.d"
17+
18+ - name : Write OpenHPC secrets
1519 template :
1620 src : openhpc_secrets.fact
17- dest : " {{ appliances_state_dir }}/openhpc_secrets.fact"
18- when : " not openhpc_secrets_stat.stat.exists" # required as templated ones are random
21+ dest : " {{ appliances_state_dir }}/ansible.facts.d/openhpc_secrets.fact"
22+ owner : root
23+ mode : 0600
24+ when : " not openhpc_secrets_stat.stat.exists"
1925
20- - name : Make OpenHPC secrets available to local facts
21- copy :
22- src : " {{ appliances_state_dir }}/openhpc_secrets.fact"
26+ - name : Symlink persistent facts to facts_path
27+ file :
28+ state : link
29+ src : " {{ appliances_state_dir }}/ansible.facts.d/openhpc_secrets.fact"
2330 dest : /etc/ansible/facts.d/openhpc_secrets.fact
24- remote_src : yes
25-
26- - name : Re-read facts after adding custom fact
31+ owner : root
32+
33+ - name : Read facts
2734 ansible.builtin.setup :
2835 filter : ansible_local
Original file line number Diff line number Diff line change @@ -27,17 +27,31 @@ parameters:
2727 immutable : true
2828 options :
2929 min_ram : 2048
30- min_disk : 10
30+ min_disk : 20
3131
32- - name : home_volme_size
33- label : Home volume size
34- description : The size in GB of the volume to use for home directories
32+ - name : home_volume_size
33+ label : Home volume size (GB)
34+ description : The size of the cloud volume to use for home directories
3535 kind : integer
3636 immutable : true
3737 options :
38- min : 1
38+ min : 10
3939 default : 100
4040
41+ - name : metrics_db_maximum_size
42+ label : Metrics database size (GB)
43+ description : |
44+ The oldest metrics records in the [Prometheus](https://prometheus.io/) database will be
45+ discarded to ensure that the database does not grow larger than this size.
46+
47+ **A cloud volume of this size +10GB will be created to hold and persist the metrics
48+ database and important Slurm files.**
49+ kind : integer
50+ immutable : true
51+ options :
52+ min : 10
53+ default : 10
54+
4155 - name : cluster_run_validation
4256 label : Post-configuration validation
4357 description : >-
You can’t perform that action at this time.
0 commit comments