File tree Expand file tree Collapse file tree 4 files changed +22
-5
lines changed
roles/cluster_infra/templates Expand file tree Collapse file tree 4 files changed +22
-5
lines changed Original file line number Diff line number Diff line change @@ -17,5 +17,8 @@ terraform_project_path: "{{ playbook_dir }}/terraform"
1717terraform_state : " {{ cluster_state | default('present') }}"
1818cluster_ssh_user : rocky
1919
20+ # Set the size of the state volume (in GB) to metrics_db_maximum_size + 10, leaving 10GB beyond the metrics database for other state such as Slurm files
21+ state_volume_size : " {{ metrics_db_maximum_size + 10 }}"
22+
2023state_volume_device_path : " {{ cluster_state_volume_device_path | default('/dev/vdb') }}"
2124home_volume_device_path : " {{ cluster_home_volume_device_path | default('/dev/vdc') }}"
Original file line number Diff line number Diff line change @@ -7,5 +7,5 @@ openondemand_address: "{{ hostvars[groups['openondemand'].0].api_address if 'ope
77# OOD is being deployed then there won't be an OOD group
88prometheus_scrape_configs : " {{ prometheus_scrape_configs_default + (openondemand_scrape_configs if ( 'openondemand' in groups ) else [] ) }}"
99
10- # Fix Prometheus storage retention size to 80% of state volume size
11- prometheus_storage_retention_size : " {{ ( 0.8 * ( cluster_state_volume_size | int ) ) | int ~ 'GB' }} "
10+ # Set Prometheus storage retention size to metrics_db_maximum_size (in GB)
11+ prometheus_storage_retention_size : " {{ metrics_db_maximum_size }}GB "
Original file line number Diff line number Diff line change @@ -75,7 +75,7 @@ resource "openstack_networking_secgroup_rule_v2" "secgroup_slurm_login_rule_ingr
7575resource "openstack_blockstorage_volume_v3" "state" {
7676 name = "{{ cluster_name }}-state"
7777 description = "State for control node"
78- size = "{{ cluster_state_volume_size }}"
78+ size = "{{ state_volume_size }}"
7979}
8080
8181resource "openstack_blockstorage_volume_v3" "home" {
Original file line number Diff line number Diff line change @@ -30,14 +30,28 @@ parameters:
3030 min_disk : 20
3131
3232 - name : home_volume_size
33- label : Home volume size
34- description : The size in GB of the volume to use for home directories
33+ label : Home volume size (GB)
34+ description : The size of the cloud volume to use for home directories
3535 kind : integer
3636 immutable : true
3737 options :
3838 min : 10
3939 default : 100
4040
41+ - name : metrics_db_maximum_size
42+ label : Metrics database size (GB)
43+ description : |
44+ The oldest metrics records in the [Prometheus](https://prometheus.io/) database will be
45+ discarded to ensure that the database does not grow larger than this size.
46+
47+ **A cloud volume of this size +10GB will be created to hold and persist the metrics
48+ database and important Slurm files.**
49+ kind : integer
50+ immutable : true
51+ options :
52+ min : 10
53+ default : 10
54+
4155 - name : cluster_run_validation
4256 label : Post-configuration validation
4357 description : >-
You can’t perform that action at this time.
0 commit comments