@@ -19,18 +19,42 @@ Role Variables
 
 `openhpc_packages`: additional OpenHPC packages to install
 
-`openhpc_enable`:
+`openhpc_enable` (see the sketch below):
 * `control`: whether to enable control host
-* `batch`: whether to enable compute nodes
+* `batch`: whether to enable compute nodes
 * `runtime`: whether to enable OpenHPC runtime
+* `drain`: whether to drain compute nodes
+* `resume`: whether to resume compute nodes
 
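For illustration, here is a minimal sketch of how these flags might be combined in a single play. The group names match the example inventory below, but the Jinja2 expressions are an assumption for this sketch rather than values prescribed by the role; the `drain` and `resume` flags are exercised in the second example playbook further down.

    ---
    - hosts:
      - cluster_login
      - cluster_control
      - cluster_batch
      become: yes
      roles:
        - role: stackhpc.openhpc
          # Enable each part of the role only where it is needed
          # (assumed expressions; adjust to your own group layout).
          openhpc_enable:
            control: "{{ inventory_hostname in groups['cluster_control'] }}"
            batch: "{{ inventory_hostname in groups['cluster_batch'] }}"
            runtime: true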
-Example Playbook
+Example Inventory
+-----------------
+
+Define an Ansible inventory such as this:
+
+    [openhpc_login]
+    openhpc-login-0 ansible_host=10.60.253.40 ansible_user=centos
+
+    [openhpc_compute]
+    openhpc-compute-0 ansible_host=10.60.253.31 ansible_user=centos
+    openhpc-compute-1 ansible_host=10.60.253.32 ansible_user=centos
+
+    [cluster_login:children]
+    openhpc_login
+
+    [cluster_control:children]
+    openhpc_login
+
+    [cluster_batch:children]
+    openhpc_compute
+
+Example Playbooks
 ----------------
-
+
 To deploy, create a playbook which looks like this:
 
     ---
     - hosts:
+      - cluster_login
       - cluster_control
       - cluster_batch
       become: yes
@@ -53,19 +77,52 @@ To deploy, create a playbook which looks like this:
           openhpc_packages: []
     ...
 
-Example Inventory
------------------
-
-And an Ansible inventory as this:
-
-    [openhpc_login]
-    openhpc-login-0 ansible_host=10.60.253.40 ansible_user=centos
 
-    [openhpc_compute]
-    openhpc-compute-0 ansible_host=10.60.253.33 ansible_user=centos
+For example, to drain nodes before scaling the cluster down to 6 nodes:
 
-    [cluster_control:children]
-    openhpc_login
+    ---
+    - hosts: openstack
+      gather_facts: false
+      vars:
+        partition: "{{ cluster_group.output_value | selectattr('group', 'equalto', item.name) | list }}"
+        openhpc_slurm_partitions:
+          - name: "compute"
+            flavor: "compute-A"
+            image: "CentOS7.5-OpenHPC"
+            num_nodes: 6
+            user: "centos"
+        openhpc_cluster_name: openhpc
+      roles:
+        # Our stackhpc.cluster-infra role can be invoked in `query` mode, which
+        # looks up the state of the cluster by querying the Heat API.
+        - role: stackhpc.cluster-infra
+          cluster_name: "{{ cluster_name }}"
+          cluster_state: query
+          cluster_params:
+            cluster_groups: "{{ cluster_groups }}"
+      tasks:
+        # The original cluster was created with 8 nodes and we now want 6, so
+        # the computed desired_state variable holds the list of instances to
+        # leave untouched.
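+        # For illustration only (hypothetical names): if the existing group has
+        # instances openhpc-compute-0 ... openhpc-compute-7 and num_nodes is 6,
+        # desired_state ends up holding openhpc-compute-0 to openhpc-compute-5.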
+        - name: Count the number of compute nodes per slurm partition
+          set_fact:
+            desired_state: "{{ (( partition | first).nodes | map(attribute='name') | list )[:item.num_nodes] + desired_state | default([]) }}"
+          when: partition | length > 0
+          with_items: "{{ openhpc_slurm_partitions }}"
+        - debug: var=desired_state
+
+    - hosts: cluster_batch
+      become: yes
+      vars:
+        desired_state: "{{ hostvars['localhost']['desired_state'] | default([]) }}"
+      roles:
+        # Now the stackhpc.openhpc role is invoked in drain/resume mode:
+        # instances in desired_state are resumed (if currently drained), and
+        # instances not in desired_state are drained.
+        - role: stackhpc.openhpc
+          openhpc_slurm_control_host: "{{ groups['cluster_control'] | first }}"
+          openhpc_enable:
+            drain: "{{ inventory_hostname not in desired_state }}"
+            resume: "{{ inventory_hostname in desired_state }}"
+    ...
 
-    [cluster_batch:children]
-    openhpc_compute