Skip to content

Commit cfa2d26

Browse files
authored
Merge pull request #2 from stackhpc/mark-verbal-feedback
Incorporate Mark's verbal feedback
2 parents affd5d3 + f7edc10 commit cfa2d26

File tree

12 files changed

+157
-37
lines changed

12 files changed

+157
-37
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
*.swp
2+
*.retry
3+
*.pyc

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ install:
2727
script:
2828
# Basic role syntax check
2929
- ansible-playbook tests/test.yml -i tests/inventory --syntax-check
30+
- ansible-playbook tests/filter.yml -i tests/inventory -i tests/inventory-mock-groups
3031

3132
notifications:
3233
webhooks: https://galaxy.ansible.com/api/v1/notifications/

defaults/main.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
---
22
slurm_service_enabled: true
33
slurm_service: slurmd
4-
slurm_control_host: "{{ groups['cluster_login'] | first }}"
4+
slurm_control_host: # Example: "{{ groups['cluster_login'] | first }}"
5+
slurm_partitions: []
6+
slurm_cluster_name:
7+
openhpc_packages: []
58
openhpc_enable:
69
control: false
710
batch: false

filter_plugins/group_hosts.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Copyright (c) 2019 StackHPC Ltd.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
4+
# not use this file except in compliance with the License. You may obtain
5+
# a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12+
# License for the specific language governing permissions and limitations
13+
# under the License.
14+
15+
16+
from ansible import errors
17+
import jinja2
18+
import re
19+
20+
# Pattern to match a hostname with a numerical ending.
# Group 1 is the prefix (everything up to and including the last non-digit),
# group 2 is the trailing run of digits. A raw string is used so that ``\D``
# and ``\d`` reach the regex engine instead of being treated as (invalid)
# string escape sequences.
pattern = re.compile(r"^(.*\D(?=\d))(\d+)$")
22+
23+
def _get_hostvar(context, var_name, inventory_hostname=None):
24+
if inventory_hostname is None:
25+
namespace = context
26+
else:
27+
if inventory_hostname not in context['hostvars']:
28+
raise errors.AnsibleFilterError(
29+
"Inventory hostname '%s' not in hostvars" % inventory_hostname)
30+
namespace = context["hostvars"][inventory_hostname]
31+
return namespace.get(var_name)
32+
33+
# Jinja2 3.0 introduced ``pass_context`` and Jinja2 3.1 removed
# ``contextfilter``; use whichever decorator the installed version provides so
# the plugin loads on both old and new releases.
_pass_context = getattr(jinja2, "pass_context", None) or jinja2.contextfilter


@_pass_context
def group_hosts(context, group_names):
    """Map each named inventory group to its hosts in compressed form.

    :param context: template context, passed in by Jinja2; its ``groups``
        entry is read to find the hosts of each group.
    :param group_names: iterable of group names to look up.
    :returns: dict keyed by group name whose values are the lists produced by
        ``_group_hosts``. A group that is missing from ``groups`` maps to the
        result for an empty host list.
    """
    return {
        name: _group_hosts(context["groups"].get(name, []))
        for name in sorted(group_names)
    }
36+
37+
def _group_hosts(hosts):
38+
results = {}
39+
unmatchable = []
40+
for v in hosts:
41+
m = pattern.match(v)
42+
if m:
43+
prefix, suffix = m.groups()
44+
r = results.setdefault(prefix, [])
45+
r.append(int(suffix))
46+
else:
47+
unmatchable.append(v)
48+
return ['{}[{}]'.format(k, _group_numbers(v)) for k, v in results.iteritems()] + unmatchable
49+
50+
def _group_numbers(numbers):
51+
units = []
52+
prev = min(numbers)
53+
for v in sorted(numbers):
54+
if v == prev + 1:
55+
units[-1].append(v)
56+
else:
57+
units.append([v])
58+
prev = v
59+
return ','.join(['{}-{}'.format(u[0], u[-1]) if len(u) > 1 else str(u[0]) for u in units])
60+
61+
class FilterModule(object):
    """Plugin class exposing this module's Jinja2 filters."""

    def filters(self):
        """Return the mapping of filter names to the functions behind them."""
        return dict(group_hosts=group_hosts)

handlers/main.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,9 @@
44
name: "{{slurm_service}}"
55
state: reloaded
66
when: slurm_service_enabled | bool
7+
8+
- name: Restart Munge service
9+
service:
10+
name: "munge"
11+
state: restarted
12+
when: slurm_service_enabled | bool

tasks/infiniband.yml

Lines changed: 0 additions & 28 deletions
This file was deleted.

tasks/runtime.yml

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
11
---
2+
- name: Fail if slurm_control_host or slurm_cluster_name or slurm_partitions are undefined
3+
fail:
4+
msg: "Undefined slurm_control_host or slurm_cluster_name or slurm_partitions."
5+
when:
6+
slurm_control_host == none or
7+
slurm_cluster_name == none or
8+
slurm_partitions | length == 0
9+
210
- name: Install OpenHPC runtime Slurm packages
311
yum:
412
name:
@@ -16,9 +24,34 @@
1624
mode: 0755
1725
state: directory
1826

19-
- name: What is the service
20-
debug:
21-
var: slurm_service
27+
- name: Ensure the Munge service is enabled
28+
service:
29+
name: munge
30+
enabled: "{{ slurm_service_enabled | bool }}"
31+
notify:
32+
- Restart Munge service
33+
34+
- name: Generate a Munge key for the platform
35+
command: "dd if=/dev/urandom of=/etc/munge/munge.key bs=1 count=1024"
36+
args:
37+
creates: "/etc/munge/munge.key"
38+
when: inventory_hostname == slurm_control_host
39+
40+
- name: Retrieve Munge key from Slurm control host
41+
slurp:
42+
src: "/etc/munge/munge.key"
43+
register: slurm_munge_key
44+
when: inventory_hostname == slurm_control_host
45+
46+
- name: Write Munge key
47+
copy:
48+
content: "{{ hostvars[slurm_control_host]['slurm_munge_key']['content'] | b64decode }}"
49+
dest: "/etc/munge/munge.key"
50+
owner: munge
51+
group: munge
52+
mode: 0400
53+
notify:
54+
- Restart Munge service
2255

2356
- name: Ensure SLURM services are enabled
2457
service:

templates/slurm.conf.j2

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#
99
# See the slurm.conf man page for more information.
1010
#
11-
ClusterName={{ cluster_name }}
11+
ClusterName={{ slurm_cluster_name }}
1212
ControlMachine={{ slurm_control_host }}
1313
#ControlAddr=
1414
#BackupController=
@@ -104,11 +104,11 @@ AccountingStorageType=accounting_storage/filetxt
104104
Epilog=/etc/slurm/slurm.epilog.clean
105105
{% for part in slurm_partitions %}
106106
{% for group in part.get('groups', [part]) %}
107-
NodeName={{group.cluster_name|default(cluster_name)}}-{{group.name}}-[0-{{group.num_nodes-1}}] \
107+
NodeName={{group.cluster_name|default(slurm_cluster_name)}}-{{group.name}}-[0-{{group.num_nodes-1}}] \
108108
{% if 'ram_mb' in group %}
109109
RealMemory={{group.ram_mb}} \
110110
{% endif %}
111-
{% set group_name = group.cluster_name|default(cluster_name) ~ '_' ~ group.name %}
111+
{% set group_name = group.cluster_name|default(slurm_cluster_name) ~ '_' ~ group.name %}
112112
{# If using --limit, the first host in each group may not have facts available. Find one that does. #}
113113
{% set group_hosts = groups[group_name] | intersect(play_hosts) %}
114114
{% if group_hosts | length > 0 %}
@@ -121,7 +121,7 @@ NodeName={{group.cluster_name|default(cluster_name)}}-{{group.name}}-[0-{{group.
121121
{% endfor %}
122122
{% endfor %}
123123
{% for part in slurm_partitions %}
124-
PartitionName={{part.name}} Nodes={% for group in part.get('groups', [part]) %}{{group.cluster_name|default(cluster_name)}}-{{group.name}}-[0-{{group.num_nodes-1}}]{% if not loop.last %},{% endif %}{% endfor %} Default=YES MaxTime=24:00:00 State=UP
124+
PartitionName={{part.name}} Nodes={% for group in part.get('groups', [part]) %}{{group.cluster_name|default(slurm_cluster_name)}}-{{group.name}}-[0-{{group.num_nodes-1}}]{% if not loop.last %},{% endif %}{% endfor %} Default=YES MaxTime=24:00:00 State=UP
125125
{% endfor %}
126126
# Want nodes that drop out of SLURM's configuration to be automatically
127127
# returned to service when they come back.

tests/filter.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
---
2+
- hosts: openstack
3+
connection: local
4+
gather_facts: false
5+
vars:
6+
mock_groups:
7+
- mock-group-0
8+
- mock-group-1
9+
grouped_hosts: "{{ mock_groups | group_hosts }}"
10+
tasks:
11+
- name: Hosts
12+
debug: var=grouped_hosts
13+
- name: Test filter
14+
assert:
15+
that:
16+
- "['localhost-0-[0-3,5]', 'localhost-non-numerical'] == grouped_hosts['mock-group-0']"
17+
- "['localhost-1-[1-2,4-5,10]', 'localhost-2-[1-3]'] == grouped_hosts['mock-group-1']"
18+
msg: "Some assertions did not pass" # alias for fail_msg in 2.7+
19+
...

tests/filter_plugins

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../filter_plugins

0 commit comments

Comments
 (0)