Skip to content

Commit 6ebea62

Browse files
committed
auto gres v5 - proper top-level/override
1 parent 56317bb commit 6ebea62

File tree

4 files changed

+41
-109
lines changed

4 files changed

+41
-109
lines changed

defaults/main.yml

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ openhpc_default_config:
4141
PropagateResourceLimitsExcept: MEMLOCK
4242
Epilog: /etc/slurm/slurm.epilog.clean
4343
ReturnToService: 2
44-
GresTypes: "{{ ohpc_nodegroup_gres_types if ohpc_nodegroup_gres_types != '' else 'omit' }}"
44+
GresTypes: "{{ ohpc_gres_types | flatten | sort | unique | join(',') if ohpc_gres_types else 'omit' }}"
4545
openhpc_cgroup_default_config:
4646
ConstrainCores: "yes"
4747
ConstrainDevices: "yes"
@@ -50,19 +50,18 @@ openhpc_cgroup_default_config:
5050

5151
openhpc_config: {}
5252
openhpc_cgroup_config: {}
53-
ohpc_nodegroup_gres_types: >-
54-
{% if openhpc_gres_autodetect == 'nvml' %}
55-
gpu
56-
{% else %}
57-
{{
58-
openhpc_nodegroups |
59-
community.general.json_query('[].gres[].conf') |
60-
map('split', ':') |
61-
map('first') |
62-
unique |
63-
join(',')
64-
}}
65-
{% endif %}
53+
ohpc_gres_types:
54+
# toplevel nvml autodetect:
55+
- "{{ ['gpu'] if openhpc_gres_autodetect == 'nvml' else [] }}"
56+
# nodegroup nvml autodetect ('| list' is required: a bare select() generator is always truthy, which would add 'gpu' unconditionally):
57+
- "{{ ['gpu'] if openhpc_nodegroups | map(attribute='gres_autodetect', default='') | unique | select('eq', 'nvml') | list else [] }}"
58+
# nodegroup specific gres conf:
59+
- "{{
60+
openhpc_nodegroups |
61+
community.general.json_query('[].gres[].conf') |
62+
map('regex_search', '^(\\w+)')
63+
}}"
64+
6665
openhpc_gres_template: gres.conf.j2
6766
openhpc_cgroup_template: cgroup.conf.j2
6867

files/nodegroup.schema

Lines changed: 0 additions & 86 deletions
This file was deleted.

tasks/validate.yml

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,35 @@
2121
delegate_to: localhost
2222
run_once: true
2323

24-
- name: Validate openhpc_nodegroups
25-
ansible.utils.validate:
26-
criteria: "{{ lookup('file', 'nodegroup.schema') }}"
27-
engine: 'ansible.utils.jsonschema'
28-
data: "{{ item }}"
29-
vars:
30-
ansible_jsonschema_draft: '2020-12'
24+
25+
# - name: Validate openhpc_nodegroups contain name
26+
# ansible.builtin.assert:
27+
# that:
28+
# - "'name' in nodegroup"
29+
# - openhpc_cluster_name ~ '_' ~ nodegroup.name in group_names
30+
# fail_msg: "nodegroup does not specify a name or openhpc_cluster_name ~ '_' ~ nodegroup.name not found in inventory"
31+
32+
# loop: "{{ openhpc_nodegroups }}"
33+
34+
- name: debug openhpc_nodegroups
35+
ansible.builtin.debug:
36+
msg: "{{ (item.gres | select('contains', 'file')) }} == {{ (item.gres | length) }}"
37+
loop: "{{ openhpc_nodegroups }}"
38+
when:
39+
- item.gres_autodetect | default(openhpc_gres_autodetect) == 'off'
40+
- "'gres' in item"
3141
delegate_to: localhost
42+
run_once: true
43+
44+
- name: Validate openhpc_nodegroups
45+
ansible.builtin.assert:
46+
# '| list' materialises the select() generator so that 'length' can count the gres entries which define 'file'
that: "(item.gres | select('contains', 'file') | list | length) == (item.gres | length)"
47+
fail_msg: "GRES configuration(s) in openhpc_nodegroup '{{ item.name }}' do not include 'file' but GRES autodetection is not enabled"
3248
loop: "{{ openhpc_nodegroups }}"
49+
when:
50+
- item.gres_autodetect | default(openhpc_gres_autodetect) == 'off'
51+
- "'gres' in item"
52+
delegate_to: localhost
3353
run_once: true
3454

3555
- name: Fail if partition configuration is outdated

templates/gres.conf.j2

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
AutoDetect={{ openhpc_gres_autodetect }}
22
{% for nodegroup in openhpc_nodegroups %}
3-
{% set nodegroup_gres_autodetect = nodegroup.gres_autodetect | default('off') %}
43
{% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %}
54
{% set inventory_group_hosts = groups.get(inventory_group_name, []) %}
65
{% set hostlist_string = inventory_group_hosts | hostlist_expression | join(',') %}
76
{% for gres in nodegroup.gres | default([]) %}
87
{% set gres_name, gres_type, _ = gres.conf.split(':') %}
9-
NodeName={{ hostlist_string }} AutoDetect={{ nodegroup_gres_autodetect }} Name={{ gres_name }} Type={{ gres_type }}{% if openhpc_gres_autodetect == nodegroup_gres_autodetect == 'off' %} File={{ gres.file | mandatory('The gres configuration dictionary: ' ~ gres ~ ' is missing the file key, but gres_autodetect is not specified. The error occured on node group: ' ~ nodegroup.name ~ '. Please add the file key or set gres_autodetect.') }}{% endif %}
8+
NodeName={{ hostlist_string }}{% if 'gres_autodetect' in nodegroup %} AutoDetect={{ nodegroup.gres_autodetect }}{% endif %} Name={{ gres_name }} Type={{ gres_type }}{% if 'file' in gres %} File={{ gres.file }}{% endif %}
109
{% endfor %}{# gres #}
1110
{% endfor %}{# nodegroup #}

0 commit comments

Comments
 (0)