Skip to content

Commit ec07266

Browse files
committed
Fix templating
1 parent 0682178 commit ec07266

File tree

3 files changed

+8
-7
lines changed

3 files changed

+8
-7
lines changed

defaults/main.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ openhpc_retry_delay: 10
1313
openhpc_job_maxtime: '60-0' # quote this to avoid ansible converting some formats to seconds, which is interpreted as minutes by Slurm
1414
openhpc_config: "{{ openhpc_extra_config | default({}) }}"
1515
openhpc_gres_template: gres.conf.j2
16-
openhpc_gres_autodetect: 'disabled'
16+
# Autodetection mechanism to use: https://slurm.schedmd.com/gres.html#AutoDetect e.g. nvml.
17+
# NOTE: 'off' is a special sentinel value to indicate that autodetection should not be used.
18+
openhpc_gres_autodetect: 'off'
1719
openhpc_slurm_configless: "{{ 'enable_configless' in openhpc_config.get('SlurmctldParameters', []) }}"
1820

1921
openhpc_state_save_location: /var/spool/slurm

filter_plugins/slurm_conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def group_by_gres_autodetect(context, hosts):
7272
"""
7373
result = defaultdict(list)
7474
for host in hosts:
75-
gres_autodetect = _get_hostvar(context, 'gres_autodetect', inventory_hostname=host)
75+
gres_autodetect = _get_hostvar(context, 'openhpc_gres_autodetect', inventory_hostname=host) or 'off'
7676
result[gres_autodetect].append(host)
7777
return result
7878

templates/gres.conf.j2

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,28 @@
11
AutoDetect=off
22
{% for part in openhpc_slurm_partitions %}
33
{% set nodelist = [] %}
4-
{% set donehosts = set() %}
4+
{% set donehosts = [] | unique %}
55
{% for group in part.get('groups', [part]) %}
66
{% if 'gres' in group %}
77
{% set group_name = group.cluster_name|default(openhpc_cluster_name) ~ '_' ~ group.name %}
88
{% set inventory_group_hosts = groups.get(group_name, []) %}
99
{% set autodetect_mechanisms = inventory_group_hosts | group_by_gres_autodetect %}
1010
{% for mechanism, _mechanism_hosts in autodetect_mechanisms.items() %}
11-
{% set mechanism_hosts = _mechanism_hosts - donehosts %}
11+
{% set mechanism_hosts = _mechanism_hosts | difference(donehosts) %}
1212
{% if mechanism != 'disabled' %}
1313
{% for hostlist in (mechanism_hosts | hostlist_expression) %}
1414
NodeName={{ hostlist }} AutoDetect={{ mechanism }}
15-
1615
{% endfor %}
1716
{% else %}
1817
{% for gres in group.gres %}
1918
{% set gres_name, gres_type, _ = gres.conf.split(':') %}
2019
{% for hostlist in (mechanism_hosts | hostlist_expression) %}
21-
NodeName={{ hostlist }} Name={{ gres_name }} Type={{ gres_type }} File={{ gres.file }}
20+
NodeName={{ hostlist }} Name={{ gres_name }} Type={{ gres_type }} File={{ gres.file | mandatory('The gres configuration dictionary: ' ~ gres ~ ' is missing the file key, but openhpc_gres_autodetect is set to off. The error occured on partition: ' ~ part.name ~ '. Please add the file key or set openhpc_gres_autodetect.') }}
2221

2322
{% endfor %}
2423
{% endfor %}
2524
{% endif %}
26-
{% set _ = donehosts.update(mechanism_hosts) %}
25+
{% set donehosts = donehosts | union(mechanism_hosts) %}
2726
{% endfor %}
2827
{% endif %}
2928
{% endfor %}

0 commit comments

Comments
 (0)