Skip to content

Commit 19d4f9f

Browse files
jianguo-ukaeajovial
authored and committed
Fix duplication of nodes in NodeName
Prior to this change, slurmctld would fail to start when the same nodes were added to multiple partitions, e.g.:

    openhpc_slurm_partitions:
      - name: allnodes
        groups:
          - name: alfa
          - name: beta
        partition_params:
          Priority: 50
        default: YES
      - name: alfa
        partition_params:
          Priority: 100
      - name: bravo
        partition_params:
          Priority: 100

This change tracks all nodes that already have a NodeName entry and does not add them again.
1 parent a599130 commit 19d4f9f

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

templates/slurm.conf.j2

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,10 @@ NodeName={{ node }}
150150
# OpenHPC default configuration
151151
PropagateResourceLimitsExcept=MEMLOCK
152152
Epilog=/etc/slurm/slurm.epilog.clean
153+
{% set donehosts = [] %}
153154
{% for part in openhpc_slurm_partitions %}
154155
{% set nodelist = [] %}
155156
{% for group in part.get('groups', [part]) %}
156-
157157
{% set group_name = group.cluster_name|default(openhpc_cluster_name) ~ '_' ~ group.name %}
158158
# openhpc_slurm_partitions group: {{ group_name }}
159159
{% set inventory_group_hosts = groups.get(group_name, []) %}
@@ -164,9 +164,11 @@ Epilog=/etc/slurm/slurm.epilog.clean
164164
{% set ram_mb = (first_host_hv['ansible_memory_mb']['real']['total'] * (group.ram_multiplier | default(openhpc_ram_multiplier))) | int %}
165165
{% for hostlist in (inventory_group_hosts | hostlist_expression) %}
166166
{% set gres = ' Gres=%s' % (','.join(group.gres | map(attribute='conf') )) if 'gres' in group else '' %}
167-
167+
{% if hostlist not in donehosts %}
168168
NodeName={{ hostlist }} State=UNKNOWN RealMemory={{ group.get('ram_mb', ram_mb) }} Sockets={{first_host_hv['ansible_processor_count']}} CoresPerSocket={{ first_host_hv['ansible_processor_cores'] }} ThreadsPerCore={{ first_host_hv['ansible_processor_threads_per_core'] }}{{ gres }}
169+
{% endif %}
169170
{% set _ = nodelist.append(hostlist) %}
171+
{% set _ = donehosts.append(hostlist) %}
170172
{% endfor %}{# nodes #}
171173
{% endif %}{# inventory_group_hosts #}
172174
{% for extra_node_defn in group.get('extra_nodes', []) %}

0 commit comments

Comments
 (0)