From 19d4f9f5347b407888d10d296a0d32edb6b78d40 Mon Sep 17 00:00:00 2001 From: Jianguo Rao Date: Fri, 8 Nov 2024 14:56:57 +0000 Subject: [PATCH] Fix duplication of nodes in NodeName Prior to this change, slurmctld would fail to start if the same nodes were added to multiple partitions, e.g.: openhpc_slurm_partitions: - name: allnodes groups: - name: alfa - name: beta partition_params: Priority: 50 default: YES - name: alfa partition_params: Priority: 100 - name: bravo partition_params: Priority: 100 This change tracks every hostlist that already has a NodeName entry and does not add it again. --- templates/slurm.conf.j2 | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/templates/slurm.conf.j2 b/templates/slurm.conf.j2 index 1b288cf..67cc180 100644 --- a/templates/slurm.conf.j2 +++ b/templates/slurm.conf.j2 @@ -150,10 +150,10 @@ NodeName={{ node }} # OpenHPC default configuration PropagateResourceLimitsExcept=MEMLOCK Epilog=/etc/slurm/slurm.epilog.clean +{% set donehosts = [] %} {% for part in openhpc_slurm_partitions %} {% set nodelist = [] %} {% for group in part.get('groups', [part]) %} - {% set group_name = group.cluster_name|default(openhpc_cluster_name) ~ '_' ~ group.name %} # openhpc_slurm_partitions group: {{ group_name }} {% set inventory_group_hosts = groups.get(group_name, []) %} @@ -164,9 +164,11 @@ Epilog=/etc/slurm/slurm.epilog.clean {% set ram_mb = (first_host_hv['ansible_memory_mb']['real']['total'] * (group.ram_multiplier | default(openhpc_ram_multiplier))) | int %} {% for hostlist in (inventory_group_hosts | hostlist_expression) %} {% set gres = ' Gres=%s' % (','.join(group.gres | map(attribute='conf') )) if 'gres' in group else '' %} - + {% if hostlist not in donehosts %} NodeName={{ hostlist }} State=UNKNOWN RealMemory={{ group.get('ram_mb', ram_mb) }} Sockets={{first_host_hv['ansible_processor_count']}} CoresPerSocket={{ first_host_hv['ansible_processor_cores'] }} ThreadsPerCore={{ first_host_hv['ansible_processor_threads_per_core'] }}{{ gres }} + 
{% endif %} {% set _ = nodelist.append(hostlist) %} + {% set _ = donehosts.append(hostlist) %} {% endfor %}{# nodes #} {% endif %}{# inventory_group_hosts #} {% for extra_node_defn in group.get('extra_nodes', []) %}