From ba8a38aa2900cdaf2634ea772035d73fbbea2db2 Mon Sep 17 00:00:00 2001
From: Steve Brasier
Date: Wed, 7 May 2025 12:59:35 +0000
Subject: [PATCH 1/9] nodegroups using nodesets - doesn't handle empty
 nodegroups

---
 README.md               | 52 ++++++++++++++++++-----------
 defaults/main.yml       |  3 +-
 templates/slurm.conf.j2 | 72 ++++++++++++++++++++---------------------
 3 files changed, 71 insertions(+), 56 deletions(-)

diff --git a/README.md b/README.md
index 34c6908..d5ce75d 100644
--- a/README.md
+++ b/README.md
@@ -50,30 +50,44 @@ each list element:
 
 ### slurm.conf
 
-`openhpc_slurm_partitions`: Optional. List of one or more slurm partitions, default `[]`. Each partition may contain the following values:
-* `groups`: If there are multiple node groups that make up the partition, a list of group objects can be defined here.
-  Otherwise, `groups` can be omitted and the following attributes can be defined in the partition object:
-  * `name`: The name of the nodes within this group.
-  * `cluster_name`: Optional. An override for the top-level definition `openhpc_cluster_name`.
-  * `extra_nodes`: Optional. A list of additional node definitions, e.g. for nodes in this group/partition not controlled by this role. Each item should be a dict, with keys/values as per the ["NODE CONFIGURATION"](https://slurm.schedmd.com/slurm.conf.html#lbAE) docs for slurm.conf. Note the key `NodeName` must be first.
-  * `ram_mb`: Optional. The physical RAM available in each node of this group ([slurm.conf](https://slurm.schedmd.com/slurm.conf.html) parameter `RealMemory`) in MiB. This is set using ansible facts if not defined, equivalent to `free --mebi` total * `openhpc_ram_multiplier`.
-  * `ram_multiplier`: Optional. An override for the top-level definition `openhpc_ram_multiplier`. Has no effect if `ram_mb` is set.
+`openhpc_nodegroups`: Optional, default `[]`. List of mappings, each defining a
+unique set of homogeneous nodes:
+  * `name`: Required. Name of node group.
+  * `ram_mb`: Optional. The physical RAM available in each node of this group
+    ([slurm.conf](https://slurm.schedmd.com/slurm.conf.html) parameter `RealMemory`)
+    in MiB. This is set using ansible facts if not defined, equivalent to
+    `free --mebi` total * `openhpc_ram_multiplier`.
+  * `ram_multiplier`: Optional. An override for the top-level definition
+    `openhpc_ram_multiplier`. Has no effect if `ram_mb` is set.
   * `gres`: Optional. List of dicts defining [generic resources](https://slurm.schedmd.com/gres.html). Each dict must define:
     - `conf`: A string with the [resource specification](https://slurm.schedmd.com/slurm.conf.html#OPT_Gres_1) but requiring the format `<name>:<type>:<number>`, e.g. `gpu:A100:2`. Note the `type` is an arbitrary string.
     - `file`: A string with the [File](https://slurm.schedmd.com/gres.conf.html#OPT_File) (path to device(s)) for this resource, e.g. `/dev/nvidia[0-1]` for the above example.
 
     Note [GresTypes](https://slurm.schedmd.com/slurm.conf.html#OPT_GresTypes) must be set in `openhpc_config` if this is used.
-
-* `default`: Optional. A boolean flag for whether this partion is the default. Valid settings are `YES` and `NO`.
-* `maxtime`: Optional. A partition-specific time limit following the format of [slurm.conf](https://slurm.schedmd.com/slurm.conf.html) parameter `MaxTime`. The default value is
+  * `params`: Optional. Mapping of additional parameters and values for
+    [node configuration](https://slurm.schedmd.com/slurm.conf.html#lbAE).
+
+  Each nodegroup will contain hosts from an Ansible inventory group named
+  `{{ openhpc_cluster_name }}_{{ group_name }}`. Note that:
+  - Each host may only appear in one nodegroup.
+  - Hosts in a nodegroup are assumed to be homogeneous in terms of processor and memory.
+  - Hosts may have arbitrary hostnames, but these should be lowercase to avoid a
+    mismatch between inventory and actual hostname.
+  - An inventory group may be missing or empty, in which case the node group
+    contains no hosts.
+  - If the inventory group is not empty, the play must contain at least one host.
+    This is used to set `Sockets`, `CoresPerSocket`, `ThreadsPerCore` and
+    optionally `RealMemory` for the nodegroup.
+
+`openhpc_partitions`: Optional, default `[]`. List of mappings, each defining a
+partition. Each partition mapping may contain:
+  * `name`: Required. Name of partition.
+  * `groups`: Optional. List of nodegroup names. If omitted, the partition name
+    is assumed to match a nodegroup name.
+  * `default`: Optional. A boolean flag for whether this partition is the default. Valid settings are `YES` and `NO`.
+  * `maxtime`: Optional. A partition-specific time limit following the format of [slurm.conf](https://slurm.schedmd.com/slurm.conf.html) parameter `MaxTime`. The default value is
 given by `openhpc_job_maxtime`. The value should be quoted to avoid Ansible conversions.
-* `partition_params`: Optional. Mapping of additional parameters and values for [partition configuration](https://slurm.schedmd.com/slurm.conf.html#SECTION_PARTITION-CONFIGURATION).
-
-For each group (if used) or partition any nodes in an ansible inventory group `<cluster_name>_<group_name>` will be added to the group/partition. Note that:
-- Nodes may have arbitrary hostnames but these should be lowercase to avoid a mismatch between inventory and actual hostname.
-- Nodes in a group are assumed to be homogenous in terms of processor and memory.
-- An inventory group may be empty or missing, but if it is not then the play must contain at least one node from it (used to set processor information).
-
+  * `params`: Optional. Mapping of additional parameters and values for
+    [partition configuration](https://slurm.schedmd.com/slurm.conf.html#SECTION_PARTITION-CONFIGURATION).
 
 `openhpc_job_maxtime`: Maximum job time limit, default `'60-0'` (60 days). See [slurm.conf](https://slurm.schedmd.com/slurm.conf.html) parameter `MaxTime` for format. The default is 60 days. The value should be quoted to avoid Ansible conversions.
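As a minimal sketch of the interface documented above (the cluster name, nodegroup names and GPU details are hypothetical, not taken from this patch):

```yaml
# Inventory is assumed to contain groups "mycluster_general" (e.g. hosts
# general-0, general-1) and "mycluster_gpu" (e.g. host gpu-0).
openhpc_cluster_name: mycluster

openhpc_nodegroups:
  - name: general              # hosts from inventory group mycluster_general
  - name: gpu                  # hosts from inventory group mycluster_gpu
    gres:
      - conf: gpu:A100:2       # <name>:<type>:<number>
        file: /dev/nvidia[0-1]

openhpc_partitions:
  - name: general              # no `groups` key, so uses nodegroup "general"
  - name: all                  # a partition spanning both nodegroups
    groups:
      - general
      - gpu
    maxtime: '2-0'             # 2 days; quoted to avoid Ansible conversion
    default: 'NO'              # quoted so YAML does not coerce to boolean

# Required whenever gres is used:
openhpc_config:
  GresTypes:
    - gpu
```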
diff --git a/defaults/main.yml b/defaults/main.yml
index c806809..d96af28 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -4,7 +4,8 @@ openhpc_slurm_service_started: "{{ openhpc_slurm_service_enabled }}"
 openhpc_slurm_service:
 openhpc_slurm_control_host: "{{ inventory_hostname }}"
 #openhpc_slurm_control_host_address:
-openhpc_slurm_partitions: []
+openhpc_partitions: []
+openhpc_nodegroups: []
 openhpc_cluster_name:
 openhpc_packages:
   - slurm-libpmi-ohpc
diff --git a/templates/slurm.conf.j2 b/templates/slurm.conf.j2
index 94f0465..f0b0886 100644
--- a/templates/slurm.conf.j2
+++ b/templates/slurm.conf.j2
@@ -135,9 +135,6 @@ SlurmdSyslogDebug=info
 #SlurmSchedLogFile=
 #SlurmSchedLogLevel=
 #DebugFlags=
-#
-#
-# POWER SAVE SUPPORT FOR IDLE NODES - NOT SUPPORTED IN THIS APPLIANCE VERSION
 
 # LOGIN-ONLY NODES
 # Define slurmd nodes not in partitions for login-only nodes in "configless" mode:
@@ -145,45 +142,48 @@ SlurmdSyslogDebug=info
 NodeName={{ node }}
 {% endfor %}{% endif %}
 
-# COMPUTE NODES
-# OpenHPC default configuration
 PropagateResourceLimitsExcept=MEMLOCK
 Epilog=/etc/slurm/slurm.epilog.clean
-{% set donehosts = [] %}
-{% for part in openhpc_slurm_partitions %}
-    {% set nodelist = [] %}
-    {% for group in part.get('groups', [part]) %}
-        {% set group_name = group.cluster_name|default(openhpc_cluster_name) ~ '_' ~ group.name %}
-# openhpc_slurm_partitions group: {{ group_name }}
-        {% set inventory_group_hosts = groups.get(group_name, []) %}
-        {% if inventory_group_hosts | length > 0 %}
-            {% set play_group_hosts = inventory_group_hosts | intersect (play_hosts) %}
-            {% set first_host = play_group_hosts | first | mandatory('Group "' ~ group_name ~ '" contains no hosts in this play - was --limit used?') %}
-            {% set first_host_hv = hostvars[first_host] %}
-            {% set ram_mb = (first_host_hv['ansible_memory_mb']['real']['total'] * (group.ram_multiplier | default(openhpc_ram_multiplier))) | int %}
-            {% for hostlist in (inventory_group_hosts | hostlist_expression) %}
-                {% set gres = ' Gres=%s' % (','.join(group.gres | map(attribute='conf') )) if 'gres' in group else '' %}
-                {% if hostlist not in donehosts %}
-NodeName={{ hostlist }} State=UNKNOWN RealMemory={{ group.get('ram_mb', ram_mb) }} Sockets={{first_host_hv['ansible_processor_count']}} CoresPerSocket={{ first_host_hv['ansible_processor_cores'] }} ThreadsPerCore={{ first_host_hv['ansible_processor_threads_per_core'] }}{{ gres }}
-                {% endif %}
-                {% set _ = nodelist.append(hostlist) %}
-                {% set _ = donehosts.append(hostlist) %}
-            {% endfor %}{# nodes #}
-        {% endif %}{# inventory_group_hosts #}
-        {% for extra_node_defn in group.get('extra_nodes', []) %}
-{{ extra_node_defn.items() | map('join', '=') | join(' ') }}
-            {% set _ = nodelist.append(extra_node_defn['NodeName']) %}
-        {% endfor %}
-    {% endfor %}{# group #}
-{% if not nodelist %}{# empty partition #}
-{% set nodelist = ['""'] %}
-{% endif %}
-PartitionName={{part.name}} Default={{ part.get('default', 'YES') }} MaxTime={{ part.get('maxtime', openhpc_job_maxtime) }} State=UP Nodes={{ nodelist | join(',') }} {{ part.partition_params | default({}) | dict2parameters }}
-{% endfor %}{# partitions #}
+
+# COMPUTE NODES
+# OpenHPC default configuration
+{% for nodegroup in openhpc_nodegroups %}
+{% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %}
+{% set inventory_group_hosts = groups.get(inventory_group_name, []) %}
+{% if inventory_group_hosts | length > 0 %}
+{% set play_group_hosts = inventory_group_hosts | intersect (play_hosts) %}
+{% set first_host = play_group_hosts | first | mandatory('Inventory group "' ~ inventory_group_name ~ '" contains no hosts in this play - was --limit used?') %}
+{% set first_host_hv = hostvars[first_host] %}
+{% set ram_mb = (first_host_hv['ansible_memory_mb']['real']['total'] * (nodegroup.ram_multiplier | default(openhpc_ram_multiplier))) | int %}
+{% set hostlists = (inventory_group_hosts | hostlist_expression) %}{# hosts in inventory group aren't necessarily a single hostlist expression #}
+{% for hostlist in hostlists %}
+NodeName={{ hostlist }} {{ '' -}}
+    State=UNKNOWN {{ '' -}}
+    RealMemory={{ nodegroup.ram_mb | default(ram_mb) }} {{ '' -}}
+    Sockets={{first_host_hv['ansible_processor_count'] }} {{ '' -}}
+    CoresPerSocket={{ first_host_hv['ansible_processor_cores'] }} {{ '' -}}
+    ThreadsPerCore={{ first_host_hv['ansible_processor_threads_per_core'] }} {{ '' -}}
+    {{ nodegroup.params | default({}) | dict2parameters }} {{ '' -}}
+    {% if 'gres' in nodegroup %}Gres={{ ','.join(nodegroup.gres | map(attribute='conf')) }}{% endif %}
+{% endfor %}{# hostlists #}
+
+NodeSet={{ nodegroup.name }} Nodes={{ ','.join(hostlists) }}{# no support for creating nodesets by Feature #}
+{% endif %}{# 1 or more hosts in inventory #}
+{% endfor %}
 
 # Define a non-existent node, in no partition, so that slurmctld starts even with all partitions empty
 NodeName=nonesuch
 
+# PARTITIONS
+{% for partition in openhpc_partitions %}
+PartitionName={{partition.name}} {{ '' -}}
+    Default={{ partition.get('default', 'YES') }} {{ '' -}}
+    MaxTime={{ partition.get('maxtime', openhpc_job_maxtime) }} {{ '' -}}
+    State=UP Nodes={{ partition.get('groups', [partition.name]) | join(',') }} {{ '' -}}
+    {{ partition.params | default({}) | dict2parameters }}
+{% endfor %}{# openhpc_partitions #}
+
 {% if openhpc_slurm_configless | bool %}SlurmctldParameters=enable_configless{% endif %}
+
 ReturnToService=2

From 8f9436f9e6f157c0b51c790c55276caee2afdfa2 Mon Sep 17 00:00:00 2001
From: Steve Brasier
Date: Wed, 7 May 2025 13:04:48 +0000
Subject: [PATCH 2/9] cope with empty nodegroups/partitions

---
 templates/slurm.conf.j2 | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/templates/slurm.conf.j2 b/templates/slurm.conf.j2
index f0b0886..d6c9f96 100644
--- a/templates/slurm.conf.j2
+++ b/templates/slurm.conf.j2
@@ -166,9 +166,10 @@ NodeName={{ hostlist }} {{ '' -}}
     {{ nodegroup.params | default({}) | dict2parameters }} {{ '' -}}
     {% if 'gres' in nodegroup %}Gres={{ ','.join(nodegroup.gres | map(attribute='conf')) }}{% endif %}
 {% endfor %}{# hostlists #}
-
-NodeSet={{ nodegroup.name }} Nodes={{ ','.join(hostlists) }}{# no support for creating nodesets by Feature #}
 {% endif %}{# 1 or more hosts in inventory #}
+
+NodeSet={{ nodegroup.name }} Nodes={{ ','.join(hostlists | default(['""'])) }}{# no support for creating nodesets by Feature #}
+
 {% endfor %}
 
 # Define a non-existent node, in no partition, so that slurmctld starts even with all partitions empty

From 0abbf768dad2beda08abe450226662302a817b93 Mon Sep 17 00:00:00 2001
From: Steve Brasier
Date: Wed, 7 May 2025 13:13:47 +0000
Subject: [PATCH 3/9] make gres work again

---
 templates/gres.conf.j2 | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/templates/gres.conf.j2 b/templates/gres.conf.j2
index a6fa27b..bc23ed5 100644
--- a/templates/gres.conf.j2
+++ b/templates/gres.conf.j2
@@ -1,16 +1,11 @@
 AutoDetect=off
-{% for part in openhpc_slurm_partitions %}
-{% set nodelist = [] %}
-{% for group in part.get('groups', [part]) %}
-{% if 'gres' in group %}
-{% for gres in group.gres %}
-{% set gres_name, gres_type, _ = gres.conf.split(':') %}
-{% set group_name = group.cluster_name|default(openhpc_cluster_name) ~ '_' ~ group.name %}
-{% set inventory_group_hosts = groups.get(group_name, []) %}
-{% for hostlist in (inventory_group_hosts | hostlist_expression) %}
+{% for nodegroup in openhpc_nodegroups %}
+{% for gres in nodegroup.gres | default([]) %}
+{% set gres_name, gres_type, _ = gres.conf.split(':') %}
+{% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %}
+{% set inventory_group_hosts = groups.get(inventory_group_name, []) %}
+{% for hostlist in (inventory_group_hosts | hostlist_expression) %}
 NodeName={{ hostlist }} Name={{ gres_name }} Type={{ gres_type }} File={{ gres.file }}
-{% endfor %}
-{% endfor %}
-{% endif %}
-{% endfor %}
-{% endfor %}
+{% endfor %}{# hostlists #}
+{% endfor %}{# gres #}
+{% endfor %}{# nodegroup #}

From 04f3bbbb872cac8305ccb06035d15f1bd2c8ceaf Mon Sep 17 00:00:00 2001
From: Steve Brasier
Date: Wed, 7 May 2025 15:49:29 +0000
Subject: [PATCH 4/9] template slurm.conf parameters from combined variables

---
 README.md               |   7 +-
 defaults/main.yml       |  32 ++++++++-
 tasks/runtime.yml       |  36 +---------
 templates/slurm.conf.j2 | 146 ++--------------------------------------
 4 files changed, 44 insertions(+), 177 deletions(-)

diff --git a/README.md b/README.md
index d5ce75d..d77663a 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,12 @@ partition. Each partition mapping may contain:
 
 `openhpc_cluster_name`: name of the cluster.
 
-`openhpc_config`: Optional. Mapping of additional parameters and values for `slurm.conf`. Note these will override any included in `templates/slurm.conf.j2`.
+`openhpc_config`: Optional. Mapping of additional parameters and values for
+[slurm.conf](https://slurm.schedmd.com/slurm.conf.html). Keys are parameter
+names and values are lists or strings as appropriate. This can be used to
+supplement or override the template defaults, or to remove a template parameter
+by setting the value to `'omit'` - note this is the literal string, not the
+Ansible special variable.
 
 `openhpc_ram_multiplier`: Optional, default `0.95`. Multiplier used in the calculation: `total_memory * openhpc_ram_multiplier` when setting `RealMemory` for the partition in slurm.conf. Can be overriden on a per partition basis using `openhpc_slurm_partitions.ram_multiplier`. Has no effect if `openhpc_slurm_partitions.ram_mb` is set.
diff --git a/defaults/main.yml b/defaults/main.yml
index d96af28..56b15c4 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -12,7 +12,37 @@ openhpc_packages:
 openhpc_resume_timeout: 300
 openhpc_retry_delay: 10
 openhpc_job_maxtime: '60-0' # quote this to avoid ansible converting some formats to seconds, which is interpreted as minutes by Slurm
-openhpc_config: "{{ openhpc_extra_config | default({}) }}"
+openhpc_default_config:
+  # This only defines values which are not Slurm defaults
+  SlurmctldHost: "{{ openhpc_slurm_control_host }}{% if openhpc_slurm_control_host_address is defined %}({{ openhpc_slurm_control_host_address }}){% endif %}"
+  ProctrackType: proctrack/linuxproc # TODO: really want cgroup but needs cgroup.conf and workaround for CI
+  SlurmdSpoolDir: /var/spool/slurm # NB: not OpenHPC default!
+  SlurmUser: slurm
+  StateSaveLocation: "{{ openhpc_state_save_location }}"
+  SlurmctldTimeout: 300
+  SchedulerType: sched/backfill
+  SelectType: select/cons_tres
+  SelectTypeParameters: CR_Core
+  PriorityWeightPartition: 1000
+  PreemptType: preempt/partition_prio
+  PreemptMode: SUSPEND,GANG
+  AccountingStoragePass: "{{ openhpc_slurm_accounting_storage_pass | default('omit') }}"
+  AccountingStorageHost: "{{ openhpc_slurm_accounting_storage_host }}"
+  AccountingStoragePort: "{{ openhpc_slurm_accounting_storage_port }}"
+  AccountingStorageType: "{{ openhpc_slurm_accounting_storage_type }}"
+  AccountingStorageUser: "{{ openhpc_slurm_accounting_storage_user }}"
+  JobCompLoc: "{{ openhpc_slurm_job_comp_loc }}"
+  JobCompType: "{{ openhpc_slurm_job_comp_type }}"
+  JobAcctGatherFrequency: "{{ openhpc_slurm_job_acct_gather_frequency }}"
+  JobAcctGatherType: "{{ openhpc_slurm_job_acct_gather_type }}"
+  SlurmctldSyslogDebug: info
+  SlurmdSyslogDebug: info
+  PropagateResourceLimitsExcept: MEMLOCK
+  Epilog: /etc/slurm/slurm.epilog.clean
+  ReturnToService: 2
+  SlurmctldParameters: "{{ 'enable_configless' if openhpc_slurm_configless else 'omit' }}"
+
+openhpc_config: {}
 openhpc_gres_template: gres.conf.j2
 openhpc_slurm_configless: "{{ 'enable_configless' in openhpc_config.get('SlurmctldParameters', []) }}"
diff --git a/tasks/runtime.yml b/tasks/runtime.yml
index 18d75f7..358bf95 100644
--- a/tasks/runtime.yml
+++ b/tasks/runtime.yml
@@ -80,43 +80,9 @@
     notify: Restart slurmdbd service
   when: openhpc_enable.database | default(false) | bool
 
-- name: Make local tempfile for slurm.conf templating # ensures simultaneous runs don't clobber each other
-  ansible.builtin.tempfile:
-  register: _slurm_conf_tmpfile
-  delegate_to: localhost
-  when: openhpc_enable.control | default(false) or not openhpc_slurm_configless | bool
-  changed_when: false # so molecule doesn't fail
-  become: no
-
-- name: Template basic slurm.conf
+- name: Template slurm.conf
   template:
     src: slurm.conf.j2
-    dest: "{{ _slurm_conf_tmpfile.path }}"
-    lstrip_blocks: true
-    mode: 0644
-  delegate_to: localhost
-  when: openhpc_enable.control | default(false) or not openhpc_slurm_configless | bool
-  changed_when: false # so molecule doesn't fail
-  become: no
-
-- name: Customise slurm.conf
-  community.general.ini_file:
-    path: "{{ _slurm_conf_tmpfile.path }}"
-    option: "{{ item.key }}"
-    section: ''
-    value: "{{ (item.value | join(',')) if (item.value is sequence and item.value is not string) else item.value }}"
-    no_extra_spaces: true
-    create: no
-    mode: 0644
-  loop: "{{ openhpc_config | dict2items }}"
-  delegate_to: localhost
-  when: openhpc_enable.control | default(false) or not openhpc_slurm_configless | bool
-  changed_when: false # so molecule doesn't fail
-  become: no
-
-- name: Create slurm.conf
-  copy:
-    src: "{{ _slurm_conf_tmpfile.path }}"
     dest: /etc/slurm/slurm.conf
     owner: root
     group: root
diff --git a/templates/slurm.conf.j2 b/templates/slurm.conf.j2
index d6c9f96..3fc7075 100644
--- a/templates/slurm.conf.j2
+++ b/templates/slurm.conf.j2
@@ -1,140 +1,12 @@
-#
-# Example slurm.conf file. Please run configurator.html
-# (in doc/html) to build a configuration file customized
-# for your environment.
-#
-#
-# slurm.conf file generated by configurator.html.
-#
-# See the slurm.conf man page for more information.
-#
 ClusterName={{ openhpc_cluster_name }}
-SlurmctldHost={{ openhpc_slurm_control_host }}{% if openhpc_slurm_control_host_address is defined %}({{ openhpc_slurm_control_host_address }}){% endif %}
-#DisableRootJobs=NO
-#EnforcePartLimits=NO
-#EpilogSlurmctld=
-#FirstJobId=1
-#MaxJobId=67043328
-#GresTypes=
-#GroupUpdateForce=0
-#GroupUpdateTime=600
-#JobFileAppend=0
-#JobRequeue=1
-#JobSubmitPlugins=lua
-#KillOnBadExit=0
-#LaunchType=launch/slurm
-#Licenses=foo*4,bar
-#MailProg=/bin/mail
-#MaxJobCount=10000
-#MaxStepCount=40000
-#MaxTasksPerNode=512
-MpiDefault=none
-#MpiParams=ports=#-#
-#PluginDir=
-#PlugStackConfig=
-#PrivateData=jobs
-ProctrackType=proctrack/linuxproc # TODO: really want cgroup but needs cgroup.conf and workaround for CI
-#Prolog=
-#PrologFlags=
-#PrologSlurmctld=
-#PropagatePrioProcess=0
-#PropagateResourceLimits=
-#PropagateResourceLimitsExcept=
-#RebootProgram=
-SlurmctldPidFile=/var/run/slurmctld.pid
-SlurmctldPort=6817
-SlurmdPidFile=/var/run/slurmd.pid
-SlurmdPort=6818
-SlurmdSpoolDir=/var/spool/slurm # NB: not OpenHPC default!
-SlurmUser=slurm
-#SlurmdUser=root
-#SrunEpilog=
-#SrunProlog=
-StateSaveLocation={{ openhpc_state_save_location }}
-SwitchType=switch/none
-#TaskEpilog=
-#TaskPlugin=task/affinity
-#TaskProlog=
-#TopologyPlugin=topology/tree
-#TmpFS=/tmp
-#TrackWCKey=no
-#TreeWidth=
-#UnkillableStepProgram=
-#UsePAM=0
-#
-#
-# TIMERS
-#BatchStartTimeout=10
-#CompleteWait=0
-#EpilogMsgTime=2000
-#GetEnvTimeout=2
-#HealthCheckInterval=0
-#HealthCheckProgram=
-InactiveLimit=0
-KillWait=30
-#MessageTimeout=10
-#ResvOverRun=0
-MinJobAge=300
-#OverTimeLimit=0
-SlurmctldTimeout=300
-SlurmdTimeout=300
-#UnkillableStepTimeout=60
-#VSizeFactor=0
-Waittime=0
-#
-#
-# SCHEDULING
-#DefMemPerCPU=0
-#MaxMemPerCPU=0
-#SchedulerTimeSlice=30
-SchedulerType=sched/backfill
-SelectType=select/cons_tres
-SelectTypeParameters=CR_Core
-#
-#
-# JOB PRIORITY
-#PriorityFlags=
-PriorityType=priority/multifactor
-#PriorityDecayHalfLife=
-#PriorityCalcPeriod=
-#PriorityFavorSmall=
-#PriorityMaxAge=
-#PriorityUsageResetPeriod=
-#PriorityWeightAge=
-#PriorityWeightFairshare=
-#PriorityWeightJobSize=
-PriorityWeightPartition=1000
-#PriorityWeightQOS=
-PreemptType=preempt/partition_prio
-PreemptMode=SUSPEND,GANG
-#
-# LOGGING AND ACCOUNTING
-#AccountingStorageEnforce=0
-AccountingStorageHost={{ openhpc_slurm_accounting_storage_host }}
-{% if openhpc_slurm_accounting_storage_pass | default(false, true) %}
-AccountingStoragePass={{ openhpc_slurm_accounting_storage_pass }}
-{% endif %}
-AccountingStoragePort={{ openhpc_slurm_accounting_storage_port }}
-AccountingStorageType={{ openhpc_slurm_accounting_storage_type }}
-AccountingStorageUser={{ openhpc_slurm_accounting_storage_user }}
-#AccountingStoreFlags=
-#JobCompHost=
-JobCompLoc={{ openhpc_slurm_job_comp_loc }}
-#JobCompPass=
-#JobCompPort=
-JobCompType={{ openhpc_slurm_job_comp_type }}
-#JobCompUser=
-#JobContainerType=job_container/none
-JobAcctGatherFrequency={{ openhpc_slurm_job_acct_gather_frequency }}
-JobAcctGatherType={{ openhpc_slurm_job_acct_gather_type }}
+# PARAMETERS
+{% for k, v in openhpc_default_config | combine(openhpc_config) | items %}
+{% if v != "omit" %}{# allow removing items using setting key: null #}
+{{ k }}={{ v | join(',') if (v is sequence and v is not string) else v }}
+{% endif %}
+{% endfor %}
 
-# By default, SLURM will log to syslog, which is what we want
-SlurmctldSyslogDebug=info
-SlurmdSyslogDebug=info
-#SlurmSchedLogFile=
-#SlurmSchedLogLevel=
-#DebugFlags=
 
 # LOGIN-ONLY NODES
 # Define slurmd nodes not in partitions for login-only nodes in "configless" mode:
@@ -142,11 +14,8 @@ SlurmdSyslogDebug=info
 NodeName={{ node }}
 {% endfor %}{% endif %}
 
-PropagateResourceLimitsExcept=MEMLOCK
-Epilog=/etc/slurm/slurm.epilog.clean
 
 # COMPUTE NODES
-# OpenHPC default configuration
 {% for nodegroup in openhpc_nodegroups %}
 {% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %}
 {% set inventory_group_hosts = groups.get(inventory_group_name, []) %}
@@ -185,6 +54,3 @@ PartitionName={{partition.name}} {{ '' -}}
 {% endfor %}{# openhpc_partitions #}
 
 {% if openhpc_slurm_configless | bool %}SlurmctldParameters=enable_configless{% endif %}
-
-
-ReturnToService=2

From 39a5393b26bff0c2731dffed0579a4664c54cd8f Mon Sep 17 00:00:00 2001
From: Steve Brasier
Date: Tue, 27 May 2025 10:17:53 +0000
Subject: [PATCH 5/9] simplify omit description

---
 README.md | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 5e980e8..01a9de6 100644
--- a/README.md
+++ b/README.md
@@ -104,11 +104,10 @@ partition configuration for each.
 
 `openhpc_cluster_name`: name of the cluster.
 
 `openhpc_config`: Optional. Mapping of additional parameters and values for
-[slurm.conf](https://slurm.schedmd.com/slurm.conf.html). Keys are parameter
-names and values are lists or strings as appropriate. This can be used to
-supplement or override the template defaults, or to remove a template parameter
-by setting the value to `'omit'` - note this is the literal string, not the
-Ansible special variable.
+[slurm.conf](https://slurm.schedmd.com/slurm.conf.html). Keys are slurm.conf
+parameter names and values are lists or strings as appropriate. This can be
+used to supplement or override the template defaults. Templated parameters can
+also be removed by setting the value to the literal string `'omit'`.
 
 `openhpc_ram_multiplier`: Optional, default `0.95`. Multiplier used in the calculation: `total_memory * openhpc_ram_multiplier` when setting `RealMemory` for the partition in slurm.conf. Can be overriden on a per partition basis using `openhpc_slurm_partitions.ram_multiplier`. Has no effect if `openhpc_slurm_partitions.ram_mb` is set.

From eb98a09978e6fd3506e2256107f4ee99566a3d08 Mon Sep 17 00:00:00 2001
From: Steve Brasier
Date: Tue, 27 May 2025 10:32:48 +0000
Subject: [PATCH 6/9] remove dead code

---
 filter_plugins/slurm_conf.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/filter_plugins/slurm_conf.py b/filter_plugins/slurm_conf.py
index 3f0ba56..9c38638 100644
--- a/filter_plugins/slurm_conf.py
+++ b/filter_plugins/slurm_conf.py
@@ -22,16 +22,6 @@
 # Pattern to match a hostname with numerical ending
 pattern = re.compile("^(.*\D(?=\d))(\d+)$")
 
-def _get_hostvar(context, var_name, inventory_hostname=None):
-    if inventory_hostname is None:
-        namespace = context
-    else:
-        if inventory_hostname not in context['hostvars']:
-            raise errors.AnsibleFilterError(
-                "Inventory hostname '%s' not in hostvars" % inventory_hostname)
-        namespace = context["hostvars"][inventory_hostname]
-    return namespace.get(var_name)
-
 def hostlist_expression(hosts):
     """ Group hostnames using Slurm's hostlist expression format.
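As a concrete illustration of the `openhpc_config` semantics described in the README changes above (the parameter values here are hypothetical, not role defaults):

```yaml
openhpc_config:
  # Override a template default with a scalar:
  SlurmctldTimeout: 600
  # List values are comma-joined, so this renders as
  # "SlurmctldParameters=idle_on_node_suspend,cloud_dns":
  SlurmctldParameters:
    - idle_on_node_suspend
    - cloud_dns
  # Remove a parameter set via openhpc_default_config entirely; this is the
  # literal string 'omit', not the Ansible `omit` special variable:
  PreemptMode: omit
```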
From 8649af41b8f440192cf9e128761e40663572bf6e Mon Sep 17 00:00:00 2001
From: Steve Brasier
Date: Tue, 27 May 2025 10:40:45 +0000
Subject: [PATCH 7/9] fixup SlurmctldParameters for configless mode

---
 defaults/main.yml       | 1 -
 templates/slurm.conf.j2 | 6 ++++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/defaults/main.yml b/defaults/main.yml
index 98f04c9..94ba868 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -40,7 +40,6 @@ openhpc_default_config:
   PropagateResourceLimitsExcept: MEMLOCK
   Epilog: /etc/slurm/slurm.epilog.clean
   ReturnToService: 2
-  SlurmctldParameters: "{{ 'enable_configless' if openhpc_slurm_configless else 'omit' }}"
 
 openhpc_config: {}
 openhpc_gres_template: gres.conf.j2
diff --git a/templates/slurm.conf.j2 b/templates/slurm.conf.j2
index b1be623..cec2f5f 100644
--- a/templates/slurm.conf.j2
+++ b/templates/slurm.conf.j2
@@ -3,10 +3,14 @@ ClusterName={{ openhpc_cluster_name }}
 # PARAMETERS
 {% for k, v in openhpc_default_config | combine(openhpc_config) | items %}
 {% if v != "omit" %}{# allow removing items using setting key: null #}
+{% if k != 'SlurmctldParameters' %}{# handled separately due to openhpc_slurm_configless #}
 {{ k }}={{ v | join(',') if (v is sequence and v is not string) else v }}
+{% endif %}
 {% endif %}
 {% endfor %}
 
+{% set slurmctldparameters = ((openhpc_config.get('SlurmctldParameters', []) + (['enable_configless'] if openhpc_slurm_configless | bool else [])) | unique) %}
+SlurmctldParameters={{ slurmctldparameters | join(',') }}
 
 # LOGIN-ONLY NODES
 # Define slurmd nodes not in partitions for login-only nodes in "configless" mode:
@@ -53,5 +57,3 @@ PartitionName={{partition.name}} {{ '' -}}
     Nodes={{ partition.get('nodegroups', [partition.name]) | map('regex_replace', '^', 'nodegroup_') | join(',') }} {{ '' -}}
     {{ partition.partition_params | default({}) | dict2parameters }}
 {% endfor %}{# openhpc_partitions #}
-
-{% if openhpc_slurm_configless | bool %}SlurmctldParameters=enable_configless{% endif %}

From 81a33704054af6fc4201342aac8ad9ebbf7d63ba Mon Sep 17 00:00:00 2001
From: Steve Brasier
Date: Tue, 27 May 2025 12:56:59 +0000
Subject: [PATCH 8/9] fix slurmctld when not defined

---
 templates/slurm.conf.j2 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/templates/slurm.conf.j2 b/templates/slurm.conf.j2
index cec2f5f..3ffeff5 100644
--- a/templates/slurm.conf.j2
+++ b/templates/slurm.conf.j2
@@ -10,7 +10,9 @@ ClusterName={{ openhpc_cluster_name }}
 {% endfor %}
 
 {% set slurmctldparameters = ((openhpc_config.get('SlurmctldParameters', []) + (['enable_configless'] if openhpc_slurm_configless | bool else [])) | unique) %}
+{% if slurmctldparameters | length > 0 %}
 SlurmctldParameters={{ slurmctldparameters | join(',') }}
+{% endif %}
 
 # LOGIN-ONLY NODES
 # Define slurmd nodes not in partitions for login-only nodes in "configless" mode:

From 771ed73852993c04a69ba005e5cd64fc5da38a76 Mon Sep 17 00:00:00 2001
From: Steve Brasier
Date: Tue, 27 May 2025 14:35:05 +0000
Subject: [PATCH 9/9] use base-64 encoded munge key

---
 README.md          |  2 +-
 defaults/main.yml  |  2 +-
 tasks/runtime.yml  | 18 ++----------------
 tasks/validate.yml |  9 ++++++++-
 4 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 01a9de6..0eb73f3 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ each list element:
 
 `openhpc_slurm_configless`: Optional, default false. If true then slurm's ["configless" mode](https://slurm.schedmd.com/configless_slurm.html) is used.
 
-`openhpc_munge_key`: Optional. Define a munge key to use. If not provided then one is generated but the `openhpc_slurm_control_host` must be in the play.
+`openhpc_munge_key_b64`: Optional. A base-64 encoded munge key. If not provided then the one generated on package install is used, but the `openhpc_slurm_control_host` must be in the play.
 
 `openhpc_login_only_nodes`: Optional. If using "configless" mode specify the name of an ansible group containing nodes which are login-only nodes (i.e. not also control nodes), if required. These nodes will run `slurmd` to contact the control node for config.
diff --git a/defaults/main.yml b/defaults/main.yml
index 94ba868..bb06672 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -125,7 +125,7 @@ ohpc_default_extra_repos:
 
 # Concatenate all repo definitions here
 ohpc_repos: "{{ ohpc_openhpc_repos[ansible_distribution_major_version] + ohpc_default_extra_repos[ansible_distribution_major_version] + openhpc_extra_repos }}"
 
-openhpc_munge_key:
+openhpc_munge_key_b64:
 openhpc_login_only_nodes: ''
 openhpc_module_system_install: true
diff --git a/tasks/runtime.yml b/tasks/runtime.yml
index b08a451..0ba2b12 100644
--- a/tasks/runtime.yml
+++ b/tasks/runtime.yml
@@ -18,31 +18,17 @@
     state: directory
   when: inventory_hostname == openhpc_slurm_control_host
 
-- name: Generate a Munge key on control host
-  # NB this is usually a no-op as the package install actually generates a (node-unique) one, so won't usually trigger handler
-  command: "dd if=/dev/urandom of=/etc/munge/munge.key bs=1 count=1024"
-  args:
-    creates: "/etc/munge/munge.key"
-  when: inventory_hostname == openhpc_slurm_control_host
-
 - name: Retrieve Munge key from control host
+  # package install generates a node-unique one
   slurp:
     src: "/etc/munge/munge.key"
   register: openhpc_control_munge_key
   delegate_to: "{{ openhpc_slurm_control_host }}"
   when: openhpc_slurm_control_host in ansible_play_hosts
 
-- name: Fix permissions on /etc to pass Munge startup checks
-  # Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2 makes /etc g=rwx rather than g=rx (where group=root)
-  # which fails munged startup checks
-  file:
-    path: /etc
-    state: directory
-    mode: g-w
-
 - name: Write Munge key
   copy:
-    content: "{{ openhpc_munge_key or (openhpc_control_munge_key.content | b64decode) }}"
+    content: "{{ (openhpc_munge_key_b64 or openhpc_control_munge_key.content) | b64decode }}"
     dest: "/etc/munge/munge.key"
     owner: munge
     group: munge
diff --git a/tasks/validate.yml b/tasks/validate.yml
index b1bc104..0a58c99 100644
--- a/tasks/validate.yml
+++ b/tasks/validate.yml
@@ -40,9 +40,16 @@
   loop: "{{ _openhpc_gres_autodetect_groups }}"
   run_once: true
 
-- name: Fail if configuration is old
+- name: Fail if partition configuration is outdated
   assert:
     that: openhpc_slurm_partitions is not defined
     fail_msg: stackhpc.openhpc parameter openhpc_slurm_partitions has been replaced - see openhpc_nodegroups and openhpc_partitions
   delegate_to: localhost
   run_once: true
+
+- name: Fail if munge key configuration is outdated
+  assert:
+    that: openhpc_munge_key is not defined
+    fail_msg: stackhpc.openhpc parameter openhpc_munge_key has been replaced with openhpc_munge_key_b64
+  delegate_to: localhost
+  run_once: true
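Finally, a usage sketch for the new `openhpc_munge_key_b64` variable. The `dd` command matches the generation task removed above; the file names and the vaulted variable are illustrative:

```yaml
# One-off, on any machine: create 1024 bytes of random key material and
# base64-encode it onto a single line (base64 -w0 is GNU coreutils):
#   dd if=/dev/urandom of=munge.key bs=1 count=1024
#   base64 -w0 munge.key > munge.key.b64
# Then set the variable, e.g. in group_vars, ideally via Ansible Vault:
openhpc_munge_key_b64: "{{ vault_openhpc_munge_key_b64 }}"  # hypothetical vaulted variable
```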