Skip to content
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ each list element:

`openhpc_slurm_configless`: Optional, default false. If true, Slurm's ["configless" mode](https://slurm.schedmd.com/configless_slurm.html) is used.

`openhpc_munge_key_b64`: Optional. A base64-encoded munge key. If not provided, the key generated on the control host at package install is used, in which case the `openhpc_slurm_control_host` must be in the play. This parameter replaces the former `openhpc_munge_key`, which is no longer accepted.

`openhpc_login_only_nodes`: Optional. When using "configless" mode, the name of an ansible group containing any login-only nodes (i.e. nodes which are not also control nodes). These nodes will run `slurmd` to contact the control node for config.

Expand Down
2 changes: 1 addition & 1 deletion defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ ohpc_default_extra_repos:
# Concatenate all repo definitions here
ohpc_repos: "{{ ohpc_openhpc_repos[ansible_distribution_major_version] + ohpc_default_extra_repos[ansible_distribution_major_version] + openhpc_extra_repos }}"

# Base64-encoded munge key. When null, the key read from the control host is
# written instead.
# NB: openhpc_munge_key is deliberately given no default here - validation
# asserts that it is undefined, and a null default would still count as defined.
openhpc_munge_key_b64: null
openhpc_login_only_nodes: ''
openhpc_module_system_install: true

Expand Down
18 changes: 2 additions & 16 deletions tasks/runtime.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,31 +18,17 @@
state: directory
when: inventory_hostname == openhpc_slurm_control_host

- name: Generate a Munge key on control host
  # NB: installing the package normally creates a (node-unique) key already,
  # so `creates` usually turns this into a no-op that reports no change.
  when: inventory_hostname == openhpc_slurm_control_host
  command: "dd if=/dev/urandom of=/etc/munge/munge.key bs=1 count=1024"
  args:
    creates: "/etc/munge/munge.key"

- name: Retrieve Munge key from control host
  # Package install gives every node its own unique key, so read the control
  # host's copy. slurp returns the file base64-encoded in `.content`.
  when: openhpc_slurm_control_host in ansible_play_hosts
  delegate_to: "{{ openhpc_slurm_control_host }}"
  slurp:
    src: "/etc/munge/munge.key"
  register: openhpc_control_munge_key

- name: Fix permissions on /etc to pass Munge startup checks
  # The Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2 image ships /etc
  # with g=rwx instead of g=rx (where group=root), and munged's startup checks
  # fail on that - so strip group write.
  file:
    state: directory
    path: /etc
    mode: g-w

- name: Write Munge key
copy:
content: "{{ openhpc_munge_key or (openhpc_control_munge_key.content | b64decode) }}"
content: "{{ (openhpc_munge_key_b64 or openhpc_control_munge_key.content) | b64decode }}"
dest: "/etc/munge/munge.key"
owner: munge
group: munge
Expand Down
9 changes: 8 additions & 1 deletion tasks/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,16 @@
loop: "{{ openhpc_nodegroups }}"
run_once: true

- name: Fail if partition configuration is outdated
  # openhpc_slurm_partitions is no longer supported: stop early and point the
  # user at its replacements. Checked once, on the Ansible controller.
  assert:
    that: openhpc_slurm_partitions is not defined
    fail_msg: stackhpc.openhpc parameter openhpc_slurm_partitions has been replaced - see openhpc_nodegroups and openhpc_partitions
  delegate_to: localhost
  run_once: true

- name: Fail if munge key configuration is outdated
  # openhpc_munge_key is no longer supported: stop early and name its
  # replacement. Checked once, on the Ansible controller.
  run_once: true
  delegate_to: localhost
  assert:
    fail_msg: stackhpc.openhpc parameter openhpc_munge_key has been replaced with openhpc_munge_key_b64
    that: openhpc_munge_key is not defined