def config2dict(lines):
    """ Convert a sequence of output lines from `scontrol show config` to a dict.

    As per the man page, uppercase keys are derived parameters and mixed-case
    keys are from config files.

    Lines which do not contain '=' (blank or informational lines) are
    skipped. Every other line is split on the first '=' only, so values may
    themselves contain '='. Keys and values have surrounding whitespace
    stripped. If a key appears more than once, the last occurrence wins.

    The following case-insensitive conversions of values are carried out:
        - '(null)' and 'n/a' are converted to None.
        - yes and no are converted to True and False respectively

    Except for these, values are always strings.

    :param lines: iterable of strings, one per line of command output.
    :returns: dict mapping each parameter name to a str, bool or None.
    """
    # Lower-cased raw value -> converted value; anything else stays a string.
    conversions = {'(null)': None, 'n/a': None, 'no': False, 'yes': True}

    cfg = {}
    for line in lines:
        # partition splits on the first '=' only, which handles '=' in values
        # and always yields a key and a value when a separator is present.
        key, sep, value = line.partition('=')
        if not sep:  # ditch blank/info lines
            continue
        value = value.strip()
        cfg[key.strip()] = conversions.get(value.lower(), value)
    return cfg
file mode 100644 index 31c629d..0000000 --- a/molecule/test7/converge.yml +++ /dev/null @@ -1,30 +0,0 @@ ---- -- name: Converge - hosts: all - vars: - openhpc_enable: - batch: true - runtime: true - openhpc_slurm_service_started: false - openhpc_slurm_control_host: testohpc-login-0 - openhpc_slurm_partitions: - - name: "compute" - openhpc_cluster_name: testohpc - openhpc_slurm_configless: true - openhpc_munge_key: "{{ specified_munge_key.content | b64decode }}" - tasks: - - name: Generate munge key on ansible control host (so can verify) - command: "dd if=/dev/urandom of=/tmp/ansible-role-openhpc-test7 bs=1 count=1024" # can't use tmpfile as not idempotent - args: - creates: "/tmp/ansible-role-openhpc-test7" - delegate_to: localhost - - - name: Get generated munge key - slurp: - src: /tmp/ansible-role-openhpc-test7 - delegate_to: localhost - register: specified_munge_key - - - name: "Include ansible-role-openhpc" - include_role: - name: "{{ lookup('env', 'MOLECULE_PROJECT_DIRECTORY') | basename }}" diff --git a/molecule/test7/molecule.yml b/molecule/test7/molecule.yml deleted file mode 100644 index 5b6d4c6..0000000 --- a/molecule/test7/molecule.yml +++ /dev/null @@ -1,20 +0,0 @@ ---- -driver: - name: podman -platforms: - - name: testohpc-compute-0 - image: ${MOLECULE_IMAGE} - pre_build_image: true - groups: - - testohpc_compute - command: /sbin/init - tmpfs: - - /run - - /tmp - volumes: - - /sys/fs/cgroup:/sys/fs/cgroup:ro - network: net1 -provisioner: - name: ansible -verifier: - name: ansible diff --git a/molecule/test7/testohpc-compute-0/etc/munge/munge.key b/molecule/test7/testohpc-compute-0/etc/munge/munge.key deleted file mode 100644 index 789294e..0000000 Binary files a/molecule/test7/testohpc-compute-0/etc/munge/munge.key and /dev/null differ diff --git a/molecule/test7/verify.yml b/molecule/test7/verify.yml deleted file mode 100644 index f00af55..0000000 --- a/molecule/test7/verify.yml +++ /dev/null @@ -1,32 +0,0 @@ ---- - -# NB this isn't a very good test 
# Publish the live Slurm configuration as Ansible local facts. The parsed
# output of `scontrol show config` is written to a facts.d file and the
# "Reload facts" handler is notified when that file changes.

- name: Capture configuration from scontrol
  # this includes any dynamically-generated config, not just what is set in
  # slurm.conf
  ansible.builtin.command: scontrol show config
  changed_when: false
  register: _scontrol_config

- name: Create facts directory
  ansible.builtin.file:
    path: /etc/ansible/facts.d/
    state: directory
    owner: root
    group: root
    # Writable by root only: executable files placed in facts.d are run
    # during fact gathering, so the directory must not be group- or
    # world-writable.
    mode: u=rwX,go=rX

- name: Template slurm configuration facts
  ansible.builtin.copy:
    dest: /etc/ansible/facts.d/slurm.fact
    content: "{{ _scontrol_config.stdout_lines | config2dict | to_nice_json }}"
    owner: slurm
    group: slurm
    mode: ug=rw,o=r # any user can run scontrol show config anyway
  register: _template_facts
  notify: Reload facts