Skip to content

Commit ee2fb9c

Browse files
authored
Merge pull request #83 from stackhpc/fix/login-only
Fix login-only nodes in configless mode
2 parents 83ff39f + a30faea commit ee2fb9c

File tree

14 files changed

+283
-2
lines changed

14 files changed

+283
-2
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,12 @@ package in the image.
3131

3232
`openhpc_slurmdbd_host`: Optional. Where to deploy slurmdbd if you are using this role to deploy slurmdbd, otherwise where an existing slurmdbd is running. This should be the name of a host in your inventory. Set this to `none` to prevent the role from managing slurmdbd. Defaults to `openhpc_slurm_control_host`.
3333

34-
`openhpc_slurm_configless`: Optional, default false. If True then slurm's ["configless" mode](https://slurm.schedmd.com/configless_slurm.html) is used. **NB: Requires Centos8/OpenHPC v2.**
34+
`openhpc_slurm_configless`: Optional, default false. If true then slurm's ["configless" mode](https://slurm.schedmd.com/configless_slurm.html) is used. **NB: Requires Centos8/OpenHPC v2.**
3535

3636
`openhpc_munge_key_path`: Optional, default ''. Define a path for a local file containing a munge key to use, otherwise one will be generated on the slurm control node.
3737

38+
`openhpc_login_only_nodes`: Optional, default ''. If using "configless" mode and login-only nodes are required, set this to the name of an Ansible group containing the login-only nodes (i.e. login nodes which are not also control nodes). These nodes will run `slurmd` to contact the control node for config.
39+
3840
### slurm.conf
3941

4042
`openhpc_slurm_partitions`: list of one or more slurm partitions. Each partition may contain the following values:

defaults/main.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,4 @@ ohpc_release_repos:
5050
"8": "http://repos.openhpc.community/OpenHPC/2/CentOS_8/x86_64/ohpc-release-2-1.el8.x86_64.rpm" # ohpc v2 for Centos 8
5151
openhpc_slurm_configless: false
5252
openhpc_munge_key: ''
53+
openhpc_login_only_nodes: ''

molecule/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ test4 | 1 | N | 2x compute node, accounting en
1515
test5 | 1 | N | As for #1 but configless
1616
test6 | 1 | N | 0x compute nodes, configless
1717
test7 | 1 | N | 1x compute node, no login node, configless
18+
test8 | 1 | N | 2x compute node, 2x login-only nodes, configless
19+
test9 | 1 | N | As test8 but uses `--limit=testohpc-control,testohpc-compute-0` and checks login nodes still end up in slurm.conf
1820

1921
# Local Installation & Running
2022

molecule/test8/INSTALL.rst

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
********************************
2+
Docker driver installation guide
3+
********************************
4+
5+
Requirements
6+
============
7+
8+
* Docker Engine
9+
10+
Install
11+
=======
12+
13+
Please refer to the `Virtual environment`_ documentation for installation best
14+
practices. If not using a virtual environment, please consider passing the
15+
widely recommended `'--user' flag`_ when invoking ``pip``.
16+
17+
.. _Virtual environment: https://virtualenv.pypa.io/en/latest/
18+
.. _'--user' flag: https://packaging.python.org/tutorials/installing-packages/#installing-to-the-user-site
19+
20+
.. code-block:: bash
21+
22+
$ python3 -m pip install 'molecule[docker]'

molecule/test8/converge.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
---
2+
- name: Converge
3+
hosts: all
4+
tasks:
5+
- name: "Include ansible-role-openhpc"
6+
include_role:
7+
name: "ansible-role-openhpc/"
8+
vars:
9+
openhpc_enable:
10+
control: "{{ inventory_hostname in groups['testohpc_control'] }}"
11+
batch: "{{ inventory_hostname in groups['testohpc_compute'] }}"
12+
runtime: true
13+
openhpc_slurm_control_host: "{{ groups['testohpc_control'] | first }}"
14+
openhpc_slurm_partitions:
15+
- name: "compute"
16+
openhpc_cluster_name: testohpc
17+
openhpc_slurm_configless: true
18+
openhpc_login_only_nodes: 'testohpc_login'
19+

molecule/test8/molecule.yml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
---
2+
name: single partition, group is partition
3+
driver:
4+
name: docker
5+
platforms:
6+
- name: testohpc-control
7+
image: ${MOLECULE_IMAGE}
8+
pre_build_image: true
9+
groups:
10+
- testohpc_control
11+
command: /sbin/init
12+
tmpfs:
13+
- /run
14+
- /tmp
15+
volumes:
16+
- /sys/fs/cgroup:/sys/fs/cgroup:ro
17+
networks:
18+
- name: net1
19+
20+
- name: testohpc-login-0
21+
image: ${MOLECULE_IMAGE}
22+
pre_build_image: true
23+
groups:
24+
- testohpc_login
25+
command: /sbin/init
26+
tmpfs:
27+
- /run
28+
- /tmp
29+
volumes:
30+
- /sys/fs/cgroup:/sys/fs/cgroup:ro
31+
networks:
32+
- name: net1
33+
34+
- name: testohpc-login-1
35+
image: ${MOLECULE_IMAGE}
36+
pre_build_image: true
37+
groups:
38+
- testohpc_login
39+
command: /sbin/init
40+
tmpfs:
41+
- /run
42+
- /tmp
43+
volumes:
44+
- /sys/fs/cgroup:/sys/fs/cgroup:ro
45+
networks:
46+
- name: net1
47+
48+
- name: testohpc-compute-0
49+
image: ${MOLECULE_IMAGE}
50+
pre_build_image: true
51+
groups:
52+
- testohpc_compute
53+
command: /sbin/init
54+
tmpfs:
55+
- /run
56+
- /tmp
57+
volumes:
58+
- /sys/fs/cgroup:/sys/fs/cgroup:ro
59+
networks:
60+
- name: net1
61+
- name: testohpc-compute-1
62+
image: ${MOLECULE_IMAGE}
63+
pre_build_image: true
64+
groups:
65+
- testohpc_compute
66+
command: /sbin/init
67+
tmpfs:
68+
- /run
69+
- /tmp
70+
volumes:
71+
- /sys/fs/cgroup:/sys/fs/cgroup:ro
72+
networks:
73+
- name: net1
74+
provisioner:
75+
name: ansible
76+
# ansible_args:
77+
# - --limit=testohpc-control,testohpc-compute-0
78+
verifier:
79+
name: ansible

molecule/test8/verify.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
3+
- name: Check slurm hostlist
4+
hosts: testohpc_login # NB for this test this is 2x non-control nodes, so tests they can contact slurmctld too
5+
tasks:
6+
- name: Get slurm partition info
7+
command: sinfo --noheader --format="%P,%a,%l,%D,%t,%N" # using --format ensures we control whitespace
8+
register: sinfo
9+
- name:
10+
assert: # PARTITION AVAIL TIMELIMIT NODES STATE NODELIST
11+
that: "sinfo.stdout_lines == ['compute*,up,60-00:00:00,2,idle,testohpc-compute-[0-1]']"
12+
fail_msg: "FAILED - actual value: {{ sinfo.stdout_lines }}"

molecule/test9/INSTALL.rst

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
********************************
2+
Docker driver installation guide
3+
********************************
4+
5+
Requirements
6+
============
7+
8+
* Docker Engine
9+
10+
Install
11+
=======
12+
13+
Please refer to the `Virtual environment`_ documentation for installation best
14+
practices. If not using a virtual environment, please consider passing the
15+
widely recommended `'--user' flag`_ when invoking ``pip``.
16+
17+
.. _Virtual environment: https://virtualenv.pypa.io/en/latest/
18+
.. _'--user' flag: https://packaging.python.org/tutorials/installing-packages/#installing-to-the-user-site
19+
20+
.. code-block:: bash
21+
22+
$ python3 -m pip install 'molecule[docker]'

molecule/test9/converge.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
---
2+
- name: Converge
3+
hosts: all
4+
tasks:
5+
- name: "Include ansible-role-openhpc"
6+
include_role:
7+
name: "ansible-role-openhpc/"
8+
vars:
9+
openhpc_enable:
10+
control: "{{ inventory_hostname in groups['testohpc_control'] }}"
11+
batch: "{{ inventory_hostname in groups['testohpc_compute'] }}"
12+
runtime: true
13+
openhpc_slurm_control_host: "{{ groups['testohpc_control'] | first }}"
14+
openhpc_slurm_partitions:
15+
- name: "compute"
16+
openhpc_cluster_name: testohpc
17+
openhpc_slurm_configless: true
18+
openhpc_login_only_nodes: 'testohpc_login'
19+

molecule/test9/molecule.yml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
---
2+
name: single partition, group is partition
3+
driver:
4+
name: docker
5+
platforms:
6+
- name: testohpc-control
7+
image: ${MOLECULE_IMAGE}
8+
pre_build_image: true
9+
groups:
10+
- testohpc_control
11+
command: /sbin/init
12+
tmpfs:
13+
- /run
14+
- /tmp
15+
volumes:
16+
- /sys/fs/cgroup:/sys/fs/cgroup:ro
17+
networks:
18+
- name: net1
19+
20+
- name: testohpc-login-0
21+
image: ${MOLECULE_IMAGE}
22+
pre_build_image: true
23+
groups:
24+
- testohpc_login
25+
command: /sbin/init
26+
tmpfs:
27+
- /run
28+
- /tmp
29+
volumes:
30+
- /sys/fs/cgroup:/sys/fs/cgroup:ro
31+
networks:
32+
- name: net1
33+
34+
- name: testohpc-login-1
35+
image: ${MOLECULE_IMAGE}
36+
pre_build_image: true
37+
groups:
38+
- testohpc_login
39+
command: /sbin/init
40+
tmpfs:
41+
- /run
42+
- /tmp
43+
volumes:
44+
- /sys/fs/cgroup:/sys/fs/cgroup:ro
45+
networks:
46+
- name: net1
47+
48+
- name: testohpc-compute-0
49+
image: ${MOLECULE_IMAGE}
50+
pre_build_image: true
51+
groups:
52+
- testohpc_compute
53+
command: /sbin/init
54+
tmpfs:
55+
- /run
56+
- /tmp
57+
volumes:
58+
- /sys/fs/cgroup:/sys/fs/cgroup:ro
59+
networks:
60+
- name: net1
61+
- name: testohpc-compute-1
62+
image: ${MOLECULE_IMAGE}
63+
pre_build_image: true
64+
groups:
65+
- testohpc_compute
66+
command: /sbin/init
67+
tmpfs:
68+
- /run
69+
- /tmp
70+
volumes:
71+
- /sys/fs/cgroup:/sys/fs/cgroup:ro
72+
networks:
73+
- name: net1
74+
provisioner:
75+
name: ansible
76+
ansible_args:
77+
- --limit=testohpc-control,testohpc-compute-0
78+
verifier:
79+
name: ansible

0 commit comments

Comments
 (0)